win 46883cef8a feat(02-01): migrate Boss spider layer from spiderJobs.core to crawler_core
- client.py: inherit crawler_core.http_client.HTTPClient, use crawler_core.boss.sign.BossSign
- api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher, fix self._http -> self.http_client
- main.py: import BaseFetcher/BaseSearcher and BossSign from crawler_core
- sign.py: replace with backward-compat stub re-exporting BossSign from crawler_core
Satisfies ARCH-03
2026-03-21 19:00:30 +08:00

341 lines
12 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Boss直聘 - 所有 API 接口
每个类只负责参数构建,HTTP 和算法由 client / core 层处理
响应格式适配:
Boss 使用 code/zpData(区别于智联的 statusCode/data)
code=0 表示成功,zpData 为实际业务数据
"""
from __future__ import annotations
from typing import Any, Optional
from urllib.parse import urlencode
from crawler_core.base import BaseFetcher, BaseSearcher, Result
from spiderJobs.platforms.boss.client import BossClient, create_client
# ─────────────────────────────────────────────
# Boss 响应解析(覆写默认算法)
# ─────────────────────────────────────────────
def _parse_boss_response(http_code: int, raw: Any) -> Result:
    """
    Parse a Boss API response into a ``Result``.

    Boss wraps every response as
    ``{"code": 0, "message": "Success", "zpData": {...}}``; ``code == 0``
    means success and any other value is a business error.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body; expected to be a dict.

    Returns:
        A failed ``Result`` when the HTTP status is not 200, when the body is
        not a dict, or when the business code is non-zero (``status_code``
        then carries the business code).  Otherwise a successful ``Result``
        whose ``data`` is the ``zpData`` payload.  When the payload is a dict
        containing ``jobList`` or ``list``, the result additionally carries
        ``list``, ``count`` and ``is_end_page`` (the negation of ``hasMore``).
    """
    if http_code != 200:
        return Result(
            success=False,
            status_code=http_code,
            error=f"HTTP 请求失败: {http_code}",
        )
    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    biz_code = raw.get("code", -1)
    if biz_code != 0:
        return Result(
            success=False,
            status_code=biz_code,
            error=raw.get("message") or f"业务错误: {biz_code}",
        )

    payload = raw.get("zpData") or {}
    if isinstance(payload, dict):
        # List-type responses: "jobList" is checked first, then "list"
        # (the company job list uses the "list" field).
        for list_key in ("jobList", "list"):
            if list_key not in payload:
                continue
            items = payload[list_key]
            if items is None:
                # The key can be present with a null value; treat that as an
                # empty page instead of failing on len(None).
                items = []
            return Result(
                success=True, status_code=200, data=payload,
                list=items,
                count=len(items),
                is_end_page=not payload.get("hasMore", False),
            )
    return Result(success=True, status_code=200, data=payload)
# ─────────────────────────────────────────────
# 1. 首页推荐职位列表(GET)
# ─────────────────────────────────────────────
class SearchRecJobs(BaseSearcher):
    """
    Homepage recommended / search job list (no login required).

    Usage:
        api = SearchRecJobs(city_code="101280600")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/homepage/recjoblist.json"

    def __init__(
        self,
        *,
        city_code: str = "101280600",
        sort_type: int = 1,
        district_code: str = "",
        blue_welfare: str = "",
        encrypt_expect_id: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the search filters; a client is created when none is given."""
        http = client if client else create_client()
        super().__init__(page_size=page_size, http_client=http)
        # Location filters.
        self.city_code = city_code
        self.district_code = district_code
        # Remaining filters, forwarded verbatim as query parameters.
        self.sort_type = sort_type
        self.blue_welfare = blue_welfare
        self.encrypt_expect_id = encrypt_expect_id

    def _build_params(self, page_index: int) -> dict:
        """Build the query parameters for the page numbered ``page_index``."""
        # Keys are inserted in a fixed order so the resulting query string
        # keeps the same parameter order on every request.
        payload: dict = {"cityCode": self.city_code, "sortType": self.sort_type}
        payload["page"] = page_index
        payload["pageSize"] = self.page_size
        payload.update(
            encryptExpectId=self.encrypt_expect_id,
            districtCode=self.district_code,
            blueWelfare=self.blue_welfare,
            appId=10002,
        )
        return payload

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: issue the request with GET against ``ENDPOINT``."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 2. 职位详情(通过 batch 接口)
# ─────────────────────────────────────────────
class GetJobDetail(BaseFetcher):
    """
    Job detail (no login required), fetched through the
    /wapi/batch/requests batch endpoint.

    Usage:
        detail = GetJobDetail(
            security_id="xxx",
            job_id="92ea3c76f9197a1503Vz09q8EFRR",
            lid="8uF4BIOMvBU.search.63",
        ).fetch()
    """

    ENDPOINT = "/wapi/batch/requests"
    # Paths of the two sub-requests.  The batch response's zpData is looked up
    # by these same paths, so they are defined once and shared by fetch/_parse.
    DETAIL_PATH = "/wapi/zpgeek/miniapp/job/detail.json"
    IMPROVEMENT_PATH = "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json"

    def __init__(
        self,
        *,
        security_id: str,
        job_id: str,
        lid: str = "",
        source: int = 10,
        client: Optional[BossClient] = None,
    ):
        """Store the job identifiers; a client is created when none is given."""
        super().__init__(http_client=client or create_client())
        self.security_id = security_id
        self.job_id = job_id
        self.lid = lid
        self.source = source

    def _build_params(self) -> dict:
        """Unused: the batch request is assembled directly in ``fetch``."""
        return {}

    def fetch(self) -> Result:
        """
        Override ``fetch`` to go through the batch endpoint.

        Returns:
            The parsed ``Result``.  Any exception raised while sending the
            batch request is converted into a failed ``Result`` with
            ``status_code=-1`` and the exception text as ``error``.
        """
        base_query = {
            "securityId": self.security_id,
            "jobId": self.job_id,
            "lid": self.lid,
        }
        sub_reqs = [
            {
                "path": self.DETAIL_PATH,
                "method": "GET",
                # The detail sub-request additionally carries "source".
                "query": urlencode({**base_query, "source": self.source}),
            },
            {
                "path": self.IMPROVEMENT_PATH,
                "method": "GET",
                "query": urlencode(base_query),
            },
        ]
        try:
            client: BossClient = self.http_client
            http_code, data = client.batch(sub_reqs)
        except Exception as e:
            # Deliberate boundary: transport failures are reported through
            # the Result rather than raised to the caller.
            return Result(success=False, status_code=-1, error=str(e))
        return self._parse(http_code, data)

    @staticmethod
    def _unwrap(sub_response: Any) -> Any:
        """Return the inner ``zpData`` of a sub-response dict; pass other values through."""
        if isinstance(sub_response, dict):
            return sub_response.get("zpData")
        return sub_response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """
        Parse the batch response and merge the two sub-request results.

        Returns:
            A failed ``Result`` on a non-200 status, a malformed body, or a
            non-zero business code.  Otherwise a successful ``Result`` whose
            ``data`` is ``{"detail": ..., "improvement": ...}``.
        """
        if http_code != 200:
            return Result(success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}")
        if not isinstance(raw, dict):
            return Result(success=False, status_code=http_code, error="响应格式异常")
        biz_code = raw.get("code", -1)
        if biz_code != 0:
            return Result(
                success=False,
                status_code=biz_code,
                error=raw.get("message") or f"业务错误: {biz_code}",
            )
        zp_data = raw.get("zpData") or {}
        if not isinstance(zp_data, dict):
            # A non-dict zpData cannot be looked up by path; report it as a
            # malformed response instead of raising AttributeError.
            return Result(success=False, status_code=http_code, error="响应格式异常")
        # NOTE(review): only the inner zpData of each sub-response is read;
        # the sub-response's own "code" is not inspected here.
        merged = {
            "detail": self._unwrap(zp_data.get(self.DETAIL_PATH, {})),
            "improvement": self._unwrap(zp_data.get(self.IMPROVEMENT_PATH, {})),
        }
        return Result(success=True, status_code=200, data=merged)
# ─────────────────────────────────────────────
# 3. 公司/品牌详情(GET)
# ─────────────────────────────────────────────
class GetBrandDetail(BaseFetcher):
    """
    Company / brand detail (no login required).

    Usage:
        detail = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/detail.json"

    def __init__(self, *, brand_id: str, client: Optional[BossClient] = None):
        """Store the brand id; a client is created when none is given."""
        http = client if client else create_client()
        super().__init__(http_client=http)
        self.brand_id = brand_id

    def _build_params(self) -> dict:
        """Build the query parameters identifying the brand."""
        payload: dict = {"brandId": self.brand_id}
        payload["appId"] = 10002
        return payload

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 4. 公司职位列表(GET)
# ─────────────────────────────────────────────
class SearchBrandJobs(BaseSearcher):
    """
    Open positions of a company (no login required).

    Usage:
        api = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/joblist.json"

    def __init__(
        self,
        *,
        brand_id: str,
        query: str = "",
        position_lv1: int = 0,
        city: str = "",
        experience: str = "",
        salary: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the brand id and filters; a client is created when none is given."""
        http = client if client else create_client()
        super().__init__(page_size=page_size, http_client=http)
        # What to search for.
        self.brand_id = brand_id
        self.query = query
        # Optional filters, forwarded verbatim as query parameters.
        self.position_lv1 = position_lv1
        self.city = city
        self.experience = experience
        self.salary = salary

    def _build_params(self, page_index: int) -> dict:
        """Build the query parameters for the page numbered ``page_index``."""
        # NOTE(review): page_size is handed to the base class but is not part
        # of the parameters sent here -- confirm this endpoint needs no pageSize.
        # Keys are inserted in a fixed order so the resulting query string
        # keeps the same parameter order on every request.
        payload: dict = {"brandId": self.brand_id, "query": self.query}
        payload["page"] = page_index
        payload["hasMore"] = "true"
        payload.update(
            positionLv1=self.position_lv1,
            city=self.city,
            experience=self.experience,
            salary=self.salary,
            appId=10002,
        )
        return payload

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: issue the request with GET against ``ENDPOINT``."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 使用示例
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Manual smoke test: exercises each API class once against the live service.
    import json

    print("=== 1. 首页推荐职位 ===")
    # Run the recommended-jobs search once and keep the result: step 4 reuses
    # it instead of re-issuing the same request, so the emptiness check and
    # the [0] lookup always see the same response.
    rec = SearchRecJobs(city_code="101280600").search()
    rec_jobs = rec.list or []
    print(f"成功: {rec.success}, 本页 {len(rec_jobs)} 条, is_end_page: {rec.is_end_page}")
    if rec_jobs:
        print(f"第一条: {json.dumps(rec_jobs[0], ensure_ascii=False, indent=2)[:200]}...")

    print("\n=== 2. 公司详情 ===")
    r = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    print(f"成功: {r.success}")
    if r.data:
        print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")

    print("\n=== 3. 公司职位列表 ===")
    r = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~").search()
    print(f"成功: {r.success}, 本页 {len(r.list or [])}")

    # Note: the job detail needs a security_id, which has to be taken from a
    # search result first.
    print("\n=== 4. 职位详情(需要 security_id===")
    if rec_jobs:
        first_job = rec_jobs[0]
        sid = first_job.get("securityId", "")
        jid = first_job.get("encryptJobId", "")
        if sid and jid:
            r = GetJobDetail(security_id=sid, job_id=jid).fetch()
            print(f"成功: {r.success}")
            if r.data:
                print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")
        else:
            print("搜索结果中未找到 securityId/encryptJobId 字段")
    else:
        print("搜索结果为空,跳过")