- client.py: inherit crawler_core.http_client.HTTPClient, use crawler_core.boss.sign.BossSign - api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher, fix self._http -> self.http_client - main.py: import BaseFetcher/BaseSearcher and BossSign from crawler_core - sign.py: replace with backward-compat stub re-exporting BossSign from crawler_core Satisfies ARCH-03
341 lines
12 KiB
Python
341 lines
12 KiB
Python
"""
|
||
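Boss Zhipin - all API endpoints.
Each class is only responsible for building request parameters; HTTP and
algorithms are handled by the client / core layers.

Response format adaptation:
Boss uses code/zpData (unlike Zhilian, which uses statusCode/data).
code=0 means success; zpData carries the actual business data.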
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Optional
|
||
from urllib.parse import urlencode
|
||
|
||
from crawler_core.base import BaseFetcher, BaseSearcher, Result
|
||
from spiderJobs.platforms.boss.client import BossClient, create_client
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# Boss 响应解析(覆写默认算法)
|
||
# ─────────────────────────────────────────────
|
||
|
||
def _parse_boss_response(http_code: int, raw: Any) -> Result:
    """Parse a Boss API response envelope into a ``Result``.

    Boss wraps responses as
    ``{"code": 0, "message": "Success", "zpData": {...}}``; ``code == 0``
    means success and any other value is treated as a business error.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body; expected to be a dict.

    Returns:
        A failed ``Result`` when the HTTP status is not 200, the body is not
        a dict, or the business code is non-zero (the business code is then
        reported as ``status_code``). Otherwise a successful ``Result``
        carrying ``zpData`` as ``data``; when ``zpData`` is a dict holding a
        ``jobList`` or ``list`` key, the result additionally carries the
        items, their count and the end-of-page flag.
    """
    if http_code != 200:
        return Result(
            success=False,
            status_code=http_code,
            error=f"HTTP 请求失败: {http_code}",
        )

    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    # A missing "code" is treated as a failure (-1), never as success.
    biz_code = raw.get("code", -1)
    if biz_code != 0:
        return Result(
            success=False,
            status_code=biz_code,
            error=raw.get("message") or f"业务错误: {biz_code}",
        )

    payload = raw.get("zpData") or {}

    if isinstance(payload, dict):
        # Job search responses use "jobList"; the brand job list uses "list".
        # "jobList" wins when both keys are present.
        for list_key in ("jobList", "list"):
            if list_key not in payload:
                continue
            # The key may be present with a null value; treat that as an
            # empty page instead of letting len(None) raise TypeError.
            items = payload[list_key] or []
            return Result(
                success=True,
                status_code=200,
                data=payload,
                list=items,
                count=len(items),
                # A missing "hasMore" is read as False, i.e. last page.
                is_end_page=not payload.get("hasMore", False),
            )

    return Result(success=True, status_code=200, data=payload)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 1. 首页推荐职位列表(GET)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class SearchRecJobs(BaseSearcher):
    """Home-page recommended / search job list (no login required).

    Example::

        api = SearchRecJobs(city_code="101280600")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/homepage/recjoblist.json"

    def __init__(
        self,
        *,
        city_code: str = "101280600",
        sort_type: int = 1,
        district_code: str = "",
        blue_welfare: str = "",
        encrypt_expect_id: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the search filters and pass the HTTP client to the base class.

        When ``client`` is omitted (or falsy) a new one is obtained from
        ``create_client()``.
        """
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)

        self.city_code = city_code
        self.sort_type = sort_type
        self.district_code = district_code
        self.blue_welfare = blue_welfare
        self.encrypt_expect_id = encrypt_expect_id

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``."""
        # self.page_size is not assigned in this class; presumably set by
        # BaseSearcher.__init__ from the page_size argument — TODO confirm.
        query = {
            "cityCode": self.city_code,
            "sortType": self.sort_type,
            "page": page_index,
            "pageSize": self.page_size,
            "encryptExpectId": self.encrypt_expect_id,
            "districtCode": self.district_code,
            "blueWelfare": self.blue_welfare,
            "appId": 10002,
        }
        return query

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: send the request through the client's ``get`` method."""
        endpoint = self.ENDPOINT
        return self.http_client.get(endpoint, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate parsing to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 2. 职位详情(通过 batch 接口)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetJobDetail(BaseFetcher):
    """Job detail (no login required), fetched through ``/wapi/batch/requests``.

    Two GET sub-requests (job detail and "improvement" query) are sent in a
    single batch call and their payloads are merged into one ``Result``.

    Example::

        detail = GetJobDetail(
            security_id="xxx",
            job_id="92ea3c76f9197a1503Vz09q8EFRR",
            lid="8uF4BIOMvBU.search.63",
        ).fetch()
    """

    ENDPOINT = "/wapi/batch/requests"

    # Sub-request paths. The same strings are used to look up each
    # sub-response inside the batch "zpData", so they are defined once here
    # to keep the request and the parser in sync.
    DETAIL_PATH = "/wapi/zpgeek/miniapp/job/detail.json"
    IMPROVEMENT_PATH = "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json"

    def __init__(
        self,
        *,
        security_id: str,
        job_id: str,
        lid: str = "",
        source: int = 10,
        client: Optional[BossClient] = None,
    ):
        """Store the job identifiers and pass the HTTP client to the base class.

        When ``client`` is omitted (or falsy) a new one is obtained from
        ``create_client()``.
        """
        super().__init__(http_client=client or create_client())
        self.security_id = security_id
        self.job_id = job_id
        self.lid = lid
        self.source = source

    def _build_params(self) -> dict:
        """Unused: the batch request is assembled directly in ``fetch``."""
        return {}

    def fetch(self) -> Result:
        """Override ``fetch`` to call the batch endpoint.

        Returns:
            The parsed ``Result``; if the batch call raises, a failed
            ``Result`` with ``status_code=-1`` and the exception text.
        """
        # Both sub-requests share these fields; only the detail request
        # additionally carries "source" (appended last to keep key order).
        common_query = {
            "securityId": self.security_id,
            "jobId": self.job_id,
            "lid": self.lid,
        }
        sub_reqs = [
            {
                "path": self.DETAIL_PATH,
                "method": "GET",
                "query": urlencode({**common_query, "source": self.source}),
            },
            {
                "path": self.IMPROVEMENT_PATH,
                "method": "GET",
                "query": urlencode(common_query),
            },
        ]

        try:
            client: BossClient = self.http_client
            http_code, data = client.batch(sub_reqs)
        except Exception as e:
            # Deliberate boundary: any client failure is reported as a
            # failed Result instead of propagating to the caller.
            return Result(success=False, status_code=-1, error=str(e))

        return self._parse(http_code, data)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Parse the batch response and merge the two sub-request payloads.

        Returns:
            A failed ``Result`` when the HTTP status is not 200, the body or
            its ``zpData`` is not a dict, or the business code is non-zero.
            Otherwise a successful ``Result`` whose ``data`` is
            ``{"detail": ..., "improvement": ...}``.
        """
        if http_code != 200:
            return Result(success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}")

        if not isinstance(raw, dict):
            return Result(success=False, status_code=http_code, error="响应格式异常")

        biz_code = raw.get("code", -1)
        if biz_code != 0:
            return Result(
                success=False,
                status_code=biz_code,
                error=raw.get("message") or f"业务错误: {biz_code}",
            )

        zp_data = raw.get("zpData") or {}
        if not isinstance(zp_data, dict):
            # Without this guard a truthy non-dict zpData (e.g. a list)
            # would raise AttributeError on .get() below.
            return Result(success=False, status_code=http_code, error="响应格式异常")

        merged = {
            "detail": self._unwrap(zp_data.get(self.DETAIL_PATH, {})),
            "improvement": self._unwrap(zp_data.get(self.IMPROVEMENT_PATH, {})),
        }
        return Result(success=True, status_code=200, data=merged)

    @staticmethod
    def _unwrap(sub_response: Any) -> Any:
        """Return the ``zpData`` of a dict sub-response, else the value unchanged."""
        if isinstance(sub_response, dict):
            return sub_response.get("zpData")
        return sub_response
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 3. 公司/品牌详情(GET)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetBrandDetail(BaseFetcher):
    """Company / brand detail (no login required).

    Example::

        detail = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    """

    # NOTE(review): this class defines no _request override, so the HTTP
    # method and the use of ENDPOINT depend on BaseFetcher — confirm that its
    # default matches the GET this endpoint is labelled with.
    ENDPOINT = "/wapi/zpgeek/miniapp/brand/detail.json"

    def __init__(self, *, brand_id: str, client: Optional[BossClient] = None):
        """Store the brand id and pass the HTTP client to the base class.

        When ``client`` is omitted (or falsy) a new one is obtained from
        ``create_client()``.
        """
        http = client or create_client()
        super().__init__(http_client=http)
        self.brand_id = brand_id

    def _build_params(self) -> dict:
        """Return the query parameters identifying the brand."""
        query = {"brandId": self.brand_id, "appId": 10002}
        return query

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate parsing to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 4. 公司职位列表(GET)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class SearchBrandJobs(BaseSearcher):
    """Paged list of a company's open positions (no login required).

    Example::

        api = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/joblist.json"

    def __init__(
        self,
        *,
        brand_id: str,
        query: str = "",
        position_lv1: int = 0,
        city: str = "",
        experience: str = "",
        salary: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the brand id and filters and pass the client to the base class.

        When ``client`` is omitted (or falsy) a new one is obtained from
        ``create_client()``.
        """
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)

        self.brand_id = brand_id
        self.query = query
        self.position_lv1 = position_lv1
        self.city = city
        self.experience = experience
        self.salary = salary

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``."""
        # NOTE(review): page_size is handed to the base class but is not sent
        # as a request parameter here — confirm this endpoint has a fixed
        # page size.
        request_params = {
            "brandId": self.brand_id,
            "query": self.query,
            "page": page_index,
            "hasMore": "true",
            "positionLv1": self.position_lv1,
            "city": self.city,
            "experience": self.experience,
            "salary": self.salary,
            "appId": 10002,
        }
        return request_params

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: send the request through the client's ``get`` method."""
        endpoint = self.ENDPOINT
        return self.http_client.get(endpoint, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate parsing to the Boss-specific response parser."""
        parsed = _parse_boss_response(http_code, raw)
        return parsed
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 使用示例
|
||
# ─────────────────────────────────────────────
|
||
|
||
# Usage demo: exercises each API class once when the module is run directly.
if __name__ == "__main__":
    import json

    print("=== 1. 首页推荐职位 ===")
    rec = SearchRecJobs(city_code="101280600").search()
    # Result.list may be unset on a failed request — TODO confirm; fall back
    # to an empty list so the demo keeps going.
    rec_jobs = rec.list or []
    print(f"成功: {rec.success}, 本页 {len(rec_jobs)} 条, is_end_page: {rec.is_end_page}")
    if rec_jobs:
        print(f"第一条: {json.dumps(rec_jobs[0], ensure_ascii=False, indent=2)[:200]}...")

    print("\n=== 2. 公司详情 ===")
    r = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    print(f"成功: {r.success}")
    if r.data:
        print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")

    print("\n=== 3. 公司职位列表 ===")
    r = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~").search()
    print(f"成功: {r.success}, 本页 {len(r.list or [])} 条")

    # Note: the job detail call needs a security_id, which comes from a
    # search result. The section-1 result is reused here: searching again
    # would send two more identical requests, and indexing a different
    # response than the one checked could raise IndexError.
    print("\n=== 4. 职位详情(需要 security_id)===")
    if rec_jobs:
        first_job = rec_jobs[0]
        sid = first_job.get("securityId", "")
        jid = first_job.get("encryptJobId", "")
        if sid and jid:
            r = GetJobDetail(security_id=sid, job_id=jid).fetch()
            print(f"成功: {r.success}")
            if r.data:
                print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")
        else:
            print("搜索结果中未找到 securityId/encryptJobId 字段")
    else:
        print("搜索结果为空,跳过")
|