""" Boss直聘 - 所有 API 接口 每个类只负责参数构建,HTTP 和算法由 client / core 层处理 响应格式适配: Boss 使用 code/zpData(区别于智联的 statusCode/data) code=0 表示成功,zpData 为实际业务数据 """ from __future__ import annotations from typing import Any, Optional from urllib.parse import urlencode from crawler_core.base import BaseFetcher, BaseSearcher, Result from spiderJobs.platforms.boss.client import BossClient, create_client # ───────────────────────────────────────────── # Boss 响应解析(覆写默认算法) # ───────────────────────────────────────────── def _parse_boss_response(http_code: int, raw: Any) -> Result: """ Boss 专用响应解析 Boss 响应格式: {"code": 0, "message": "Success", "zpData": {...}} code=0 成功,其他为业务错误 """ if http_code != 200: return Result( success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}", ) if not isinstance(raw, dict): return Result(success=False, status_code=http_code, error="响应格式异常") biz_code = raw.get("code", -1) if biz_code != 0: return Result( success=False, status_code=biz_code, error=raw.get("message") or f"业务错误: {biz_code}", ) payload = raw.get("zpData") or {} # 列表型响应 if isinstance(payload, dict) and "jobList" in payload: job_list = payload.get("jobList", []) has_more = payload.get("hasMore", False) return Result( success=True, status_code=200, data=payload, list=job_list, count=len(job_list), is_end_page=not has_more, ) # 列表型响应(公司职位列表使用 list 字段) if isinstance(payload, dict) and "list" in payload: items = payload.get("list", []) has_more = payload.get("hasMore", False) return Result( success=True, status_code=200, data=payload, list=items, count=len(items), is_end_page=not has_more, ) return Result(success=True, status_code=200, data=payload) # ───────────────────────────────────────────── # 1. 
# Home-page recommended job list (GET)
# ─────────────────────────────────────────────

class SearchRecJobs(BaseSearcher):
    """
    Home-page recommended / searched job list (no login required).

        api = SearchRecJobs(city_code="101280600")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/homepage/recjoblist.json"

    def __init__(
        self,
        *,
        city_code: str = "101280600",
        sort_type: int = 1,
        district_code: str = "",
        blue_welfare: str = "",
        encrypt_expect_id: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the search filters; a client is created when none is supplied."""
        transport = client or create_client()
        super().__init__(page_size=page_size, http_client=transport)

        # Filters are kept verbatim and only turned into query parameters
        # when a page is requested.
        self.city_code, self.sort_type = city_code, sort_type
        self.district_code, self.blue_welfare = district_code, blue_welfare
        self.encrypt_expect_id = encrypt_expect_id

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``."""
        query = dict(
            cityCode=self.city_code,
            sortType=self.sort_type,
            page=page_index,
            pageSize=self.page_size,
            encryptExpectId=self.encrypt_expect_id,
            districtCode=self.district_code,
            blueWelfare=self.blue_welfare,
            appId=10002,
        )
        return query

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: issue the request as a GET."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        return _parse_boss_response(http_code, raw)


# ─────────────────────────────────────────────
# 2.
# Job detail (via the batch endpoint)
# ─────────────────────────────────────────────

class GetJobDetail(BaseFetcher):
    """
    Job detail (no login required), fetched through /wapi/batch/requests.

    Two sub-requests (job detail and the "improvement" query) are sent in one
    batch call and their payloads are merged into a single result.

        detail = GetJobDetail(
            security_id="xxx",
            job_id="92ea3c76f9197a1503Vz09q8EFRR",
            lid="8uF4BIOMvBU.search.63",
        ).fetch()
    """

    ENDPOINT = "/wapi/batch/requests"

    # Sub-request paths. The same strings are used as keys when reading the
    # batch response back in _parse, so they are defined once here.
    DETAIL_PATH = "/wapi/zpgeek/miniapp/job/detail.json"
    IMPROVEMENT_PATH = "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json"

    def __init__(
        self,
        *,
        security_id: str,
        job_id: str,
        lid: str = "",
        source: int = 10,
        client: Optional[BossClient] = None,
    ):
        """Store the job identifiers; a client is created when none is supplied."""
        super().__init__(http_client=client or create_client())
        self.security_id = security_id
        self.job_id = job_id
        self.lid = lid
        self.source = source

    def _build_params(self) -> dict:
        """Unused: the batch request is assembled directly in ``fetch``."""
        return {}

    def fetch(self) -> Result:
        """
        Override ``fetch`` to go through the batch endpoint.

        Returns:
            The parsed ``Result``. Any exception raised by the client call is
            converted into a failed ``Result`` with ``status_code=-1``.
        """
        shared_query = {
            "securityId": self.security_id,
            "jobId": self.job_id,
            "lid": self.lid,
        }
        sub_reqs = [
            {
                "path": self.DETAIL_PATH,
                "method": "GET",
                # Only the detail sub-request carries the extra "source" field.
                "query": urlencode({**shared_query, "source": self.source}),
            },
            {
                "path": self.IMPROVEMENT_PATH,
                "method": "GET",
                "query": urlencode(shared_query),
            },
        ]

        try:
            client: BossClient = self.http_client
            http_code, data = client.batch(sub_reqs)
        except Exception as e:
            # Boundary: callers receive a Result, never a raised exception.
            return Result(success=False, status_code=-1, error=str(e))

        return self._parse(http_code, data)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """
        Parse the batch response and merge the sub-request payloads.

        Returns:
            A failed ``Result`` when the outer envelope is invalid (same rules
            as every other endpoint in this module). Otherwise a successful
            ``Result`` whose ``data`` is
            ``{"detail": ..., "improvement": ...}``.
        """
        # Reuse the shared envelope validation instead of duplicating it.
        envelope = _parse_boss_response(http_code, raw)
        if not envelope.success:
            return envelope

        # A successful envelope guarantees ``raw`` is a dict. Guard against a
        # non-dict zpData so the lookups below cannot raise AttributeError.
        zp_data = raw.get("zpData")
        if not isinstance(zp_data, dict):
            zp_data = {}

        # NOTE(review): each sub-response presumably carries its own
        # code/message; those are not inspected here - confirm whether
        # sub-request failures should be surfaced.
        merged = {
            "detail": self._unwrap(zp_data.get(self.DETAIL_PATH, {})),
            "improvement": self._unwrap(zp_data.get(self.IMPROVEMENT_PATH, {})),
        }
        return Result(success=True, status_code=200, data=merged)

    @staticmethod
    def _unwrap(sub_response: Any) -> Any:
        """Return the ``zpData`` of a dict sub-response; pass other values through."""
        if isinstance(sub_response, dict):
            return sub_response.get("zpData")
        return sub_response


# ─────────────────────────────────────────────
# 3. Company / brand detail (GET)
# ─────────────────────────────────────────────

class GetBrandDetail(BaseFetcher):
    """
    Company / brand detail (no login required).

        detail = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/detail.json"

    def __init__(self, *, brand_id: str, client: Optional[BossClient] = None):
        """Store the brand id; a client is created when none is supplied."""
        super().__init__(http_client=client or create_client())
        self.brand_id = brand_id

    def _build_params(self) -> dict:
        """Return the query parameters identifying the brand."""
        return {"brandId": self.brand_id, "appId": 10002}

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        return _parse_boss_response(http_code, raw)


# ─────────────────────────────────────────────
# 4. Company job list (GET)
# ─────────────────────────────────────────────

class SearchBrandJobs(BaseSearcher):
    """
    Open positions of a company (no login required).

        api = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/joblist.json"

    def __init__(
        self,
        *,
        brand_id: str,
        query: str = "",
        position_lv1: int = 0,
        city: str = "",
        experience: str = "",
        salary: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the search filters; a client is created when none is supplied."""
        super().__init__(page_size=page_size, http_client=client or create_client())
        self.brand_id = brand_id
        self.query = query
        self.position_lv1 = position_lv1
        self.city = city
        self.experience = experience
        self.salary = salary

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``."""
        # NOTE(review): page_size is handed to the base class but no pageSize
        # parameter is sent to this endpoint - confirm whether it supports one.
        return {
            "brandId": self.brand_id,
            "query": self.query,
            "page": page_index,
            "hasMore": "true",
            "positionLv1": self.position_lv1,
            "city": self.city,
            "experience": self.experience,
            "salary": self.salary,
            "appId": 10002,
        }

    def _request(self, params: dict) -> tuple[int, Any]:
        """Override: issue the request as a GET."""
        return self.http_client.get(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the Boss-specific response parser."""
        return _parse_boss_response(http_code, raw)


# ─────────────────────────────────────────────
# Usage examples
# ─────────────────────────────────────────────

if __name__ == "__main__":
    import json

    print("=== 1. 首页推荐职位 ===")
    # Kept under its own name: step 4 reuses this result instead of issuing
    # the same search again.
    rec = SearchRecJobs(city_code="101280600").search()
    print(f"成功: {rec.success}, 本页 {len(rec.list)} 条, is_end_page: {rec.is_end_page}")
    if rec.list:
        print(f"第一条: {json.dumps(rec.list[0], ensure_ascii=False, indent=2)[:200]}...")

    print("\n=== 2. 公司详情 ===")
    r = GetBrandDetail(brand_id="02cd05cce753437e33V50w~~").fetch()
    print(f"成功: {r.success}")
    if r.data:
        print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")

    print("\n=== 3. 公司职位列表 ===")
    r = SearchBrandJobs(brand_id="02cd05cce753437e33V50w~~").search()
    print(f"成功: {r.success}, 本页 {len(r.list)} 条")

    # Note: the job detail call needs a security_id, which has to come from a
    # search result first.
    print("\n=== 4. 职位详情(需要 security_id)===")
    if rec.list:
        # Index the same response that was just checked for emptiness.
        first_job = rec.list[0]
        sid = first_job.get("securityId", "")
        jid = first_job.get("encryptJobId", "")
        if sid and jid:
            r = GetJobDetail(security_id=sid, job_id=jid).fetch()
            print(f"成功: {r.success}")
            if r.data:
                print(f"数据: {json.dumps(r.data, ensure_ascii=False, indent=2)[:300]}...")
        else:
            print("搜索结果中未找到 securityId/encryptJobId 字段")
    else:
        print("搜索结果为空,跳过")