""" Boss直聘 HTTP 客户端 在通用 HTTPClient 上叠加 Boss 特有的 headers 和 Traceid 注入 """ from __future__ import annotations from typing import Any, Optional from crawler_core.http_client import HTTPClient from crawler_core.boss.sign import BossSign BASE_URL = "https://www.zhipin.com" # Boss 小程序特有的默认请求头 BOSS_HEADERS = { "content-type": "application/x-www-form-urlencoded", "user-agent": ( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 " "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI " "MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.7(0x13080712) " "UnifiedPCMacWechat(0xf2641702) XWEB/18788" ), "x-requested-with": "XMLHttpRequest", "xweb_xhr": "1", "zp_app_id": "10002", "zp_product_id": "10002", "ver": "14.0400", "mini_ver": "14.0400", "platform": "zhipin/mac", "ua": '{"model":"Mac16,8","platform":"mac"}', "scene": "1256", "sec-fetch-site": "cross-site", "sec-fetch-mode": "cors", "sec-fetch-dest": "empty", "referer": "https://servicewechat.com/wxa8da525af05281f3/601/page-frame.html", "accept-language": "zh-CN,zh;q=0.9", } class BossClient(HTTPClient): """ Boss直聘 HTTP 客户端 继承通用 HTTPClient,每次请求自动注入 Traceid Args: signer: BossSign 实例(可选) tunnel_proxy: 隧道代理地址(每次请求自动换 IP) proxy: 固定代理地址 proxy_pool: 代理池列表 timeout: 请求超时秒数 """ def __init__( self, signer: Optional[BossSign] = None, tunnel_proxy: Optional[str] = None, proxy: Optional[str] = None, proxy_pool: Optional[list[str]] = None, timeout: int = 10, ): super().__init__( base_url=BASE_URL, default_headers=BOSS_HEADERS, tunnel_proxy=tunnel_proxy, proxy=proxy, proxy_pool=proxy_pool, timeout=timeout, ) self.signer = signer or BossSign() def _boss_headers(self) -> dict: """构造每次请求需要动态更新的 Boss 请求头""" return { "mpt": self.signer.mpt, "wt2": self.signer.wt2, "Traceid": BossSign.generate_traceid("M-W"), } def post(self, path: str, body: dict, headers: Optional[dict] = None) -> tuple[int, Any]: """POST 请求,自动注入 Boss headers""" boss_h = self._boss_headers() if headers: boss_h.update(headers) return super().post(path, body, boss_h) def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None) -> tuple[int, Any]: """GET 请求,自动注入 Boss headers""" boss_h = self._boss_headers() if headers: boss_h.update(headers) return super().get(path, params, boss_h) def batch(self, sub_reqs: list[dict]) -> tuple[int, Any]: """ 批量请求 /wapi/batch/requests Args: sub_reqs: 子请求列表, 每个元素格式: {"path": "/wapi/...", "method": "GET", "query": "key=val&..."} Returns: (http_code, response_json) """ body = {"subReqs": sub_reqs, "appId": 10002} return self.post( "/wapi/batch/requests", body, headers={"content-type": "application/json"}, ) def create_client( signer: Optional[BossSign] = None, tunnel_proxy: Optional[str] = None, proxy: Optional[str] = None, proxy_pool: Optional[list[str]] = None, ) -> BossClient: """创建 Boss 客户端""" return BossClient(signer=signer, tunnel_proxy=tunnel_proxy, proxy=proxy, proxy_pool=proxy_pool)