""" 前程无忧 (51Job) HTTP 客户端 在通用 HTTPClient 上叠加 51job 特有的 sign 签名和默认 headers 与 Boss/智联不同,51job 的 sign 依赖完整的 URL path + body, 因此需要在 post/get 方法中先构造签名再拼接最终 URL。 """ from __future__ import annotations import json from typing import Any, Optional from urllib.parse import quote from crawler_core.http_client import HTTPClient from crawler_core.qcwy.sign import Job51Sign BASE_URL = "https://cupid.51job.com" # 51job 小程序特有的默认请求头 JOB51_HEADERS = { "user-agent": ( "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 " "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 " "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI " "MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.7(0x13080712) " "UnifiedPCMacWechat(0xf2641702) XWEB/18788" ), "xweb_xhr": "1", "from-domain": "51job_weixin_wxapp", "sec-fetch-site": "cross-site", "sec-fetch-mode": "cors", "sec-fetch-dest": "empty", "referer": "https://servicewechat.com/wx1131e5c71e668b5d/426/page-frame.html", "accept-language": "zh-CN,zh;q=0.9", "priority": "u=1, i", } class Job51Client(HTTPClient): """ 前程无忧 HTTP 客户端 继承通用 HTTPClient,每次请求自动计算 HMAC-SHA256 签名 Args: signer: Job51Sign 实例(可选) tunnel_proxy: 隧道代理地址(每次请求自动换 IP) proxy: 固定代理地址 proxy_pool: 代理池列表 timeout: 请求超时秒数 """ def __init__( self, signer: Optional[Job51Sign] = None, tunnel_proxy: Optional[str] = None, proxy: Optional[str] = None, proxy_pool: Optional[list[str]] = None, timeout: int = 10, ): super().__init__( base_url=BASE_URL, default_headers=JOB51_HEADERS, tunnel_proxy=tunnel_proxy, proxy=proxy, proxy_pool=proxy_pool, timeout=timeout, ) self.signer = signer or Job51Sign() self._uuid = Job51Sign.generate_uuid() def _job51_headers(self, sign: str) -> dict: """构造每次请求的 51job 特有 headers""" property_obj = { "frompageUrl": "", "pageUrl": "pages/index/index", "isLogin": "否", "accountid": "", "resumeId": "", "firstFrompageUrl": "", "distinct_id": self._uuid, } return { "sign": sign, "partner": "", "property": quote(json.dumps(property_obj, ensure_ascii=False, separators=(",", ":")), safe=""), "uuid": self._uuid, "user-token": "", "account-id": "", } def post(self, path: str, body: dict, headers: Optional[dict] = None) -> tuple[int, Any]: """ POST 请求,自动计算签名 注意: path 参数为 endpoint(如 open/noauth/recommend/job-tab-dynamic-wx-mini) 签名后会拼为 /endpoint?api_key=51job×tamp=xxx 关键: body 必须以 compact JSON 发送(无空格),与签名字符串完全一致 不能使用 requests 的 json= 参数(会用默认带空格的序列化) """ url_path, sign = self.signer.build_sign_path(path, "POST", body=body) job51_h = self._job51_headers(sign) job51_h["Content-Type"] = "application/json" if headers: job51_h.update(headers) # 必须用 compact JSON(与签名一致),通过 _post_raw 发送预序列化 body raw_body = json.dumps(body, ensure_ascii=False, separators=(",", ":")) return self._post_raw(url_path, raw_body, job51_h) def _post_raw(self, path: str, raw_body: str, headers: dict) -> tuple[int, Any]: """发送预序列化的 POST 请求(data= 而非 json=)""" merged_headers = self._merge_headers(headers) url = f"{self.base_url}{path}" if self._tunnel_proxy: import requests_go as requests s = self._new_session() try: resp = s.post( url, data=raw_body.encode("utf-8"), headers=merged_headers, proxies={"http": self._tunnel_proxy, "https": self._tunnel_proxy}, timeout=self.timeout, ) return resp.status_code, resp.json() finally: s.close() proxies = self._get_proxies() kwargs: dict[str, Any] = { "data": raw_body.encode("utf-8"), "headers": merged_headers, "timeout": self.timeout, } if proxies: kwargs["proxies"] = proxies resp = self._session.post(url, **kwargs) return resp.status_code, resp.json() def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None) -> tuple[int, Any]: """ GET 请求,自动计算签名 注意: params 会被编入签名路径的 query string 中 """ url_path, sign = self.signer.build_sign_path(path, "GET", params=params) job51_h = self._job51_headers(sign) job51_h["content-type"] = "application/x-www-form-urlencoded" if headers: job51_h.update(headers) # GET 参数已经编入 url_path,不再传 params return super().get(url_path, params=None, headers=job51_h) def create_client( signer: Optional[Job51Sign] = None, tunnel_proxy: Optional[str] = None, proxy: Optional[str] = None, proxy_pool: Optional[list[str]] = None, ) -> Job51Client: """创建 51job 客户端""" return Job51Client(signer=signer, tunnel_proxy=tunnel_proxy, proxy=proxy, proxy_pool=proxy_pool)