job51 (spiderJobs/platforms/job51/): - client.py: HTTPClient+Job51Sign from crawler_core - api.py: ApiResult→Result, self._http→self.http_client, _request() POST overrides - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.qcwy.sign.Job51Sign zhilian (spiderJobs/platforms/zhilian/): - client.py: HTTPClient+ZhilianSign from crawler_core - api.py: add _parse_zhilian_response (HTTP 200=success), add _parse()/_request() to all classes (GET fetchers + POST searcher overrides) - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.zhilian.sign.ZhilianSign tests: 34 new mock tests (17 job51 + 17 zhilian) Full regression: 98 passed (job51:17 + zhilian:17 + boss:22 + crawler_core:41 + 1)
99 lines
3.2 KiB
Python
99 lines
3.2 KiB
Python
"""
|
||
智联招聘 HTTP 客户端
|
||
在通用 HTTPClient 上叠加智联特有的签名和默认 headers
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Optional
|
||
|
||
from crawler_core.http_client import HTTPClient
|
||
from crawler_core.zhilian.sign import ZhilianSign
|
||
|
||
# Base URLs of the two Zhilian API hosts used by this module.
CGATE_BASE_URL = "https://cgate.zhaopin.com"
CAPI_BASE_URL = "https://capi.zhaopin.com"

# User-agent sent with every request; the string identifies itself as the
# WeChat mini-program runtime (MicroMessenger / MiniProgramEnv) on macOS.
_ZHILIAN_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
    "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 "
    "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI "
    "MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.7(0x13080712) "
    "UnifiedPCMacWechat(0xf2641702) XWEB/18788"
)

# Default request headers specific to Zhilian (signature headers excluded;
# those are added per request by the client).
ZHILIAN_HEADERS = {
    "content-type": "application/json",
    "user-agent": _ZHILIAN_USER_AGENT,
    "accept": "*/*",
    "sec-fetch-site": "cross-site",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/647/page-frame.html",
    "accept-language": "zh-CN,zh;q=0.9",
    "accept-encoding": "identity",
}
|
||
|
||
|
||
class ZhilianClient(HTTPClient):
    """HTTP client for the Zhilian Zhaopin API.

    Extends the generic ``HTTPClient``: the module-level ``ZHILIAN_HEADERS``
    are installed as default headers, and every ``post``/``get`` call adds
    the headers produced by the signer's ``sign_headers(page_code)``.

    Args:
        base_url: API base address (defaults to the cgate host).
        signer: ``ZhilianSign`` instance that produces the signature
            headers; a new ``ZhilianSign()`` is created when omitted.
        proxy: Fixed proxy address, forwarded to ``HTTPClient``.
        proxy_pool: List of proxy addresses, forwarded to ``HTTPClient``.
        timeout: Request timeout in seconds, forwarded to ``HTTPClient``.
    """

    def __init__(
        self,
        base_url: str = CGATE_BASE_URL,
        signer: Optional[ZhilianSign] = None,
        proxy: Optional[str] = None,
        proxy_pool: Optional[list[str]] = None,
        timeout: int = 10,
    ):
        super().__init__(
            base_url=base_url,
            # Pass a copy: ZHILIAN_HEADERS is shared by every client, and
            # HTTPClient (not visible here) may keep or modify the mapping.
            default_headers=dict(ZHILIAN_HEADERS),
            proxy=proxy,
            proxy_pool=proxy_pool,
            timeout=timeout,
        )
        # Test against None explicitly so a caller-supplied signer is never
        # replaced merely because it happens to evaluate as falsy.
        self.signer = signer if signer is not None else ZhilianSign()

    def _merge_sign_headers(self, page_code: str, headers: Optional[dict]) -> dict:
        """Build the per-request headers.

        Starts from the signer's headers for ``page_code`` and overlays the
        caller's ``headers`` (caller values win on key clashes). A new dict
        is returned, so the signer's return value is never mutated.
        """
        merged = dict(self.signer.sign_headers(page_code))
        if headers:
            merged.update(headers)
        return merged

    def post(self, path: str, body: dict, headers: Optional[dict] = None, page_code: str = "0") -> tuple[int, Any]:
        """Send a POST request with signature headers injected.

        Args:
            path: Request path, passed through to ``HTTPClient.post``.
            body: Request body, passed through to ``HTTPClient.post``.
            headers: Extra headers; they override signature headers that
                share the same key.
            page_code: Value handed to the signer's ``sign_headers``.

        Returns:
            Whatever ``HTTPClient.post`` returns (annotated as a
            ``(int, Any)`` tuple).
        """
        return super().post(path, body, self._merge_sign_headers(page_code, headers))

    def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None, page_code: str = "0") -> tuple[int, Any]:
        """Send a GET request with signature headers injected.

        Args:
            path: Request path, passed through to ``HTTPClient.get``.
            params: Query parameters, passed through to ``HTTPClient.get``.
            headers: Extra headers; they override signature headers that
                share the same key.
            page_code: Value handed to the signer's ``sign_headers``.

        Returns:
            Whatever ``HTTPClient.get`` returns (annotated as a
            ``(int, Any)`` tuple).
        """
        return super().get(path, params, self._merge_sign_headers(page_code, headers))
|
||
|
||
|
||
def create_cgate_client(
    signer: Optional[ZhilianSign] = None,
    proxy: Optional[str] = None,
    proxy_pool: Optional[list[str]] = None,
    timeout: int = 10,
) -> ZhilianClient:
    """Create a ``ZhilianClient`` bound to the cgate host (``CGATE_BASE_URL``).

    Args:
        signer: Optional ``ZhilianSign`` instance, forwarded to the client.
        proxy: Fixed proxy address, forwarded to the client.
        proxy_pool: List of proxy addresses, forwarded to the client.
        timeout: Request timeout in seconds, forwarded to the client; the
            default matches ``ZhilianClient``'s own default of 10.

    Returns:
        The newly constructed ``ZhilianClient``.
    """
    return ZhilianClient(
        base_url=CGATE_BASE_URL,
        signer=signer,
        proxy=proxy,
        proxy_pool=proxy_pool,
        timeout=timeout,
    )
|
||
|
||
|
||
def create_capi_client(
    signer: Optional[ZhilianSign] = None,
    proxy: Optional[str] = None,
    proxy_pool: Optional[list[str]] = None,
    timeout: int = 10,
) -> ZhilianClient:
    """Create a ``ZhilianClient`` bound to the capi host (``CAPI_BASE_URL``).

    Args:
        signer: Optional ``ZhilianSign`` instance, forwarded to the client.
        proxy: Fixed proxy address, forwarded to the client.
        proxy_pool: List of proxy addresses, forwarded to the client.
        timeout: Request timeout in seconds, forwarded to the client; the
            default matches ``ZhilianClient``'s own default of 10.

    Returns:
        The newly constructed ``ZhilianClient``.
    """
    return ZhilianClient(
        base_url=CAPI_BASE_URL,
        signer=signer,
        proxy=proxy,
        proxy_pool=proxy_pool,
        timeout=timeout,
    )
|
||
|