Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:
        crawler_core              ← shared base library
          ↑        ↑
    spiderJobs    app/services/crawler/
   (standalone)   (FastAPI backend, private layer)
Changes:
- boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc.
- _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
+ fix self._http → self.http_client
- _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient
+ _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign
Full regression: 106 passed in 0.68s
85 lines
2.9 KiB
Python
85 lines
2.9 KiB
Python
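# ⚠️ DEPRECATED — 2026-03-21
# This file is an internal hand-made copy. It is deprecated and is no longer
# referenced by any facade.
# Use spiderJobs.platforms.* or the corresponding module in crawler_core instead.
# It will be removed in the next milestone.
#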
|
|
"""
Zhilian Zhaopin (智联招聘) HTTP client.

Copied from spiderJobs/platforms/zhilian/client.py — imports changed to local references.
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Optional
|
|
|
|
from crawler_core.http_client import HTTPClient
|
|
from crawler_core.zhilian.sign import ZhilianSign
|
|
|
|
# Base URLs of the two Zhilian API hosts this module targets.
CGATE_BASE_URL = "https://cgate.zhaopin.com"
CAPI_BASE_URL = "https://capi.zhaopin.com"

# Default request headers handed to the HTTP client as ``default_headers``.
# NOTE(review): the user-agent and referer carry WeChat ("MicroMessenger",
# "MiniProgramEnv", servicewechat.com) tokens, so these presumably mimic the
# WeChat mini-program client — confirm before changing any value.
ZHILIAN_HEADERS = {
    "content-type": "application/json",
    # Adjacent string literals are concatenated into one user-agent value;
    # the trailing space inside each fragment is significant.
    "user-agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 "
        "MicroMessenger/7.0.20.1781(0x6700143B) NetType/WIFI "
        "MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.7(0x13080712) "
        "UnifiedPCMacWechat(0xf2641702) XWEB/18788"
    ),
    "accept": "*/*",
    "sec-fetch-site": "cross-site",
    "sec-fetch-mode": "cors",
    "sec-fetch-dest": "empty",
    "referer": "https://servicewechat.com/wxb7718fb9257e4fd2/647/page-frame.html",
    "accept-language": "zh-CN,zh;q=0.9",
    # "identity" asks the server not to apply any content coding (compression).
    "accept-encoding": "identity",
}
|
|
|
|
|
|
class ZhilianClient(HTTPClient):
    """HTTP client for Zhilian endpoints that attaches signing headers.

    Extends ``HTTPClient`` so that every ``post``/``get`` call carries the
    headers produced by a ``ZhilianSign`` signer, optionally overridden by
    caller-supplied headers.
    """

    def __init__(
        self,
        base_url: str = CGATE_BASE_URL,
        signer: Optional[ZhilianSign] = None,
        proxy: Optional[str] = None,
        proxy_pool: Optional[list[str]] = None,
        timeout: int = 10,
    ):
        """Configure the underlying ``HTTPClient`` and the request signer.

        Args:
            base_url: Base URL passed to ``HTTPClient``; defaults to
                ``CGATE_BASE_URL``.
            signer: Signer used to build per-request headers. A new
                ``ZhilianSign()`` is created when this is ``None`` (or falsy).
            proxy: Forwarded unchanged to ``HTTPClient``.
            proxy_pool: Forwarded unchanged to ``HTTPClient``.
            timeout: Forwarded unchanged to ``HTTPClient`` (units are defined
                by ``HTTPClient``).
        """
        super().__init__(
            base_url=base_url,
            # Pass a copy so the base client never holds a reference to the
            # module-level template; edits made through one client cannot
            # leak into ZHILIAN_HEADERS or other clients.
            default_headers=dict(ZHILIAN_HEADERS),
            proxy=proxy,
            proxy_pool=proxy_pool,
            timeout=timeout,
        )
        self.signer = signer or ZhilianSign()

    def _signed_headers(self, page_code: str, headers: Optional[dict]) -> dict:
        """Return signing headers for ``page_code`` overlaid with ``headers``.

        The signer's result is copied before merging, so a mapping the signer
        may reuse between calls is never mutated here. Caller-supplied
        ``headers`` win on key collisions.
        """
        merged = dict(self.signer.sign_headers(page_code))
        if headers:
            merged.update(headers)
        return merged

    def post(self, path: str, body: dict, headers: Optional[dict] = None, page_code: str = "0") -> tuple[int, Any]:
        """Send a signed POST through ``HTTPClient.post``.

        Args:
            path: Request path, passed through to ``HTTPClient.post``.
            body: Request body, passed through to ``HTTPClient.post``.
            headers: Extra headers; they override signing headers with the
                same key.
            page_code: Value handed to ``signer.sign_headers``.

        Returns:
            Whatever ``HTTPClient.post`` returns; annotated as
            ``tuple[int, Any]`` (presumably status code and payload — confirm
            against ``HTTPClient``).
        """
        return super().post(path, body, self._signed_headers(page_code, headers))

    def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None, page_code: str = "0") -> tuple[int, Any]:
        """Send a signed GET through ``HTTPClient.get``.

        Args:
            path: Request path, passed through to ``HTTPClient.get``.
            params: Query parameters, passed through to ``HTTPClient.get``.
            headers: Extra headers; they override signing headers with the
                same key.
            page_code: Value handed to ``signer.sign_headers``.

        Returns:
            Whatever ``HTTPClient.get`` returns; annotated as
            ``tuple[int, Any]`` (presumably status code and payload — confirm
            against ``HTTPClient``).
        """
        return super().get(path, params, self._signed_headers(page_code, headers))
|
|
|
|
|
|
def create_cgate_client(
    signer: Optional[ZhilianSign] = None,
    proxy: Optional[str] = None,
    proxy_pool: Optional[list[str]] = None,
    timeout: int = 10,
) -> ZhilianClient:
    """Build a ``ZhilianClient`` bound to ``CGATE_BASE_URL``.

    Args:
        signer: Optional signer forwarded to ``ZhilianClient``.
        proxy: Optional proxy forwarded to ``ZhilianClient``.
        proxy_pool: Optional proxy list forwarded to ``ZhilianClient``.
        timeout: Forwarded to ``ZhilianClient``; the default of 10 matches
            the client's own default, so existing callers are unaffected.

    Returns:
        A new ``ZhilianClient`` instance.
    """
    return ZhilianClient(
        base_url=CGATE_BASE_URL,
        signer=signer,
        proxy=proxy,
        proxy_pool=proxy_pool,
        timeout=timeout,
    )
|
|
|
|
|
|
def create_capi_client(
    signer: Optional[ZhilianSign] = None,
    proxy: Optional[str] = None,
    proxy_pool: Optional[list[str]] = None,
    timeout: int = 10,
) -> ZhilianClient:
    """Build a ``ZhilianClient`` bound to ``CAPI_BASE_URL``.

    Args:
        signer: Optional signer forwarded to ``ZhilianClient``.
        proxy: Optional proxy forwarded to ``ZhilianClient``.
        proxy_pool: Optional proxy list forwarded to ``ZhilianClient``.
        timeout: Forwarded to ``ZhilianClient``; the default of 10 matches
            the client's own default, so existing callers are unaffected.

    Returns:
        A new ``ZhilianClient`` instance.
    """
    return ZhilianClient(
        base_url=CAPI_BASE_URL,
        signer=signer,
        proxy=proxy,
        proxy_pool=proxy_pool,
        timeout=timeout,
    )
|