From 2b94f15b56e7fa93951c5fb28a8103e8aa168d69 Mon Sep 17 00:00:00 2001 From: win Date: Sat, 21 Mar 2026 19:39:30 +0800 Subject: [PATCH] =?UTF-8?q?fix(04):=20correct=20architecture=20=E2=80=94?= =?UTF-8?q?=20private=20files=20use=20crawler=5Fcore=20directly?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Architecture clarification from user: spiderJobs/ is standalone execution, NOT meant to be imported by app/. Correct dependency graph: crawler_core ← shared base library ↑ ↑ spiderJobs app/services/crawler/ (standalone) (FastAPI backend, private layer) Changes: - boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc. - _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher + fix self._http → self.http_client - _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient + _boss_client uses crawler_core.boss.sign.BossSign directly - _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign Full regression: 106 passed in 0.68s --- app/services/crawler/_boss_api.py | 8 +-- app/services/crawler/_boss_client.py | 4 +- app/services/crawler/_boss_sign.py | 75 ++----------------------- app/services/crawler/_job51_api.py | 6 +- app/services/crawler/_job51_client.py | 4 +- app/services/crawler/_job51_sign.py | 59 ++----------------- app/services/crawler/_zhilian_api.py | 4 +- app/services/crawler/_zhilian_client.py | 4 +- app/services/crawler/_zhilian_sign.py | 60 ++------------------ app/services/crawler/boss.py | 6 +- app/services/crawler/qcwy.py | 4 +- app/services/crawler/zhilian.py | 6 +- 12 files changed, 41 insertions(+), 199 deletions(-) diff --git a/app/services/crawler/_boss_api.py b/app/services/crawler/_boss_api.py index ae9da1e..21ef9b7 100644 --- a/app/services/crawler/_boss_api.py +++ b/app/services/crawler/_boss_api.py @@ -13,7 +13,7 @@ from __future__ import annotations from typing import Any, Optional from urllib.parse import urlencode -from 
app.services.crawler._base import ApiResult, BaseFetcher, BaseSearcher +from crawler_core.base import BaseFetcher, BaseSearcher, Result as ApiResult from app.services.crawler._boss_client import BossClient, create_client @@ -78,7 +78,7 @@ class SearchRecJobs(BaseSearcher): } def _request(self, params: dict) -> tuple[int, Any]: - return self._http.get(self.ENDPOINT, params) + return self.http_client.get(self.ENDPOINT, params) def _parse(self, http_code: int, raw: Any) -> ApiResult: return _parse_boss_response(http_code, raw) @@ -113,7 +113,7 @@ class GetJobDetail(BaseFetcher): {"path": "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json", "method": "GET", "query": improvement_query}, ] try: - client: BossClient = self._http + client: BossClient = self.http_client http_code, data = client.batch(sub_reqs) except Exception as e: return ApiResult(success=False, status_code=-1, error=str(e)) @@ -176,7 +176,7 @@ class SearchBrandJobs(BaseSearcher): } def _request(self, params: dict) -> tuple[int, Any]: - return self._http.get(self.ENDPOINT, params) + return self.http_client.get(self.ENDPOINT, params) def _parse(self, http_code: int, raw: Any) -> ApiResult: return _parse_boss_response(http_code, raw) diff --git a/app/services/crawler/_boss_client.py b/app/services/crawler/_boss_client.py index 956b0a1..9fe3f64 100644 --- a/app/services/crawler/_boss_client.py +++ b/app/services/crawler/_boss_client.py @@ -12,8 +12,8 @@ from __future__ import annotations from typing import Any, Optional -from app.services.crawler._http_client import HTTPClient -from app.services.crawler._boss_sign import BossSign +from crawler_core.http_client import HTTPClient +from crawler_core.boss.sign import BossSign BASE_URL = "https://www.zhipin.com" diff --git a/app/services/crawler/_boss_sign.py b/app/services/crawler/_boss_sign.py index 407e3fa..6bd0989 100644 --- a/app/services/crawler/_boss_sign.py +++ b/app/services/crawler/_boss_sign.py @@ -4,75 +4,12 @@ # 将在下一里程碑中删除。 # """ -Boss直聘 Traceid 
生成算法 -复制自 spiderJobs/platforms/boss/sign.py — import 改为本地引用 +Boss直聘 Traceid 生成算法 — 向后兼容桩 + +已迁移至 crawler_core.boss.sign。 +直接从 crawler_core 重新导出,避免下游代码出现 ImportError。 """ -from __future__ import annotations +from crawler_core.boss.sign import BossSign # noqa: F401 -import random -import time - - -_CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ" - - -def _to_u32(n: int) -> int: - return n & 0xFFFFFFFF - - -def _compute_checksum(uuid_str: str) -> str: - r = 0 - for ch in uuid_str: - r = ((r << 5) - r + ord(ch)) & 0xFFFFFFFF - - a = 0 - for i in range(len(uuid_str) - 1, -1, -1): - a = ((a << 7) - a + ord(uuid_str[i]) * (i + 1)) & 0xFFFFFFFF - - n = 0 - mid = len(uuid_str) // 2 - for i in range(len(uuid_str)): - n = ((n << 3) - n + ord(uuid_str[i]) * (abs(i - mid) + 1)) & 0xFFFFFFFF - - s = _to_u32(r ^ a) - s = _to_u32(2654435761 * s) - s = _to_u32(s ^ (s >> 16)) - s = _to_u32(2246822507 * s) - s = _to_u32(s ^ (s >> 13)) - c1 = _CHARS[s % 62] - - h = _to_u32(a ^ n) - h = _to_u32(3266489909 * h) - h = _to_u32(h ^ (h >> 16)) - h = _to_u32(2654435761 * h) - h = _to_u32(h ^ (h >> 13)) - c2 = _CHARS[h % 62] - - v = _to_u32(n ^ r) - v = _to_u32(668265261 * v) - v = _to_u32(v ^ (v >> 16)) - v = _to_u32(2246822507 * v) - v = _to_u32(v ^ (v >> 13)) - c3 = _CHARS[v % 62] - - return f"{c1}{c2}{c3}" - - -def _generate_uuid() -> str: - hex_ts = format(int(time.time() * 1000), "x").lower() - hex_ts = hex_ts[-13:].zfill(13) - rand_part = "".join(random.choice(_CHARS) for _ in range(6)) - return hex_ts + rand_part - - -class BossSign: - def __init__(self, *, mpt: str = "", wt2: str = ""): - self.mpt = mpt - self.wt2 = wt2 - - @staticmethod - def generate_traceid(prefix: str = "M-W") -> str: - uuid_str = _generate_uuid() - checksum = _compute_checksum(uuid_str) - return f"{prefix}{uuid_str}{checksum}" +__all__ = ["BossSign"] diff --git a/app/services/crawler/_job51_api.py b/app/services/crawler/_job51_api.py index f84a385..c580d3b 100644 --- 
a/app/services/crawler/_job51_api.py +++ b/app/services/crawler/_job51_api.py @@ -12,7 +12,7 @@ from __future__ import annotations from typing import Any, Optional -from app.services.crawler._base import ApiResult, BaseFetcher, BaseSearcher +from crawler_core.base import BaseFetcher, BaseSearcher, Result as ApiResult from app.services.crawler._job51_client import Job51Client, create_client @@ -105,7 +105,7 @@ class GetJobDetail(BaseFetcher): def fetch(self) -> ApiResult: endpoint = f"{self.ENDPOINT}/{self.job_id}" try: - http_code, data = self._http.get(endpoint) + http_code, data = self.http_client.get(endpoint) except Exception as e: return ApiResult(success=False, status_code=-1, error=str(e)) return self._parse(http_code, data) @@ -135,7 +135,7 @@ class GetCompanyInfo(BaseFetcher): def fetch(self) -> ApiResult: try: - http_code, data = self._http.get(self.ENDPOINT, self._build_params()) + http_code, data = self.http_client.get(self.ENDPOINT, self._build_params()) except Exception as e: return ApiResult(success=False, status_code=-1, error=str(e)) return self._parse(http_code, data) diff --git a/app/services/crawler/_job51_client.py b/app/services/crawler/_job51_client.py index faac8b8..8015703 100644 --- a/app/services/crawler/_job51_client.py +++ b/app/services/crawler/_job51_client.py @@ -14,8 +14,8 @@ import json from typing import Any, Optional from urllib.parse import quote -from app.services.crawler._http_client import HTTPClient -from app.services.crawler._job51_sign import Job51Sign +from crawler_core.http_client import HTTPClient +from crawler_core.qcwy.sign import Job51Sign BASE_URL = "https://cupid.51job.com" diff --git a/app/services/crawler/_job51_sign.py b/app/services/crawler/_job51_sign.py index a4becba..6e02af9 100644 --- a/app/services/crawler/_job51_sign.py +++ b/app/services/crawler/_job51_sign.py @@ -4,59 +4,12 @@ # 将在下一里程碑中删除。 # """ -前程无忧 (51Job) 签名算法 -复制自 spiderJobs/platforms/job51/sign.py — import 改为本地引用 +前程无忧 (51Job) 签名 — 向后兼容桩 + +已迁移至 
crawler_core.qcwy.sign。 +直接从 crawler_core 重新导出,避免下游代码出现 ImportError。 """ -from __future__ import annotations +from crawler_core.qcwy.sign import Job51Sign # noqa: F401 -import hmac -import hashlib -import time -import random -from urllib.parse import quote - - -SIGN_KEY = "abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b" - - -class Job51Sign: - def __init__(self, *, sign_key: str = SIGN_KEY): - self.sign_key = sign_key - - @staticmethod - def generate_uuid() -> str: - ts = str(int(time.time() * 1000)) - rand = str(random.randint(1000000000, 9999999999)) - return ts + rand - - def build_sign_path( - self, - endpoint: str, - method: str = "GET", - params: dict | None = None, - body: dict | None = None, - ) -> tuple[str, str]: - import json - - ts = int(time.time()) - path = f"/{endpoint}?api_key=51job&timestamp={ts}" - - if method.upper() == "GET" and params: - query_parts = [] - for k, v in params.items(): - query_parts.append(f"{quote(str(k), safe='')}={quote(str(v), safe='')}") - if query_parts: - path += "&" + "&".join(query_parts) - - message = path - if method.upper() == "POST" and body is not None: - message += json.dumps(body, ensure_ascii=False, separators=(",", ":")) - - sign_hex = hmac.new( - self.sign_key.encode("utf-8"), - message.encode("utf-8"), - hashlib.sha256, - ).hexdigest() - - return path, sign_hex +__all__ = ["Job51Sign"] diff --git a/app/services/crawler/_zhilian_api.py b/app/services/crawler/_zhilian_api.py index 212bb4f..36293d5 100644 --- a/app/services/crawler/_zhilian_api.py +++ b/app/services/crawler/_zhilian_api.py @@ -12,7 +12,7 @@ from __future__ import annotations from typing import Any, Optional -from app.services.crawler._base import BaseFetcher, BaseSearcher +from crawler_core.base import BaseFetcher, BaseSearcher from app.services.crawler._zhilian_client import ZhilianClient, create_cgate_client, create_capi_client @@ -145,4 +145,4 @@ class SearchCompanyPositions(BaseSearcher): return params def _request(self, params: 
dict) -> tuple[int, Any]: - return self._http.get(self.ENDPOINT, params) + return self.http_client.get(self.ENDPOINT, params) diff --git a/app/services/crawler/_zhilian_client.py b/app/services/crawler/_zhilian_client.py index 4cb6296..eb69733 100644 --- a/app/services/crawler/_zhilian_client.py +++ b/app/services/crawler/_zhilian_client.py @@ -12,8 +12,8 @@ from __future__ import annotations from typing import Any, Optional -from app.services.crawler._http_client import HTTPClient -from app.services.crawler._zhilian_sign import ZhilianSign +from crawler_core.http_client import HTTPClient +from crawler_core.zhilian.sign import ZhilianSign CGATE_BASE_URL = "https://cgate.zhaopin.com" CAPI_BASE_URL = "https://capi.zhaopin.com" diff --git a/app/services/crawler/_zhilian_sign.py b/app/services/crawler/_zhilian_sign.py index f249df5..d8b36cb 100644 --- a/app/services/crawler/_zhilian_sign.py +++ b/app/services/crawler/_zhilian_sign.py @@ -4,60 +4,12 @@ # 将在下一里程碑中删除。 # """ -智联招聘签名算法 -复制自 spiderJobs/platforms/zhilian/sign.py — import 改为本地引用 +智联招聘签名 — 向后兼容桩 + +已迁移至 crawler_core.zhilian.sign。 +直接从 crawler_core 重新导出,避免下游代码出现 ImportError。 """ -from __future__ import annotations +from crawler_core.zhilian.sign import ZhilianSign # noqa: F401 -import math -import random -from typing import Optional - - -class ZhilianSign: - def __init__( - self, *, at: str = "", rt: str = "", - device_id: Optional[str] = None, version: str = "4.1.259", - channel: str = "wxxiaochengxu", platform: str = "12", - ): - self.at = at - self.rt = rt - self.device_id = device_id or self.generate_uuid() - self.version = version - self.channel = channel - self.platform = platform - - @staticmethod - def generate_uuid() -> str: - chars = "0123456789ABCDEF" - uuid = [""] * 36 - for i in range(36): - uuid[i] = chars[math.floor(16 * random.random())] - uuid[14] = "4" - uuid[19] = chars[(int(uuid[19], 16) & 0x3) | 0x8] - uuid[8] = uuid[13] = uuid[18] = uuid[23] = "-" - return "".join(uuid) - - def 
sign_headers(self, page_code: str = "0") -> dict: - return { - "x-zp-at": self.at, - "x-zp-rt": self.rt, - "x-zp-action-id": self.generate_uuid(), - "x-zp-page-code": page_code, - "x-zp-version": self.version, - "x-zp-channel": self.channel, - "x-zp-platform": self.platform, - "x-zp-device-id": self.device_id, - "x-zp-business-system": "73", - } - - def sign_params(self) -> dict: - return { - "at": self.at, - "rt": self.rt, - "channel": self.channel, - "platform": self.platform, - "version": self.version, - "d": self.device_id, - } +__all__ = ["ZhilianSign"] diff --git a/app/services/crawler/boss.py b/app/services/crawler/boss.py index 1b14c50..07cc742 100644 --- a/app/services/crawler/boss.py +++ b/app/services/crawler/boss.py @@ -9,14 +9,14 @@ from typing import Any, Dict, List, Optional from loguru import logger -from spiderJobs.platforms.boss.api import ( +from app.services.crawler._boss_api import ( GetBrandDetail, GetJobDetail, SearchBrandJobs, SearchRecJobs, ) -from spiderJobs.platforms.boss.client import BossClient, create_client -from spiderJobs.platforms.boss.sign import BossSign +from app.services.crawler._boss_client import BossClient, create_client +from app.services.crawler._boss_sign import BossSign class BossService: diff --git a/app/services/crawler/qcwy.py b/app/services/crawler/qcwy.py index 4d7fc25..fda1177 100644 --- a/app/services/crawler/qcwy.py +++ b/app/services/crawler/qcwy.py @@ -9,13 +9,13 @@ from typing import Any, Dict, List, Optional from loguru import logger -from spiderJobs.platforms.job51.api import ( +from app.services.crawler._job51_api import ( GetCompanyInfo, GetJobDetail, SearchCompanyJobs, SearchRecommendJobs, ) -from spiderJobs.platforms.job51.client import Job51Client, create_client +from app.services.crawler._job51_client import Job51Client, create_client class QcwyService: diff --git a/app/services/crawler/zhilian.py b/app/services/crawler/zhilian.py index 1229ae7..50381f1 100644 --- a/app/services/crawler/zhilian.py +++ 
b/app/services/crawler/zhilian.py @@ -9,18 +9,18 @@ from typing import Any, Dict, List, Optional from loguru import logger -from spiderJobs.platforms.zhilian.api import ( +from app.services.crawler._zhilian_api import ( GetCompanyDetail, GetPositionDetail, SearchCompanyPositions, SearchPositions, ) -from spiderJobs.platforms.zhilian.client import ( +from app.services.crawler._zhilian_client import ( ZhilianClient, create_capi_client, create_cgate_client, ) -from spiderJobs.platforms.zhilian.sign import ZhilianSign +from app.services.crawler._zhilian_sign import ZhilianSign class ZhilianService: