fix(04): correct architecture — private files use crawler_core directly
Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:
        crawler_core   ← shared base library
          ↑        ↑
 spiderJobs      app/services/crawler/
 (standalone)    (FastAPI backend, private layer)
Changes:
- boss.py / qcwy.py / zhilian.py: revert imports back to the private modules (_boss_api, _job51_api, _zhilian_api, etc.).
- _boss_api.py / _job51_api.py / _zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
  + fix self._http → self.http_client
- _boss_client.py / _job51_client.py / _zhilian_client.py: use crawler_core.http_client.HTTPClient
  + _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss_sign.py / _job51_sign.py / _zhilian_sign.py: backward-compat stubs → crawler_core.*.sign
Full regression: 106 passed in 0.68s
This commit is contained in:
parent
3aadbd128b
commit
2b94f15b56
@ -13,7 +13,7 @@ from __future__ import annotations
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import urlencode
|
||||
|
||||
from app.services.crawler._base import ApiResult, BaseFetcher, BaseSearcher
|
||||
from crawler_core.base import BaseFetcher, BaseSearcher, Result as ApiResult
|
||||
from app.services.crawler._boss_client import BossClient, create_client
|
||||
|
||||
|
||||
@ -78,7 +78,7 @@ class SearchRecJobs(BaseSearcher):
|
||||
}
|
||||
|
||||
def _request(self, params: dict) -> tuple[int, Any]:
|
||||
return self._http.get(self.ENDPOINT, params)
|
||||
return self.http_client.get(self.ENDPOINT, params)
|
||||
|
||||
def _parse(self, http_code: int, raw: Any) -> ApiResult:
    """Delegate response parsing to the module-level ``_parse_boss_response``."""
    parsed = _parse_boss_response(http_code, raw)
    return parsed
|
||||
@ -113,7 +113,7 @@ class GetJobDetail(BaseFetcher):
|
||||
{"path": "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json", "method": "GET", "query": improvement_query},
|
||||
]
|
||||
try:
|
||||
client: BossClient = self._http
|
||||
client: BossClient = self.http_client
|
||||
http_code, data = client.batch(sub_reqs)
|
||||
except Exception as e:
|
||||
return ApiResult(success=False, status_code=-1, error=str(e))
|
||||
@ -176,7 +176,7 @@ class SearchBrandJobs(BaseSearcher):
|
||||
}
|
||||
|
||||
def _request(self, params: dict) -> tuple[int, Any]:
|
||||
return self._http.get(self.ENDPOINT, params)
|
||||
return self.http_client.get(self.ENDPOINT, params)
|
||||
|
||||
def _parse(self, http_code: int, raw: Any) -> ApiResult:
    """Delegate response parsing to the module-level ``_parse_boss_response``."""
    parsed = _parse_boss_response(http_code, raw)
    return parsed
|
||||
|
||||
@ -12,8 +12,8 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from app.services.crawler._http_client import HTTPClient
|
||||
from app.services.crawler._boss_sign import BossSign
|
||||
from crawler_core.http_client import HTTPClient
|
||||
from crawler_core.boss.sign import BossSign
|
||||
|
||||
BASE_URL = "https://www.zhipin.com"
|
||||
|
||||
|
||||
@ -4,75 +4,12 @@
|
||||
# 将在下一里程碑中删除。
|
||||
#
|
||||
"""
|
||||
Boss直聘 Traceid 生成算法
|
||||
复制自 spiderJobs/platforms/boss/sign.py — import 改为本地引用
|
||||
Boss直聘 Traceid 生成算法 — 向后兼容桩
|
||||
|
||||
已迁移至 crawler_core.boss.sign。
|
||||
直接从 crawler_core 重新导出,避免下游代码出现 ImportError。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from crawler_core.boss.sign import BossSign # noqa: F401
|
||||
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
_CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||||
|
||||
|
||||
def _to_u32(n: int) -> int:
|
||||
return n & 0xFFFFFFFF
|
||||
|
||||
|
||||
def _compute_checksum(uuid_str: str) -> str:
    """Derive a three-character checksum from *uuid_str*.

    Three 32-bit rolling hashes are computed over the characters (a forward
    pass, a reverse pass weighted by 1-based position, and a pass weighted by
    distance from the midpoint). Each pair of hashes is XOR-ed, scrambled with
    two multiply/shift rounds, and mapped to one character of ``_CHARS``.

    Args:
        uuid_str: The string to checksum.

    Returns:
        A string of exactly three characters drawn from ``_CHARS``.
    """
    codes = [ord(ch) for ch in uuid_str]
    half = len(codes) // 2

    def _scramble(seed: int, first: int, second: int) -> int:
        # Shared multiply / xor-shift sequence used for all three characters.
        x = _to_u32(seed)
        x = _to_u32(first * x)
        x = _to_u32(x ^ (x >> 16))
        x = _to_u32(second * x)
        return _to_u32(x ^ (x >> 13))

    # (x << 5) - x == 31 * x
    forward = 0
    for code in codes:
        forward = (31 * forward + code) & 0xFFFFFFFF

    # (x << 7) - x == 127 * x; walks from the last character to the first.
    backward = 0
    for pos, code in reversed(list(enumerate(codes, start=1))):
        backward = (127 * backward + code * pos) & 0xFFFFFFFF

    # (x << 3) - x == 7 * x; weight grows with distance from the midpoint.
    centred = 0
    for idx, code in enumerate(codes):
        centred = (7 * centred + code * (abs(idx - half) + 1)) & 0xFFFFFFFF

    picks = (
        _scramble(forward ^ backward, 2654435761, 2246822507),
        _scramble(backward ^ centred, 3266489909, 2654435761),
        _scramble(centred ^ forward, 668265261, 2246822507),
    )
    return "".join(_CHARS[value % 62] for value in picks)
|
||||
|
||||
|
||||
def _generate_uuid() -> str:
    """Build a 19-character id: 13 hex timestamp digits plus 6 random chars.

    The timestamp part is the current time in milliseconds rendered as
    lowercase hex, truncated to its last 13 digits and left-padded with
    zeros. The random part is drawn from ``_CHARS``.
    """
    millis = int(time.time() * 1000)
    stamp = f"{millis:x}".lower()[-13:].zfill(13)
    suffix = [random.choice(_CHARS) for _ in range(6)]
    return stamp + "".join(suffix)
|
||||
|
||||
|
||||
class BossSign:
    """Holds the ``mpt`` / ``wt2`` values and generates trace ids."""

    def __init__(self, *, mpt: str = "", wt2: str = ""):
        """Store the two keyword-only values on the instance."""
        self.mpt, self.wt2 = mpt, wt2

    @staticmethod
    def generate_traceid(prefix: str = "M-W") -> str:
        """Return *prefix* + a fresh uuid + that uuid's checksum."""
        base = _generate_uuid()
        tail = _compute_checksum(base)
        return f"{prefix}{base}{tail}"
|
||||
__all__ = ["BossSign"]
|
||||
|
||||
@ -12,7 +12,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from app.services.crawler._base import ApiResult, BaseFetcher, BaseSearcher
|
||||
from crawler_core.base import BaseFetcher, BaseSearcher, Result as ApiResult
|
||||
from app.services.crawler._job51_client import Job51Client, create_client
|
||||
|
||||
|
||||
@ -105,7 +105,7 @@ class GetJobDetail(BaseFetcher):
|
||||
def fetch(self) -> ApiResult:
    """GET ``<ENDPOINT>/<job_id>`` and hand the response to ``self._parse``.

    Returns:
        The value produced by ``self._parse`` on success, or a failed
        ``ApiResult`` (``status_code=-1``, ``error`` set to the exception
        text) when the request raises.
    """
    endpoint = f"{self.ENDPOINT}/{self.job_id}"
    try:
        # Only the ``http_client`` call remains; the stale ``self._http``
        # call ran first and failed before this line was reached.
        http_code, data = self.http_client.get(endpoint)
    except Exception as e:
        # Deliberate best-effort: request failures become a failed result.
        return ApiResult(success=False, status_code=-1, error=str(e))
    return self._parse(http_code, data)
|
||||
@ -135,7 +135,7 @@ class GetCompanyInfo(BaseFetcher):
|
||||
|
||||
def fetch(self) -> ApiResult:
    """GET ``self.ENDPOINT`` with ``self._build_params()`` and parse the reply.

    Returns:
        The value produced by ``self._parse`` on success, or a failed
        ``ApiResult`` (``status_code=-1``, ``error`` set to the exception
        text) when the request raises.
    """
    try:
        # Only the ``http_client`` call remains; the stale ``self._http``
        # call ran first and failed before this line was reached.
        http_code, data = self.http_client.get(self.ENDPOINT, self._build_params())
    except Exception as e:
        # Deliberate best-effort: request failures become a failed result.
        return ApiResult(success=False, status_code=-1, error=str(e))
    return self._parse(http_code, data)
|
||||
|
||||
@ -14,8 +14,8 @@ import json
|
||||
from typing import Any, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
from app.services.crawler._http_client import HTTPClient
|
||||
from app.services.crawler._job51_sign import Job51Sign
|
||||
from crawler_core.http_client import HTTPClient
|
||||
from crawler_core.qcwy.sign import Job51Sign
|
||||
|
||||
BASE_URL = "https://cupid.51job.com"
|
||||
|
||||
|
||||
@ -4,59 +4,12 @@
|
||||
# 将在下一里程碑中删除。
|
||||
#
|
||||
"""
|
||||
前程无忧 (51Job) 签名算法
|
||||
复制自 spiderJobs/platforms/job51/sign.py — import 改为本地引用
|
||||
前程无忧 (51Job) 签名 — 向后兼容桩
|
||||
|
||||
已迁移至 crawler_core.qcwy.sign。
|
||||
直接从 crawler_core 重新导出,避免下游代码出现 ImportError。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from crawler_core.qcwy.sign import Job51Sign # noqa: F401
|
||||
|
||||
import hmac
|
||||
import hashlib
|
||||
import time
|
||||
import random
|
||||
from urllib.parse import quote
|
||||
|
||||
|
||||
# NOTE(review): the signing key is hard-coded in source — confirm that is intended.
SIGN_KEY = "abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b"


class Job51Sign:
    """Builds signed request paths using HMAC-SHA256 over the path (and body)."""

    def __init__(self, *, sign_key: str = SIGN_KEY):
        """Store the HMAC key; defaults to the module-level ``SIGN_KEY``."""
        self.sign_key = sign_key

    @staticmethod
    def generate_uuid() -> str:
        """Return the millisecond timestamp followed by a random 10-digit number."""
        ts = str(int(time.time() * 1000))
        rand = str(random.randint(1000000000, 9999999999))
        return ts + rand

    def build_sign_path(
        self,
        endpoint: str,
        method: str = "GET",
        params: dict | None = None,
        body: dict | None = None,
    ) -> tuple[str, str]:
        """Build the request path and its HMAC-SHA256 signature.

        Args:
            endpoint: Path segment placed after the leading ``/``.
            method: HTTP verb, compared case-insensitively.
            params: Query parameters; appended (percent-encoded) for GET only.
            body: JSON body; appended to the signed message for POST only.

        Returns:
            ``(path, sign_hex)`` where ``path`` is the path with its query
            string and ``sign_hex`` is the hex digest of the signed message.
        """
        import json

        verb = method.upper()
        ts = int(time.time())
        # Fixed: the separator before ``timestamp`` had been garbled into "×",
        # which merged the two query parameters and changed the signed path.
        path = f"/{endpoint}?api_key=51job&timestamp={ts}"

        if verb == "GET" and params:
            encoded = [
                f"{quote(str(k), safe='')}={quote(str(v), safe='')}"
                for k, v in params.items()
            ]
            path += "&" + "&".join(encoded)

        # For POST the compact JSON body is part of the signed message,
        # but not part of the returned path.
        message = path
        if verb == "POST" and body is not None:
            message += json.dumps(body, ensure_ascii=False, separators=(",", ":"))

        sign_hex = hmac.new(
            self.sign_key.encode("utf-8"),
            message.encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()

        return path, sign_hex
|
||||
__all__ = ["Job51Sign"]
|
||||
|
||||
@ -12,7 +12,7 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from app.services.crawler._base import BaseFetcher, BaseSearcher
|
||||
from crawler_core.base import BaseFetcher, BaseSearcher
|
||||
from app.services.crawler._zhilian_client import ZhilianClient, create_cgate_client, create_capi_client
|
||||
|
||||
|
||||
@ -145,4 +145,4 @@ class SearchCompanyPositions(BaseSearcher):
|
||||
return params
|
||||
|
||||
def _request(self, params: dict) -> tuple[int, Any]:
|
||||
return self._http.get(self.ENDPOINT, params)
|
||||
return self.http_client.get(self.ENDPOINT, params)
|
||||
|
||||
@ -12,8 +12,8 @@ from __future__ import annotations
|
||||
|
||||
from typing import Any, Optional
|
||||
|
||||
from app.services.crawler._http_client import HTTPClient
|
||||
from app.services.crawler._zhilian_sign import ZhilianSign
|
||||
from crawler_core.http_client import HTTPClient
|
||||
from crawler_core.zhilian.sign import ZhilianSign
|
||||
|
||||
CGATE_BASE_URL = "https://cgate.zhaopin.com"
|
||||
CAPI_BASE_URL = "https://capi.zhaopin.com"
|
||||
|
||||
@ -4,60 +4,12 @@
|
||||
# 将在下一里程碑中删除。
|
||||
#
|
||||
"""
|
||||
智联招聘签名算法
|
||||
复制自 spiderJobs/platforms/zhilian/sign.py — import 改为本地引用
|
||||
智联招聘签名 — 向后兼容桩
|
||||
|
||||
已迁移至 crawler_core.zhilian.sign。
|
||||
直接从 crawler_core 重新导出,避免下游代码出现 ImportError。
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
from crawler_core.zhilian.sign import ZhilianSign # noqa: F401
|
||||
|
||||
import math
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ZhilianSign:
    """Carries the token/device settings and renders them as headers or params."""

    def __init__(
        self,
        *,
        at: str = "",
        rt: str = "",
        device_id: Optional[str] = None,
        version: str = "4.1.259",
        channel: str = "wxxiaochengxu",
        platform: str = "12",
    ):
        """Store the settings; a falsy *device_id* is replaced by a generated uuid."""
        self.at = at
        self.rt = rt
        if device_id:
            self.device_id = device_id
        else:
            self.device_id = self.generate_uuid()
        self.version = version
        self.channel = channel
        self.platform = platform

    @staticmethod
    def generate_uuid() -> str:
        """Return a random 36-character uppercase-hex id in 8-4-4-4-12 layout.

        Position 14 is forced to ``4`` and position 19 to one of ``8``, ``9``,
        ``A`` or ``B``; positions 8, 13, 18 and 23 hold dashes.
        """
        hex_digits = "0123456789ABCDEF"
        cells = [hex_digits[math.floor(16 * random.random())] for _ in range(36)]
        cells[14] = "4"
        cells[19] = hex_digits[(int(cells[19], 16) & 0x3) | 0x8]
        for dash_at in (8, 13, 18, 23):
            cells[dash_at] = "-"
        return "".join(cells)

    def sign_headers(self, page_code: str = "0") -> dict:
        """Return the ``x-zp-*`` header dict; the action id is fresh on every call."""
        headers = {}
        headers["x-zp-at"] = self.at
        headers["x-zp-rt"] = self.rt
        headers["x-zp-action-id"] = self.generate_uuid()
        headers["x-zp-page-code"] = page_code
        headers["x-zp-version"] = self.version
        headers["x-zp-channel"] = self.channel
        headers["x-zp-platform"] = self.platform
        headers["x-zp-device-id"] = self.device_id
        headers["x-zp-business-system"] = "73"
        return headers

    def sign_params(self) -> dict:
        """Return the same settings as query parameters (device id under ``d``)."""
        return dict(
            at=self.at,
            rt=self.rt,
            channel=self.channel,
            platform=self.platform,
            version=self.version,
            d=self.device_id,
        )
|
||||
__all__ = ["ZhilianSign"]
|
||||
|
||||
@ -9,14 +9,14 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from spiderJobs.platforms.boss.api import (
|
||||
from app.services.crawler._boss_api import (
|
||||
GetBrandDetail,
|
||||
GetJobDetail,
|
||||
SearchBrandJobs,
|
||||
SearchRecJobs,
|
||||
)
|
||||
from spiderJobs.platforms.boss.client import BossClient, create_client
|
||||
from spiderJobs.platforms.boss.sign import BossSign
|
||||
from app.services.crawler._boss_client import BossClient, create_client
|
||||
from app.services.crawler._boss_sign import BossSign
|
||||
|
||||
|
||||
class BossService:
|
||||
|
||||
@ -9,13 +9,13 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from spiderJobs.platforms.job51.api import (
|
||||
from app.services.crawler._job51_api import (
|
||||
GetCompanyInfo,
|
||||
GetJobDetail,
|
||||
SearchCompanyJobs,
|
||||
SearchRecommendJobs,
|
||||
)
|
||||
from spiderJobs.platforms.job51.client import Job51Client, create_client
|
||||
from app.services.crawler._job51_client import Job51Client, create_client
|
||||
|
||||
|
||||
class QcwyService:
|
||||
|
||||
@ -9,18 +9,18 @@ from typing import Any, Dict, List, Optional
|
||||
|
||||
from loguru import logger
|
||||
|
||||
from spiderJobs.platforms.zhilian.api import (
|
||||
from app.services.crawler._zhilian_api import (
|
||||
GetCompanyDetail,
|
||||
GetPositionDetail,
|
||||
SearchCompanyPositions,
|
||||
SearchPositions,
|
||||
)
|
||||
from spiderJobs.platforms.zhilian.client import (
|
||||
from app.services.crawler._zhilian_client import (
|
||||
ZhilianClient,
|
||||
create_capi_client,
|
||||
create_cgate_client,
|
||||
)
|
||||
from spiderJobs.platforms.zhilian.sign import ZhilianSign
|
||||
from app.services.crawler._zhilian_sign import ZhilianSign
|
||||
|
||||
|
||||
class ZhilianService:
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user