feat(01-02): port sign algorithms to crawler_core/ platform directories
- Add crawler_core/boss/sign.py: BossSign traceid generator (pure stdlib) - Add crawler_core/qcwy/sign.py: Job51Sign HMAC-SHA256 signing (pure stdlib) - Add crawler_core/zhilian/sign.py: ZhilianSign header/param signing (pure stdlib) - Add __init__.py for all three crawler_core platform directories - Updated module docstrings to reference crawler_core; all logic unchanged - No imports from spiderJobs or app; no HTTP dependencies
This commit is contained in:
parent
4932177f7c
commit
bd1e50e410
107
crawler_core/boss/sign.py
Normal file
107
crawler_core/boss/sign.py
Normal file
@ -0,0 +1,107 @@
|
||||
"""
|
||||
Boss直聘 Traceid 生成算法 (crawler_core)
|
||||
从 miniprogram_npm/trace-id/index.js 翻译而来
|
||||
|
||||
Traceid 格式: {prefix}{hex_timestamp_13}{random_6}{checksum_3}
|
||||
示例: M-W0019d0a8af5f32gtVvnD4M
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import random
|
||||
import time
|
||||
|
||||
|
||||
# Base62 alphabet: digits, then lowercase, then uppercase letters.
# The ordering is kept identical to the JS implementation this was ported from.
_CHARS = (
    "0123456789"
    "abcdefghijklmnopqrstuvwxyz"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
)
|
||||
|
||||
|
||||
def _to_u32(n: int) -> int:
    """Truncate *n* to an unsigned 32-bit integer, emulating JavaScript's ``>>> 0``."""
    low_32_bits = (1 << 32) - 1
    return n & low_32_bits
|
||||
|
||||
|
||||
def _compute_checksum(uuid_str: str) -> str:
    """
    Derive the 3-character base62 checksum for a UUID string.

    Three rolling hashes, each truncated to 32 bits after every step, are
    computed over the character codes of ``uuid_str``:

    - forward hash:  h = (h << 5) - h + code
    - reverse hash:  h = (h << 7) - h + code * (index + 1), walking right to left
    - centre hash:   h = (h << 3) - h + code * (|index - mid| + 1), mid = len // 2

    The hashes are then XOR-ed pairwise; each result goes through a
    multiply / xor-shift mixer and is reduced modulo 62 to pick one
    character from ``_CHARS``.

    NOTE(review): the original port comments say the JS side uses ``r &= r``
    (a signed int32) and ``Math.abs(s)``, while this port stays unsigned
    throughout. Confirm against the JS ``computeChecksum()`` that the two
    agree when the signed XOR would be negative.
    """
    mask = 0xFFFFFFFF
    codes = [ord(ch) for ch in uuid_str]
    centre = len(codes) // 2

    forward = 0
    for code in codes:
        forward = ((forward << 5) - forward + code) & mask

    backward = 0
    for pos in reversed(range(len(codes))):
        backward = ((backward << 7) - backward + codes[pos] * (pos + 1)) & mask

    spread = 0
    for pos, code in enumerate(codes):
        spread = ((spread << 3) - spread + code * (abs(pos - centre) + 1)) & mask

    def mix(value: int, first_mul: int, second_mul: int) -> str:
        # multiply -> xor-shift 16 -> multiply -> xor-shift 13, all within 32 bits
        value = (first_mul * value) & mask
        value ^= value >> 16
        value = (second_mul * value) & mask
        value ^= value >> 13
        return _CHARS[value % 62]

    first = mix(forward ^ backward, 2654435761, 2246822507)
    second = mix(backward ^ spread, 3266489909, 2654435761)
    third = mix(spread ^ forward, 668265261, 2246822507)
    return first + second + third
|
||||
|
||||
|
||||
def _generate_uuid() -> str:
    """
    Build the 19-character UUID body.

    Layout: 13 hex digits of the current millisecond timestamp
    (zero-padded on the left) followed by 6 random base62 characters.
    """
    millis = int(time.time() * 1000)
    # Keep at most the 13 low-order hex digits, then left-pad with zeros to 13.
    stamp = f"{millis:x}"[-13:].rjust(13, "0")
    suffix = [random.choice(_CHARS) for _ in range(6)]
    return stamp + "".join(suffix)
|
||||
|
||||
|
||||
class BossSign:
    """
    Request-signing helper for Boss Zhipin.

    Its job is to produce the value of the ``Traceid`` header.

    Attributes:
        mpt: login credential (empty string when not logged in)
        wt2: login credential (empty string when not logged in)
    """

    def __init__(self, *, mpt: str = "", wt2: str = ""):
        # The credentials are only stored; generate_traceid() does not read them.
        self.mpt, self.wt2 = mpt, wt2

    @staticmethod
    def generate_traceid(prefix: str = "M-W") -> str:
        """Return a Traceid laid out as ``{prefix}{uuid}{checksum}``."""
        body = _generate_uuid()
        tail = _compute_checksum(body)
        return f"{prefix}{body}{tail}"
|
||||
88
crawler_core/qcwy/sign.py
Normal file
88
crawler_core/qcwy/sign.py
Normal file
@ -0,0 +1,88 @@
|
||||
"""
|
||||
前程无忧 (51Job) 签名算法 (crawler_core)
|
||||
从小程序源码 utils/cupid.js + server/request/index.js 翻译而来
|
||||
|
||||
签名逻辑:
|
||||
1. 构造 path = /{endpoint}?api_key=51job&timestamp={ts}
|
||||
2. GET: message = path + &param1=val1&param2=val2
|
||||
POST: message = path + JSON.stringify(body)
|
||||
3. sign = HMAC-SHA256(message, SIGN_KEY).hex()
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import hmac
|
||||
import hashlib
|
||||
import time
|
||||
import random
|
||||
from urllib.parse import quote
|
||||
|
||||
|
||||
# Must match the key hard-coded in the mini-program (server/config/index.js).
SIGN_KEY = "abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b"


class Job51Sign:
    """
    Request signing for 51Job.

    Produces the signed URL path and the ``sign`` value from the request
    endpoint, HTTP method and parameters.

    Attributes:
        sign_key: HMAC key (defaults to the key built into the mini-program)
    """

    def __init__(self, *, sign_key: str = SIGN_KEY):
        self.sign_key = sign_key

    @staticmethod
    def generate_uuid() -> str:
        """Return a numeric id: millisecond timestamp followed by a 10-digit random number.

        Intended for use as ``distinct_id`` / ``uuid``.
        """
        ts = str(int(time.time() * 1000))
        rand = str(random.randint(1000000000, 9999999999))
        return ts + rand

    def build_sign_path(
        self,
        endpoint: str,
        method: str = "GET",
        params: dict | None = None,
        body: dict | None = None,
    ) -> tuple[str, str]:
        """
        Build the signed URL path and compute its signature.

        Args:
            endpoint: API path without the domain or a leading slash,
                e.g. ``open/noauth/jobs/detail/base/170651439``
            method: HTTP method (``GET`` / ``POST``), case-insensitive
            params: extra query parameters; only used for GET requests
            body: JSON body; only used for POST requests

        Returns:
            ``(url_path, sign_hex)`` where ``url_path`` is the full path
            including ``api_key`` and ``timestamp`` (plus the percent-encoded
            GET parameters), and ``sign_hex`` is the hex HMAC-SHA256 of the
            signed message keyed with ``self.sign_key``.
        """
        # Local import: json is not among this module's top-level imports.
        import json

        verb = method.upper()
        ts = int(time.time())
        # The separator must be a literal "&timestamp="; an HTML-entity-mangled
        # "×tamp" here would break both the URL and the signature.
        path = f"/{endpoint}?api_key=51job&timestamp={ts}"

        if verb == "GET" and params:
            # Keys and values are fully percent-encoded (no "safe" characters).
            path += "&" + "&".join(
                f"{quote(str(k), safe='')}={quote(str(v), safe='')}"
                for k, v in params.items()
            )

        # Signed message: the path, plus the compact JSON body for POST.
        message = path
        if verb == "POST" and body is not None:
            message += json.dumps(body, ensure_ascii=False, separators=(",", ":"))

        sign_hex = hmac.new(
            self.sign_key.encode("utf-8"),
            message.encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()

        return path, sign_hex
|
||||
86
crawler_core/zhilian/sign.py
Normal file
86
crawler_core/zhilian/sign.py
Normal file
@ -0,0 +1,86 @@
|
||||
"""
|
||||
智联招聘签名算法 (crawler_core)
|
||||
职责:参数构造 + 签名算法,不涉及 HTTP 请求
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import math
|
||||
import random
|
||||
from typing import Optional
|
||||
|
||||
|
||||
class ZhilianSign:
    """
    Request signing for Zhilian Zhaopin.

    1. Holds the values the signed requests need (device id, tokens, ...).
    2. Builds either signing headers (cgate endpoints) or signing query
       parameters (capi endpoints).

    Attributes:
        at: access token (obtained after login; empty when not logged in)
        rt: refresh token (obtained after login; empty when not logged in)
        device_id: device id; a UUID is generated when none is supplied
        version: mini-program version string
        channel: channel identifier
        platform: platform id (12 = WeChat mini-program)
    """

    def __init__(
        self,
        *,
        at: str = "",
        rt: str = "",
        device_id: Optional[str] = None,
        version: str = "4.1.259",
        channel: str = "wxxiaochengxu",
        platform: str = "12",
    ):
        self.at = at
        self.rt = rt
        # Any falsy device_id (None or "") triggers generation of a fresh one.
        self.device_id = device_id if device_id else self.generate_uuid()
        self.version = version
        self.channel = channel
        self.platform = platform

    # -- UUID generation (kept in step with the mini-program) --

    @staticmethod
    def generate_uuid() -> str:
        """Return a random 36-character, upper-case, version-4 style UUID string."""
        hex_digits = "0123456789ABCDEF"
        # Draw all 36 positions first; the fixed positions are overwritten below.
        slots = [hex_digits[int(16 * random.random())] for _ in range(36)]
        slots[14] = "4"
        # Force the top two bits of position 19 to "10", giving one of 8, 9, A, B.
        slots[19] = hex_digits[(hex_digits.index(slots[19]) & 0x3) | 0x8]
        for dash_at in (8, 13, 18, 23):
            slots[dash_at] = "-"
        return "".join(slots)

    # -- cgate signing headers --

    def sign_headers(self, page_code: str = "0") -> dict:
        """Build the signing request headers for cgate endpoints.

        A new action id is generated on every call.
        """
        headers = {
            "x-zp-at": self.at,
            "x-zp-rt": self.rt,
            "x-zp-action-id": self.generate_uuid(),
            "x-zp-page-code": page_code,
            "x-zp-version": self.version,
            "x-zp-channel": self.channel,
            "x-zp-platform": self.platform,
            "x-zp-device-id": self.device_id,
            "x-zp-business-system": "73",
        }
        return headers

    # -- capi signing parameters --

    def sign_params(self) -> dict:
        """Build the signing query parameters for capi endpoints."""
        query = {
            "at": self.at,
            "rt": self.rt,
            "channel": self.channel,
            "platform": self.platform,
            "version": self.version,
            "d": self.device_id,
        }
        return query
|
||||
Loading…
x
Reference in New Issue
Block a user