- Add crawler_core/boss/sign.py: BossSign traceid generator (pure stdlib)
- Add crawler_core/qcwy/sign.py: Job51Sign HMAC-SHA256 signing (pure stdlib)
- Add crawler_core/zhilian/sign.py: ZhilianSign header/param signing (pure stdlib)
- Add __init__.py for all three crawler_core platform directories
- Updated module docstrings to reference crawler_core; all logic unchanged
- No imports from spiderJobs or app; no HTTP dependencies
108 lines
3.0 KiB
Python
108 lines
3.0 KiB
Python
"""
|
||
Boss Zhipin Traceid generation algorithm (crawler_core)
Translated from miniprogram_npm/trace-id/index.js

Traceid format: {prefix}{hex_timestamp_13}{random_6}{checksum_3}
Example: M-W0019d0a8af5f32gtVvnD4M
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
import random
|
||
import time
|
||
|
||
|
||
# base62 alphabet (identical to the one used on the JS side)
|
||
# Symbols are ordered digits, lowercase, uppercase (62 in total). The checksum
# code indexes into this string with `value % 62`, and the random UUID suffix
# is drawn from it.
_CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
|
||
|
||
|
||
def _to_u32(n: int) -> int:
|
||
"""模拟 JS 的 >>> 0(无符号 32 位截断)"""
|
||
return n & 0xFFFFFFFF
|
||
|
||
|
||
def _compute_checksum(uuid_str: str) -> str:
    """Derive the 3-character base62 checksum for a UUID string.

    Three rolling hashes are computed over the character codes, each kept
    to 32 unsigned bits after every step:

    - forward:  ``h = h * 31 + code`` (same as ``(h << 5) - h + code``)
    - backward: ``h = h * 127 + code * (index + 1)``, walking from the last
      character to the first
    - centred:  ``h = h * 7 + code * (|index - mid| + 1)`` where
      ``mid = len(uuid_str) // 2``

    Every checksum character is produced by XOR-ing two of those hashes,
    running the result through a multiply / xor-shift scramble and using
    the outcome modulo 62 as an index into ``_CHARS``.

    NOTE(review): the original notes say this matches ``computeChecksum()``
    in the JS trace-id package; the JS source is not visible from this
    file, so that parity should be confirmed against it.

    Args:
        uuid_str: The UUID body (intended to be the 19-character value
            built by ``_generate_uuid``; the length is not enforced).

    Returns:
        A 3-character string drawn from ``_CHARS``.
    """
    mask = 0xFFFFFFFF
    codes = [ord(ch) for ch in uuid_str]
    size = len(codes)
    mid = size // 2

    def scramble(value: int, first_mul: int, second_mul: int) -> str:
        # Multiply, fold the high bits down, multiply again, fold again;
        # the masks keep the intermediate products within 32 bits.
        value = (value * first_mul) & mask
        value ^= value >> 16
        value = (value * second_mul) & mask
        value ^= value >> 13
        return _CHARS[value % 62]

    # Forward hash over the characters in order.
    forward = 0
    for code in codes:
        forward = (forward * 31 + code) & mask

    # Backward hash: last character first, weighted by its 1-based position.
    backward = 0
    for weight, code in zip(range(size, 0, -1), reversed(codes)):
        backward = (backward * 127 + code * weight) & mask

    # Centred hash: weight grows with distance from the middle index.
    centred = 0
    for idx, code in enumerate(codes):
        centred = (centred * 7 + code * (abs(idx - mid) + 1)) & mask

    first = scramble(forward ^ backward, 2654435761, 2246822507)
    second = scramble(backward ^ centred, 3266489909, 2654435761)
    third = scramble(centred ^ forward, 668265261, 2246822507)
    return first + second + third
|
||
|
||
|
||
def _generate_uuid() -> str:
    """Build the 19-character UUID body of a Traceid.

    Layout: the current Unix time in milliseconds rendered as lowercase
    hex, limited to its last 13 digits and zero-padded on the left to
    width 13, followed by 6 characters picked one at a time with
    ``random.choice`` from ``_CHARS``.

    Returns:
        The 13-character timestamp part concatenated with the 6-character
        random part.
    """
    millis = int(time.time() * 1000)
    # The "013x" spec pads to 13 hex digits; the slice trims longer values.
    stamp = f"{millis:013x}"[-13:]
    suffix = [random.choice(_CHARS) for _ in range(6)]
    return stamp + "".join(suffix)
|
||
|
||
|
||
class BossSign:
    """Request signing helper for Boss Zhipin.

    Its one operation is producing the value for the Traceid header.

    Attributes set by the constructor:
        mpt: Login credential; empty string when not logged in.
        wt2: Login credential; empty string when not logged in.

    Both credentials are only stored on the instance; ``generate_traceid``
    is a static method and does not read them.
    """

    def __init__(self, *, mpt: str = "", wt2: str = ""):
        # Keyword-only so the two credentials cannot be swapped by position.
        self.mpt = mpt
        self.wt2 = wt2

    @staticmethod
    def generate_traceid(prefix: str = "M-W") -> str:
        """Return a new Traceid laid out as ``{prefix}{uuid}{checksum}``.

        Args:
            prefix: Text placed in front of the generated UUID body.

        Returns:
            The prefix, a freshly generated UUID body and the checksum
            computed over that body, concatenated in that order.
        """
        body = _generate_uuid()
        tail = _compute_checksum(body)
        return "{}{}{}".format(prefix, body, tail)
|