win bd1e50e410 feat(01-02): port sign algorithms to crawler_core/ platform directories
- Add crawler_core/boss/sign.py: BossSign traceid generator (pure stdlib)
- Add crawler_core/qcwy/sign.py: Job51Sign HMAC-SHA256 signing (pure stdlib)
- Add crawler_core/zhilian/sign.py: ZhilianSign header/param signing (pure stdlib)
- Add __init__.py for all three crawler_core platform directories
- Updated module docstrings to reference crawler_core; all logic unchanged
- No imports from spiderJobs or app; no HTTP dependencies
2026-03-21 18:08:53 +08:00

108 lines
3.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Boss直聘 Traceid 生成算法 (crawler_core)
从 miniprogram_npm/trace-id/index.js 翻译而来
Traceid 格式: {prefix}{hex_timestamp_13}{random_6}{checksum_3}
示例: M-W0019d0a8af5f32gtVvnD4M
"""
from __future__ import annotations
import random
import time
# Base62 alphabet (kept identical to the JS side): digits, then lowercase
# letters, then uppercase letters. Checksum and random characters are drawn
# from this string by index.
_CHARS = "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
def _to_u32(n: int) -> int:
"""模拟 JS 的 >>> 0无符号 32 位截断)"""
return n & 0xFFFFFFFF
def _compute_checksum(uuid_str: str) -> str:
    """Derive the 3-character base62 checksum for a UUID string.

    Three rolling hashes, each truncated to 32 bits after every step, are
    accumulated over the characters of ``uuid_str``:

    - forward:  ``h = h * 31 + code``, scanning left to right
    - backward: ``h = h * 127 + code * (index + 1)``, scanning right to left
    - centred:  ``h = h * 7 + code * (|index - mid| + 1)``, scanning left to
      right, with ``mid = len(uuid_str) // 2``

    The hashes are then XOR-ed pairwise; each result goes through two
    multiply / xor-shift rounds and is reduced modulo 62 to pick one
    character from ``_CHARS``.

    NOTE(review): the original author describes this as matching the JS
    ``computeChecksum()``; the JS source is not part of this file, so that
    parity is unverified here.

    Args:
        uuid_str: The string to checksum (the caller in this file passes the
            19-character UUID).

    Returns:
        A string of exactly three characters taken from ``_CHARS``.
    """
    mask = 0xFFFFFFFF
    codes = [ord(ch) for ch in uuid_str]
    centre = len(codes) // 2

    # (h << 5) - h == 31 * h; the mask keeps the value in unsigned 32 bits.
    forward = 0
    for code in codes:
        forward = (forward * 31 + code) & mask

    # (h << 7) - h == 127 * h; characters are visited last to first and
    # weighted by their 1-based position.
    backward = 0
    for pos in reversed(range(len(codes))):
        backward = (backward * 127 + codes[pos] * (pos + 1)) & mask

    # (h << 3) - h == 7 * h; characters are weighted by their distance from
    # the middle index.
    centred = 0
    for pos, code in enumerate(codes):
        centred = (centred * 7 + code * (abs(pos - centre) + 1)) & mask

    def _scramble(seed: int, first_mul: int, second_mul: int) -> str:
        # Two multiply / xor-shift rounds, then map onto the base62 alphabet.
        # The xor-shift cannot exceed 32 bits, so only the products are masked.
        mixed = (first_mul * seed) & mask
        mixed ^= mixed >> 16
        mixed = (second_mul * mixed) & mask
        mixed ^= mixed >> 13
        return _CHARS[mixed % 62]

    rounds = (
        (forward ^ backward, 2654435761, 2246822507),  # 1st checksum char
        (backward ^ centred, 3266489909, 2654435761),  # 2nd checksum char
        (centred ^ forward, 668265261, 2246822507),    # 3rd checksum char
    )
    return "".join(_scramble(seed, m1, m2) for seed, m1, m2 in rounds)
def _generate_uuid() -> str:
    """Build the 19-character UUID used inside a Traceid.

    Returns:
        13 lowercase hex digits of the current Unix time in milliseconds
        (left-padded with zeros, keeping only the 13 low-order digits)
        followed by 6 characters picked at random from ``_CHARS``.
    """
    now_ms = int(time.time() * 1000)
    # The "x" format already yields lowercase hex digits.
    stamp = f"{now_ms:x}"[-13:].zfill(13)
    suffix = [random.choice(_CHARS) for _ in range(6)]
    return stamp + "".join(suffix)
class BossSign:
    """Request-signing helper for Boss Zhipin.

    Its one operation is generating the value for the ``Traceid`` header.

    Args:
        mpt: Login credential (empty string when not logged in).
        wt2: Login credential (empty string when not logged in).
    """

    def __init__(self, *, mpt: str = "", wt2: str = ""):
        # Both credentials are keyword-only and simply stored on the
        # instance; nothing in this class reads them.
        self.mpt, self.wt2 = mpt, wt2

    @staticmethod
    def generate_traceid(prefix: str = "M-W") -> str:
        """Generate a Traceid of the form ``{prefix}{uuid}{checksum}``.

        Args:
            prefix: Text placed in front of the generated UUID.

        Returns:
            The prefix, a fresh UUID from ``_generate_uuid()`` and the
            checksum of that UUID from ``_compute_checksum()``, concatenated.
        """
        uuid_part = _generate_uuid()
        check_part = _compute_checksum(uuid_part)
        return f"{prefix}{uuid_part}{check_part}"