win bd1e50e410 feat(01-02): port sign algorithms to crawler_core/ platform directories
- Add crawler_core/boss/sign.py: BossSign traceid generator (pure stdlib)
- Add crawler_core/qcwy/sign.py: Job51Sign HMAC-SHA256 signing (pure stdlib)
- Add crawler_core/zhilian/sign.py: ZhilianSign header/param signing (pure stdlib)
- Add __init__.py for all three crawler_core platform directories
- Updated module docstrings to reference crawler_core; all logic unchanged
- No imports from spiderJobs or app; no HTTP dependencies
2026-03-21 18:08:53 +08:00

89 lines
2.6 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
51Job (前程无忧) request-signing algorithm (crawler_core).

Translated from the mini-program sources utils/cupid.js and
server/request/index.js.

Signing logic:
1. Build path = /{endpoint}?api_key=51job&timestamp={ts}
2. GET:  message = path + &param1=val1&param2=val2
   POST: message = path + JSON.stringify(body)
3. sign = HMAC-SHA256(message, SIGN_KEY).hex()
"""
from __future__ import annotations

import hashlib
import hmac
import json
import random
import time
from urllib.parse import quote
# Must stay identical to the key hard-coded in the mini-program
# (server/config/index.js).
SIGN_KEY = "abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b"


class Job51Sign:
    """Request signer for 51Job.

    Produces the signed URL path and the matching ``sign`` value from an
    endpoint, an HTTP method, and the request's query parameters or JSON body.

    Args:
        sign_key: HMAC key; defaults to the module-level ``SIGN_KEY``.
    """

    def __init__(self, *, sign_key: str = SIGN_KEY):
        self.sign_key = sign_key

    @staticmethod
    def generate_uuid() -> str:
        """Return a numeric id: millisecond timestamp + random 10-digit number.

        Intended for use as the ``distinct_id`` / ``uuid`` request value.
        """
        millis = int(time.time() * 1000)
        suffix = random.randint(1000000000, 9999999999)
        return f"{millis}{suffix}"

    def build_sign_path(
        self,
        endpoint: str,
        method: str = "GET",
        params: dict | None = None,
        body: dict | None = None,
        *,
        timestamp: int | None = None,
    ) -> tuple[str, str]:
        """Build the signed URL path and its HMAC-SHA256 signature.

        Args:
            endpoint: API path without the domain or a leading slash, e.g.
                ``open/noauth/jobs/detail/base/170651439``.
            method: HTTP method, compared case-insensitively. Only ``GET``
                and ``POST`` get special handling; any other method signs
                the bare path.
            params: Extra query parameters. Used only for ``GET``; keys and
                values are stringified and percent-encoded with no safe
                characters, then appended in dict order.
            body: JSON body. Used only for ``POST``; it is serialized
                compactly (no whitespace, non-ASCII left unescaped) and
                appended to the signed message, not to the returned path.
            timestamp: Unix time in seconds to embed in the path. Defaults
                to the current time; pass a value to get reproducible
                output.

        Returns:
            ``(url_path, sign_hex)``: the URL path including ``api_key``,
            ``timestamp`` and any GET parameters, and the hex HMAC-SHA256
            digest of the signed message.
        """
        ts = int(time.time()) if timestamp is None else int(timestamp)
        verb = method.upper()

        path = f"/{endpoint}?api_key=51job&timestamp={ts}"
        if verb == "GET" and params:
            query = "&".join(
                f"{quote(str(key), safe='')}={quote(str(value), safe='')}"
                for key, value in params.items()
            )
            path = f"{path}&{query}"

        # The signed message is the path itself, plus the JSON body for POST.
        message = path
        if verb == "POST" and body is not None:
            message += json.dumps(body, ensure_ascii=False, separators=(",", ":"))

        sign_hex = hmac.new(
            self.sign_key.encode("utf-8"),
            message.encode("utf-8"),
            hashlib.sha256,
        ).hexdigest()
        return path, sign_hex