win 8c2c2d29d7 feat(03): migrate job51+zhilian to crawler_core (ARCH-04/05)
job51 (spiderJobs/platforms/job51/):
- client.py: HTTPClient+Job51Sign from crawler_core
- api.py: ApiResult→Result, self._http→self.http_client, _request() POST overrides
- main.py: BaseFetcher/BaseSearcher from crawler_core
- sign.py: backward-compatible stub re-exporting crawler_core.qcwy.sign.Job51Sign

zhilian (spiderJobs/platforms/zhilian/):
- client.py: HTTPClient+ZhilianSign from crawler_core
- api.py: add _parse_zhilian_response (HTTP 200=success), add _parse()/_request()
  to all classes (GET fetchers + POST searcher overrides)
- main.py: BaseFetcher/BaseSearcher from crawler_core
- sign.py: backward-compatible stub re-exporting crawler_core.zhilian.sign.ZhilianSign

tests: 34 new mock tests (17 job51 + 17 zhilian)
Full regression: 98 passed (job51:17 + zhilian:17 + boss:22 + crawler_core:41 + 1)
2026-03-21 19:18:22 +08:00

307 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
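51Job (前程无忧) - all API endpoints.

Each class is responsible only for building request parameters; HTTP and
algorithms are handled by the client / core layers.

Response format adaptation:
    51job responses either use a status/data envelope or return the data directly.
    status=1 or HTTP 200 indicates success.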
"""
from __future__ import annotations
from typing import Any, Optional
from crawler_core.base import BaseFetcher, BaseSearcher, Result
from spiderJobs.platforms.job51.client import Job51Client, create_client
# ─────────────────────────────────────────────
# 51job response parsing (overrides the default algorithm)
# ─────────────────────────────────────────────
def _parse_job51_response(http_code: int, raw: Any) -> Result:
    """
    Parse a 51job HTTP response into a ``Result``.

    51job response envelope (cupid endpoints)::

        {"status": 1, "message": "成功", "resultbody": {...}}

    ``status`` equal to ``1`` or ``"1"`` means success; a missing ``status``
    is also treated as success. The business payload is read from
    ``resultbody``, falling back to ``data``.

    List-shaped payloads are recognised in this order:

    1. ``payload["jobList"]["items"]``
    2. ``payload["jobList"]`` when it is itself a list
    3. ``payload["items"]`` (``count`` comes from ``totalCount`` when present)
    4. ``payload["list"]``

    A list field that is present but ``null`` (or otherwise not a list) is
    treated as an empty page instead of raising ``TypeError`` on ``len()``.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body; expected to be a dict.

    Returns:
        A ``Result`` describing success or failure. For list payloads the
        ``list``, ``count`` and ``is_end_page`` fields are populated.
    """
    if http_code != 200:
        return Result(
            success=False,
            status_code=http_code,
            error=f"HTTP 请求失败: {http_code}",
        )
    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    # Business status may arrive as int 1 or str "1".
    biz_status = raw.get("status")
    if biz_status is not None and str(biz_status) != "1":
        status_text = str(biz_status)
        return Result(
            success=False,
            # isdecimal() only accepts characters int() can parse; isdigit()
            # also accepts e.g. superscript digits, which make int() raise.
            status_code=int(status_text) if status_text.isdecimal() else -1,
            error=raw.get("message") or f"业务错误: {biz_status}",
        )

    payload = raw.get("resultbody") or raw.get("data") or {}
    if not isinstance(payload, dict):
        # Non-dict payloads carry no recognisable list structure.
        return Result(success=True, status_code=200, data=payload)

    def _list_result(items: Any, total: Any = None) -> Result:
        # Build a list-type Result; null / non-list values become an empty page.
        rows = items if isinstance(items, list) else []
        return Result(
            success=True,
            status_code=200,
            data=payload,
            list=rows,
            count=len(rows) if total is None else total,
            is_end_page=not rows,
        )

    # Recommended jobs: resultbody.jobList.items[] or resultbody.jobList[]
    if "jobList" in payload:
        job_list_wrap = payload["jobList"]
        if isinstance(job_list_wrap, dict) and "items" in job_list_wrap:
            return _list_result(job_list_wrap["items"])
        if isinstance(job_list_wrap, list):
            return _list_result(job_list_wrap)

    # Company jobs: resultbody.items[] with an optional totalCount
    if "items" in payload:
        return _list_result(payload["items"], payload.get("totalCount"))

    # Generic list field
    if "list" in payload:
        return _list_result(payload["list"])

    return Result(success=True, status_code=200, data=payload)
# ─────────────────────────────────────────────
# 1. Home-page recommended job search (POST)
# ─────────────────────────────────────────────
class SearchRecommendJobs(BaseSearcher):
    """
    Home-page recommended / search job listing (no login required).

    Example::

        api = SearchRecommendJobs(job_area="020000", function_type="A0N7")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "open/noauth/recommend/job-tab-dynamic-wx-mini"

    def __init__(
        self,
        *,
        job_area: str = "020000",
        function_type: str = "",
        job_type: str = "recommend",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        # Fall back to a freshly created client when none is supplied.
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)
        self.job_area = job_area
        self.function_type = function_type
        self.job_type = job_type

    def _build_params(self, page_index: int) -> dict:
        """Build the POST body for the given page index."""
        # functionType is only sent when a non-empty value was configured;
        # it is appended last so the key order matches the fixed fields first.
        extra = {"functionType": self.function_type} if self.function_type else {}
        return {
            "pageNo": page_index,
            "pageSize": self.page_size,
            "specialPageCode": True,
            "isTouristMode": True,
            "type": self.job_type,
            "jobArea": self.job_area,
            "personAsLabel": "1",
            **extra,
        }

    def _request(self, params: dict):
        """Send the recommend search as a POST request."""
        endpoint = self.ENDPOINT
        return self.http_client.post(endpoint, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate response parsing to the 51job-specific parser."""
        parsed = _parse_job51_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 2. Job detail (GET)
# ─────────────────────────────────────────────
class GetJobDetail(BaseFetcher):
    """
    Job detail (no login required).

    Example::

        detail = GetJobDetail(job_id="170651439").fetch()
    """

    ENDPOINT = "open/noauth/jobs/detail/base"

    def __init__(self, *, job_id: str, client: Optional[Job51Client] = None):
        # Fall back to a freshly created client when none is supplied.
        http = client or create_client()
        super().__init__(http_client=http)
        self.job_id = job_id

    def _build_params(self) -> dict:
        """Return no parameters; the job id is carried in the URL path."""
        return {}

    def fetch(self) -> Result:
        """Override fetch so the job id is appended to the endpoint path."""
        endpoint = "{}/{}".format(self.ENDPOINT, self.job_id)
        try:
            response = self.http_client.get(endpoint)
            http_code, data = response
        except Exception as exc:
            # Any failure while requesting is reported as a failed Result.
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(http_code, data)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate response parsing to the 51job-specific parser."""
        parsed = _parse_job51_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 3. Company detail (GET)
# ─────────────────────────────────────────────
class GetCompanyInfo(BaseFetcher):
    """
    Detailed company information (no login required).

    Example::

        detail = GetCompanyInfo(company_id="9825088").fetch()
    """

    ENDPOINT = "open/noauth/company-info/info-data"

    def __init__(
        self,
        *,
        company_id: str,
        color_one: str = "#ffffff",
        color_two: str = "#ffffffcc",
        client: Optional[Job51Client] = None,
    ):
        # Fall back to a freshly created client when none is supplied.
        http = client or create_client()
        super().__init__(http_client=http)
        self.company_id = company_id
        self.color_one = color_one
        self.color_two = color_two

    def _build_params(self) -> dict:
        """Build the query parameters for the company-info request."""
        return dict(
            companyId=self.company_id,
            colorOne=self.color_one,
            colorTwo=self.color_two,
        )

    def fetch(self) -> Result:
        """Override fetch so the parameters are sent as a GET query."""
        try:
            response = self.http_client.get(self.ENDPOINT, self._build_params())
            http_code, data = response
        except Exception as exc:
            # Any failure while requesting is reported as a failed Result.
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(http_code, data)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate response parsing to the 51job-specific parser."""
        parsed = _parse_job51_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# 4. Company job listing (POST)
# ─────────────────────────────────────────────
class SearchCompanyJobs(BaseSearcher):
    """
    Job openings published by a company (no login required).

    Example::

        api = SearchCompanyJobs(company_id="9825088")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "open/noauth/jobs/company"

    def __init__(
        self,
        *,
        company_id: str,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        # Fall back to a freshly created client when none is supplied.
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)
        self.company_id = company_id
        self.job_area = job_area
        self.function = function
        self.salary_type = salary_type

    def _build_params(self, page_index: int) -> dict:
        """Build the POST body for the given page index."""
        paging = {"pageNum": page_index, "pageSize": self.page_size}
        filters = {
            "coId": self.company_id,
            "jobArea": self.job_area,
            "function": self.function,
            "salaryType": self.salary_type,
        }
        fixed = {"scene": 14, "requestId": ""}
        # Merged in this order so the resulting key order is unchanged.
        return {**paging, **filters, **fixed}

    def _request(self, params: dict):
        """Send the company job search as a POST request."""
        endpoint = self.ENDPOINT
        return self.http_client.post(endpoint, params)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate response parsing to the 51job-specific parser."""
        parsed = _parse_job51_response(http_code, raw)
        return parsed
# ─────────────────────────────────────────────
# Usage examples
# ─────────────────────────────────────────────
if __name__ == "__main__":
    import json

    def _preview(obj) -> str:
        """Render obj as indented JSON, truncated to the first 300 characters."""
        return json.dumps(obj, ensure_ascii=False, indent=2)[:300]

    # 1. Home-page recommended jobs
    print("=== 1. 首页推荐职位 ===")
    recommend = SearchRecommendJobs(job_area="020000").search()
    print(
        f"成功: {recommend.success}, 本页 {len(recommend.list)} 条, "
        f"is_end_page: {recommend.is_end_page}"
    )
    if recommend.list:
        print(f"第一条: {_preview(recommend.list[0])}...")

    # 2. Company detail
    print("\n=== 2. 公司详情 ===")
    company = GetCompanyInfo(company_id="9825088").fetch()
    print(f"成功: {company.success}")
    if company.data:
        print(f"数据: {_preview(company.data)}...")

    # 3. Company job listing
    print("\n=== 3. 公司职位列表 ===")
    company_jobs = SearchCompanyJobs(company_id="9825088").search()
    print(f"成功: {company_jobs.success}, 本页 {len(company_jobs.list)}")

    # 4. Job detail: take the job id from a fresh search result
    print("\n=== 4. 职位详情 ===")
    search_r = SearchRecommendJobs(job_area="020000").search()
    if not search_r.list:
        print("搜索结果为空,跳过")
    else:
        first_job = search_r.list[0]
        # Prefer "jobId"; fall back to "id" when it is missing or empty.
        job_id = str(first_job.get("jobId", "") or first_job.get("id", ""))
        if not job_id:
            print("搜索结果中未找到 jobId 字段")
        else:
            detail = GetJobDetail(job_id=job_id).fetch()
            print(f"成功: {detail.success}")
            if detail.data:
                print(f"数据: {_preview(detail.data)}...")