job51 (spiderJobs/platforms/job51/): - client.py: HTTPClient+Job51Sign from crawler_core - api.py: ApiResult→Result, self._http→self.http_client, _request() POST overrides - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.qcwy.sign.Job51Sign zhilian (spiderJobs/platforms/zhilian/): - client.py: HTTPClient+ZhilianSign from crawler_core - api.py: add _parse_zhilian_response (HTTP 200=success), add _parse()/_request() to all classes (GET fetchers + POST searcher overrides) - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.zhilian.sign.ZhilianSign tests: 34 new mock tests (17 job51 + 17 zhilian) Full regression: 98 passed (job51:17 + zhilian:17 + boss:22 + crawler_core:41 + 1)
307 lines
11 KiB
Python
307 lines
11 KiB
Python
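"""
51job (前程无忧) - all API endpoints.

Each class is only responsible for building request parameters; HTTP
transport and the algorithms are handled by the client / core layers.

Response format adaptation:
51job replies either with a status/data envelope or with the data directly.
A response is treated as successful when the HTTP code is 200 and the
business ``status`` field, if present, equals 1.
"""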
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Optional
|
||
|
||
from crawler_core.base import BaseFetcher, BaseSearcher, Result
|
||
from spiderJobs.platforms.job51.client import Job51Client, create_client
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 51job 响应解析(覆写默认算法)
|
||
# ─────────────────────────────────────────────
|
||
|
||
def _parse_job51_response(http_code: int, raw: Any) -> Result:
    """Convert a raw 51job response into a ``Result``.

    The cupid endpoints answer with an envelope such as
    ``{"status": 1, "message": "成功", "resultbody": {...}}``.  The business
    ``status`` may arrive as the int ``1`` or the string ``"1"``; a missing
    ``status`` is not treated as an error.  The business payload is read from
    ``resultbody``, falling back to ``data`` and then to an empty dict.

    List-shaped payloads are recognised in this order:

    1. ``payload["jobList"]["items"]`` (or ``payload["jobList"]`` being a list)
    2. ``payload["items"]`` with an optional ``payload["totalCount"]``
    3. ``payload["list"]``

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body; expected to be a dict.

    Returns:
        A failed ``Result`` when the HTTP code is not 200, the body is not a
        dict, or the business status is present and not 1.  Otherwise a
        successful ``Result`` carrying the payload in ``data`` and, for list
        payloads, ``list`` / ``count`` / ``is_end_page``.
    """
    if http_code != 200:
        return Result(
            success=False,
            status_code=http_code,
            error=f"HTTP 请求失败: {http_code}",
        )

    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    # Business status check (may be int 1 or str "1").
    biz_status = raw.get("status")
    if biz_status is not None and str(biz_status) != "1":
        return Result(
            success=False,
            status_code=_job51_status_code(biz_status),
            error=raw.get("message") or f"业务错误: {biz_status}",
        )

    payload = raw.get("resultbody") or raw.get("data") or {}

    if isinstance(payload, dict):
        # Recommended jobs: resultbody.jobList.items[] or jobList as a list.
        if "jobList" in payload:
            job_list = payload["jobList"]
            if isinstance(job_list, dict) and "items" in job_list:
                return _job51_list_result(payload, job_list["items"])
            if isinstance(job_list, list):
                return _job51_list_result(payload, job_list)
            # Any other jobList shape falls through to the generic checks.

        # Company jobs: resultbody.items[] with an optional totalCount.
        if "items" in payload:
            return _job51_list_result(
                payload, payload["items"], payload.get("totalCount")
            )

        # Generic "list" field.
        if "list" in payload:
            return _job51_list_result(payload, payload["list"])

    return Result(success=True, status_code=200, data=payload)


def _job51_status_code(biz_status: Any) -> int:
    """Map a business status to an int, or -1 when it is not a plain number."""
    text = str(biz_status)
    if not text.isdigit():
        return -1
    try:
        return int(text)
    except ValueError:
        # str.isdigit() also accepts characters such as superscript digits
        # that int() cannot parse; report those as -1 instead of raising.
        return -1


def _job51_list_result(payload: dict, items: Any, total: Any = None) -> Result:
    """Build a successful list ``Result``; a null item list counts as empty."""
    if items is None:
        # The field was present but null: treat it as an empty page
        # rather than failing on len(None).
        items = []
    size = len(items)
    return Result(
        success=True,
        status_code=200,
        data=payload,
        list=items,
        # Fall back to the page size when no usable total was supplied.
        count=size if total is None else total,
        is_end_page=size == 0,
    )
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 1. 首页推荐职位搜索(POST)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class SearchRecommendJobs(BaseSearcher):
    """Search the home-page recommended job list (POST).

    According to the original author's note this endpoint needs no login.

    Example::

        api = SearchRecommendJobs(job_area="020000", function_type="A0N7")
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "open/noauth/recommend/job-tab-dynamic-wx-mini"

    def __init__(
        self,
        *,
        job_area: str = "020000",
        function_type: str = "",
        job_type: str = "recommend",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        """Store the search filters; a default client is created when none is given."""
        http_client = client or create_client()
        super().__init__(page_size=page_size, http_client=http_client)
        self.job_type = job_type
        self.function_type = function_type
        self.job_area = job_area

    def _build_params(self, page_index: int) -> dict:
        """Return the request body for the given page index."""
        base = {
            "pageNo": page_index,
            "pageSize": self.page_size,
            "specialPageCode": True,
            "isTouristMode": True,
            "type": self.job_type,
            "jobArea": self.job_area,
            "personAsLabel": "1",
        }
        # functionType is only sent when a filter value was supplied, and it
        # is appended after the fixed keys.
        optional = {"functionType": self.function_type} if self.function_type else {}
        return {**base, **optional}

    def _request(self, params: dict):
        """Send the body with POST to ENDPOINT via the HTTP client."""
        response = self.http_client.post(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the shared 51job response parser."""
        return _parse_job51_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 2. 职位详情(GET)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetJobDetail(BaseFetcher):
    """Fetch the detail of a single job (GET).

    According to the original author's note this endpoint needs no login.

    Example::

        detail = GetJobDetail(job_id="170651439").fetch()
    """

    ENDPOINT = "open/noauth/jobs/detail/base"

    def __init__(self, *, job_id: str, client: Optional[Job51Client] = None):
        """Remember the job id; a default client is created when none is given."""
        http_client = client or create_client()
        super().__init__(http_client=http_client)
        self.job_id = job_id

    def _build_params(self) -> dict:
        """No query parameters: the job id travels in the URL path."""
        return dict()

    def fetch(self) -> Result:
        """Override fetch so the job id is appended to the endpoint path.

        Any exception raised while performing the GET is converted into a
        failed ``Result`` with ``status_code=-1``.
        """
        url_path = "{}/{}".format(self.ENDPOINT, self.job_id)
        try:
            http_code, body = self.http_client.get(url_path)
        except Exception as exc:
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(http_code, body)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the shared 51job response parser."""
        return _parse_job51_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 3. 公司详情(GET)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetCompanyInfo(BaseFetcher):
    """Fetch detailed company information (GET).

    According to the original author's note this endpoint needs no login.

    Example::

        detail = GetCompanyInfo(company_id="9825088").fetch()
    """

    ENDPOINT = "open/noauth/company-info/info-data"

    def __init__(
        self,
        *,
        company_id: str,
        color_one: str = "#ffffff",
        color_two: str = "#ffffffcc",
        client: Optional[Job51Client] = None,
    ):
        """Store the query values; a default client is created when none is given."""
        http_client = client or create_client()
        super().__init__(http_client=http_client)
        self.color_two = color_two
        self.color_one = color_one
        self.company_id = company_id

    def _build_params(self) -> dict:
        """Return the query parameters sent with the GET request."""
        query = {}
        query["companyId"] = self.company_id
        query["colorOne"] = self.color_one
        query["colorTwo"] = self.color_two
        return query

    def fetch(self) -> Result:
        """Override fetch so the query parameters are passed to the GET call.

        Any exception raised while building the parameters or performing the
        GET is converted into a failed ``Result`` with ``status_code=-1``.
        """
        try:
            http_code, body = self.http_client.get(
                self.ENDPOINT, self._build_params()
            )
        except Exception as exc:
            return Result(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(http_code, body)

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the shared 51job response parser."""
        return _parse_job51_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 4. 公司职位列表(POST)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class SearchCompanyJobs(BaseSearcher):
    """Search the open positions of one company (POST).

    According to the original author's note this endpoint needs no login.

    Example::

        api = SearchCompanyJobs(company_id="9825088")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "open/noauth/jobs/company"

    def __init__(
        self,
        *,
        company_id: str,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
        page_size: int = 10,
        client: Optional[Job51Client] = None,
    ):
        """Store the search filters; a default client is created when none is given."""
        http_client = client or create_client()
        super().__init__(page_size=page_size, http_client=http_client)
        self.salary_type = salary_type
        self.function = function
        self.job_area = job_area
        self.company_id = company_id

    def _build_params(self, page_index: int) -> dict:
        """Return the request body for the given page index."""
        paging = {"pageNum": page_index, "pageSize": self.page_size}
        filters = {
            "coId": self.company_id,
            "jobArea": self.job_area,
            "function": self.function,
            "salaryType": self.salary_type,
        }
        # Constant fields sent with every request.
        fixed = {"scene": 14, "requestId": ""}
        return {**paging, **filters, **fixed}

    def _request(self, params: dict):
        """Send the body with POST to ENDPOINT via the HTTP client."""
        response = self.http_client.post(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> Result:
        """Delegate to the shared 51job response parser."""
        return _parse_job51_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 使用示例
|
||
# ─────────────────────────────────────────────
|
||
|
||
if __name__ == "__main__":
    import json

    def _preview(obj) -> str:
        """Render obj as indented JSON and keep the first 300 characters."""
        return json.dumps(obj, ensure_ascii=False, indent=2)[:300]

    # Demo 1: home-page recommended jobs.
    print("=== 1. 首页推荐职位 ===")
    recommended = SearchRecommendJobs(job_area="020000").search()
    print(
        f"成功: {recommended.success}, 本页 {len(recommended.list)} 条, "
        f"is_end_page: {recommended.is_end_page}"
    )
    if recommended.list:
        print(f"第一条: {_preview(recommended.list[0])}...")

    # Demo 2: company detail.
    print("\n=== 2. 公司详情 ===")
    company = GetCompanyInfo(company_id="9825088").fetch()
    print(f"成功: {company.success}")
    if company.data:
        print(f"数据: {_preview(company.data)}...")

    # Demo 3: company job list.
    print("\n=== 3. 公司职位列表 ===")
    company_jobs = SearchCompanyJobs(company_id="9825088").search()
    print(f"成功: {company_jobs.success}, 本页 {len(company_jobs.list)} 条")

    # Demo 4: job detail, using a job id taken from a fresh search result.
    print("\n=== 4. 职位详情 ===")
    seed = SearchRecommendJobs(job_area="020000").search()
    if not seed.list:
        print("搜索结果为空,跳过")
    else:
        top_hit = seed.list[0]
        # Prefer "jobId"; fall back to "id" when it is missing or empty.
        job_id = str(top_hit.get("jobId", "") or top_hit.get("id", ""))
        if not job_id:
            print("搜索结果中未找到 jobId 字段")
        else:
            detail = GetJobDetail(job_id=job_id).fetch()
            print(f"成功: {detail.success}")
            if detail.data:
                print(f"数据: {_preview(detail.data)}...")
|