win 8c2c2d29d7 feat(03): migrate job51+zhilian to crawler_core (ARCH-04/05)
job51 (spiderJobs/platforms/job51/):
- client.py: HTTPClient+Job51Sign from crawler_core
- api.py: ApiResult→Result, self._http→self.http_client, _request() POST overrides
- main.py: BaseFetcher/BaseSearcher from crawler_core
- sign.py: backward-compatible stub re-exporting crawler_core.qcwy.sign.Job51Sign

zhilian (spiderJobs/platforms/zhilian/):
- client.py: HTTPClient+ZhilianSign from crawler_core
- api.py: add _parse_zhilian_response (HTTP 200=success), add _parse()/_request()
  to all classes (GET fetchers + POST searcher overrides)
- main.py: BaseFetcher/BaseSearcher from crawler_core
- sign.py: backward-compatible stub re-exporting crawler_core.zhilian.sign.ZhilianSign

tests: 34 new mock tests (17 job51 + 17 zhilian)
Full regression: 98 passed (job51:17 + zhilian:17 + boss:22 + crawler_core:41 + 1)
2026-03-21 19:18:22 +08:00

282 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
智联招聘 - 所有 API 接口
每个类只负责参数构建,HTTP 和算法由 client / core 层处理
"""
from __future__ import annotations
from typing import Any, Optional
from crawler_core.base import BaseFetcher, BaseSearcher, parse_response, Result
# ─────────────────────────────────────────────
# 智联响应解析(覆写默认算法)
# ─────────────────────────────────────────────
def _parse_zhilian_response(http_code: int, raw: Any) -> Result:
    """Convert a raw Zhilian (cgate / capi) response into a ``Result``.

    The handled body shapes are ``{"data": {...}}`` and
    ``{"data": {"list": [...]}}``. Any HTTP 200 response whose body is a dict
    is reported as successful; no business status field is inspected.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body.

    Returns:
        A failed ``Result`` when ``http_code`` is not 200 or ``raw`` is not a
        dict. Otherwise a successful ``Result`` carrying ``raw["data"]`` (or
        ``{}`` when that value is missing or falsy). When the payload is a
        dict containing a ``"list"`` key, ``list``, ``count`` and
        ``is_end_page`` are populated as well.
    """
    if http_code != 200:
        return Result(success=False, status_code=http_code,
                      error=f"HTTP 请求失败: {http_code}")
    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    payload = raw.get("data") or {}

    # List-style response: the payload carries the page items under "list".
    if isinstance(payload, dict) and "list" in payload:
        items = payload["list"]
        if items is None:
            # An explicit null list would otherwise break len() below.
            items = []

        page_info = raw.get("pageInfo")
        if not isinstance(page_info, dict):
            # A null / malformed "pageInfo" would otherwise break .get().
            page_info = {}

        # Prefer the top-level pageInfo total; fall back to the payload's own
        # total, and finally to the number of items on this page.
        num_found = page_info.get("numFound", 0) or payload.get("numFound", len(items))
        return Result(
            success=True, status_code=200, data=payload,
            list=items,
            count=num_found,
            is_end_page=len(items) == 0,
        )

    return Result(success=True, status_code=200, data=payload)
from spiderJobs.platforms.zhilian.client import ZhilianClient, create_cgate_client, create_capi_client
# ─────────────────────────────────────────────
# 1. 职位搜索(POST cgate)
# ─────────────────────────────────────────────
# Fixed fields included in every job-search request body; the per-request
# fields (paging, keyword, city, filters) are layered on top by
# SearchPositions._build_params.
_SEARCH_BODY = {
    "eventScenario": "wxmpZhaopinSearchV2",
    "filterMinSalary": 1,
    "S_SOU_EXPAND": "SOU_COMPANY_ID",
    "sortType": "DEFAULT",
    "resumeNumber": "",
    "version": "8.11.22",
    "identity": 0,
    "anonymous": 1,
}
# Filter keys that SearchPositions copies from the caller's ``filters`` dict
# into the request body; any other key in ``filters`` is ignored.
_FILTER_KEYS = [
    "S_SOU_SALARY",
    "S_SOU_EDUCATION_LOWESTLEVEL",
    "S_SOU_REFRESH_DATE",
    "S_SOU_WORK_EXPERIENCE",
    "S_SOU_POSITION_TYPE",
    "S_SOU_COMPANY_TYPE",
    "S_SOU_COMPANY_SCALE",
    "welfareLabels",
    "S_SOU_JD_INDUSTRY_LEVEL",
]
class SearchPositions(BaseSearcher):
    """Job search against the cgate search endpoint, sent as a POST request.

    This class only builds the request body, issues the POST and delegates
    response parsing to ``_parse_zhilian_response``. The entry points used in
    the example below are not defined here (presumably inherited from
    ``BaseSearcher`` - confirm against crawler_core).

    Example::

        api = SearchPositions(keyword="Python", city_code=538)
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/positionbusiness/searchrecommend/searchPositions"

    def __init__(
        self,
        *,
        keyword: str = "",
        city_code: int | str = "",
        collected_purpose: Optional[dict] = None,
        filters: Optional[dict] = None,
        page_size: int = 15,
        client: Optional[ZhilianClient] = None,
    ):
        """Store the search criteria and pick the HTTP client.

        Args:
            keyword: Free-text keyword; sent as ``S_SOU_FULL_INDEX`` unless the
                purpose dict already selected a job type via
                ``S_SOU_JD_JOB_LEVEL3``.
            city_code: City code; sent as ``S_SOU_WORK_CITY`` unless the
                purpose dict already supplied a city.
            collected_purpose: Optional job-seeking preference dict that is
                translated by ``_purpose_params``.
            filters: Optional extra filters; only keys listed in
                ``_FILTER_KEYS`` with truthy values are sent.
            page_size: Number of items requested per page.
            client: Client to use; a cgate client is created when omitted.
        """
        http_client = client if client else create_cgate_client()
        super().__init__(page_size=page_size, http_client=http_client)
        self.keyword = keyword
        self.city_code = city_code
        self.collected_purpose = collected_purpose
        self.filters = filters if filters else {}

    def _build_params(self, page_index: int) -> dict:
        """Assemble the POST body for the page ``page_index``."""
        request_body = dict(_SEARCH_BODY)
        request_body["pageIndex"] = page_index
        request_body["pageSize"] = self.page_size

        # Purpose-derived fields go in first so the checks below can see them.
        if self.collected_purpose:
            purpose_fields = self._purpose_params(self.collected_purpose, page_index)
            request_body.update(purpose_fields)

        # The keyword is skipped only when a job-type code was set; it does
        # replace a job-type *name* that the purpose put in S_SOU_FULL_INDEX.
        if self.keyword and "S_SOU_JD_JOB_LEVEL3" not in request_body:
            request_body["S_SOU_FULL_INDEX"] = self.keyword

        # A city coming from the purpose wins over the explicit city_code.
        if self.city_code and "S_SOU_WORK_CITY" not in request_body:
            request_body["S_SOU_WORK_CITY"] = self.city_code

        # Copy the whitelisted filters that carry a truthy value.
        for filter_key in _FILTER_KEYS:
            if self.filters.get(filter_key):
                request_body[filter_key] = self.filters[filter_key]

        return request_body

    def _request(self, params: dict):
        """Send the search as a POST request to ``ENDPOINT``."""
        return self.http_client.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)

    @staticmethod
    def _purpose_params(purpose: dict, page_index: int) -> dict:
        """Translate a job-seeking purpose dict into search body fields.

        Args:
            purpose: Preference dict; the keys read are
                ``pnew_preferred_job_type``, ``job_type_name``, ``city_id``,
                ``preferred_location``, ``preferred_salary_min`` and
                ``preferred_salary_max``.
            page_index: Page number, echoed back as ``pageIndex``.

        Returns:
            A dict with ``pageIndex`` plus whichever of
            ``S_SOU_JD_JOB_LEVEL3`` / ``S_SOU_FULL_INDEX``,
            ``S_SOU_WORK_CITY`` and ``S_SOU_SALARY`` could be derived.
        """
        fields: dict = {"pageIndex": page_index}

        # Job type: a type code takes precedence over the type name.
        job_type_code = purpose.get("pnew_preferred_job_type", "")
        job_type_name = purpose.get("job_type_name", "")
        if job_type_code:
            fields["S_SOU_JD_JOB_LEVEL3"] = job_type_code
        elif job_type_name:
            fields["S_SOU_FULL_INDEX"] = job_type_name

        # City: city_id first, preferred_location as the fallback.
        work_city = purpose.get("city_id", "") or purpose.get("preferred_location", "")
        if work_city:
            fields["S_SOU_WORK_CITY"] = work_city

        # Salary: sent as "min,max" when a usable minimum or any maximum exists.
        salary_low = purpose.get("preferred_salary_min", "")
        salary_high = purpose.get("preferred_salary_max", "")
        has_lower_bound = salary_low not in ("", "-1")
        has_upper_bound = salary_high != ""
        if has_lower_bound or has_upper_bound:
            fields["S_SOU_SALARY"] = f"{salary_low},{salary_high}"

        return fields
# ─────────────────────────────────────────────
# 2. 职位详情(GET cgate)
# ─────────────────────────────────────────────
class GetPositionDetail(BaseFetcher):
    """Job position detail lookup on the cgate gateway.

    Only parameter building and response parsing are defined here; the
    ``fetch()`` entry point used below is not defined in this class
    (presumably inherited from ``BaseFetcher`` - confirm against crawler_core).

    Example::

        detail = GetPositionDetail(number="CC462451910J40881838003").fetch()
    """

    ENDPOINT = "/positionbusiness/position/getPositionModule"

    def __init__(self, *, number: str, identity: int = 0, client: Optional[ZhilianClient] = None):
        """Store the position number and identity flag.

        Args:
            number: Position number to look up.
            identity: Value sent as the ``identity`` query field.
            client: Client to use; a cgate client is created when omitted.
        """
        http_client = client if client else create_cgate_client()
        super().__init__(http_client=http_client)
        self.number = number
        self.identity = identity

    def _build_params(self) -> dict:
        """Return the request parameters for the detail lookup."""
        return dict(number=self.number, identity=self.identity, resumeNumber="")

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
# ─────────────────────────────────────────────
# 3. 企查查工商信息(GET cgate)
# ─────────────────────────────────────────────
class GetCompanyExtDetail(BaseFetcher):
    """Company business-registration (Qichacha) information lookup.

    Only parameter building and response parsing are defined here; the
    ``fetch()`` entry point used below is not defined in this class
    (presumably inherited from ``BaseFetcher`` - confirm against crawler_core).

    Example::

        detail = GetCompanyExtDetail(company_name="上海有大信息科技", company_number="CZ462451910").fetch()
    """

    ENDPOINT = "/riskstorm/company/getCompanyExtDetail"

    def __init__(self, *, company_name: str, company_number: str, client: Optional[ZhilianClient] = None):
        """Store the company identifiers.

        Args:
            company_name: Company name, sent as ``companyName``.
            company_number: Company number, sent as ``companyNumber``.
            client: Client to use; a cgate client is created when omitted.
        """
        http_client = client if client else create_cgate_client()
        super().__init__(http_client=http_client)
        self.company_name = company_name
        self.company_number = company_number

    def _build_params(self) -> dict:
        """Return the request parameters identifying the company."""
        return dict(companyName=self.company_name, companyNumber=self.company_number)

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
# ─────────────────────────────────────────────
# 4. 公司详细信息(GET cgate)
# ─────────────────────────────────────────────
class GetCompanyDetail(BaseFetcher):
    """Company detail lookup on the cgate gateway.

    Only parameter building and response parsing are defined here; the
    ``fetch()`` entry point used below is not defined in this class
    (presumably inherited from ``BaseFetcher`` - confirm against crawler_core).

    Example::

        detail = GetCompanyDetail(number="CZ462451910").fetch()
    """

    ENDPOINT = "/positionbusiness/exposure/companyDetail"

    def __init__(self, *, number: str, client: Optional[ZhilianClient] = None):
        """Store the company number.

        Args:
            number: Company number to look up.
            client: Client to use; a cgate client is created when omitted.
        """
        http_client = client if client else create_cgate_client()
        super().__init__(http_client=http_client)
        self.number = number

    def _build_params(self) -> dict:
        """Return the request parameters identifying the company."""
        return dict(number=self.number)

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
# ─────────────────────────────────────────────
# 5. 公司招聘职位列表(GET capi)
# ─────────────────────────────────────────────
class SearchCompanyPositions(BaseSearcher):
    """List of the positions a company is hiring for (GET on the capi gateway).

    The request parameters start from the client's signer output and are then
    extended with the company / paging fields. The entry points used in the
    example below are not defined here (presumably inherited from
    ``BaseSearcher`` - confirm against crawler_core).

    Example::

        api = SearchCompanyPositions(company_id="CZ462451910")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/capi/searchrecommend/searchPositionsCompany"

    def __init__(
        self,
        *,
        company_id: str,
        job_level: str = "",
        city_code: str = "",
        page_size: int = 30,
        client: Optional[ZhilianClient] = None,
    ):
        """Store the query criteria and pick the HTTP client.

        Args:
            company_id: Company id, sent as ``S_SOU_COMPANY_ID``.
            job_level: Optional job level, sent as ``S_SOU_JD_JOB_LEVEL``.
            city_code: Optional city code, sent as ``S_SOU_WORK_CITY``.
            page_size: Number of items requested per page.
            client: Client to use; a capi client is created when omitted.
        """
        capi_client = client if client else create_capi_client()
        # Kept on the instance because _build_params needs the client's signer.
        self._client = capi_client
        super().__init__(page_size=page_size, http_client=capi_client)
        self.company_id = company_id
        self.job_level = job_level
        self.city_code = city_code

    def _build_params(self, page_index: int) -> dict:
        """Assemble the signed query parameters for the page ``page_index``."""
        signed_fields = self._client.signer.sign_params()
        # Fixed and paging fields follow the signed ones and win on key clashes.
        query = {
            **signed_fields,
            "S_SOU_COMPANY_ID": self.company_id,
            "S_SOU_POSITION_SOURCE_TYPE": "1",
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": self.page_size,
        }

        # Optional narrowing fields are only sent when they carry a value.
        optional_fields = (
            ("S_SOU_JD_JOB_LEVEL", self.job_level),
            ("S_SOU_WORK_CITY", self.city_code),
        )
        for field_name, field_value in optional_fields:
            if field_value:
                query[field_name] = field_value

        return query

    def _request(self, params: dict) -> tuple[int, Any]:
        """Send the query as a GET request to ``ENDPOINT``."""
        return self.http_client.get(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw) -> "Result":
        """Parse the response with the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
# ─────────────────────────────────────────────
# 使用示例
# ─────────────────────────────────────────────
if __name__ == "__main__":
    # Manual smoke run of every API class in this module. Each call builds its
    # default client, so this presumably issues real requests - run it by
    # hand only. (The previously unused ``import json`` was dropped.)

    print("=== 1. 职位搜索 ===")
    r = SearchPositions(keyword="Python", city_code=538).search()
    print(f"{r.count} 条,本页 {len(r.list)}")

    print("\n=== 2. 职位详情 ===")
    r = GetPositionDetail(number="CC462451910J40881838003").fetch()
    print(f"成功: {r.success}")

    print("\n=== 3. 企查查信息 ===")
    r = GetCompanyExtDetail(company_name="上海有大信息科技", company_number="CZ462451910").fetch()
    print(f"成功: {r.success}")

    print("\n=== 4. 公司详情 ===")
    r = GetCompanyDetail(number="CZ462451910").fetch()
    print(f"成功: {r.success}")

    print("\n=== 5. 公司招聘列表 ===")
    r = SearchCompanyPositions(company_id="CZ462451910").search()
    print(f"{r.count} 个职位,本页 {len(r.list)}")