job51 (spiderJobs/platforms/job51/): - client.py: HTTPClient+Job51Sign from crawler_core - api.py: ApiResult→Result, self._http→self.http_client, _request() POST overrides - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.qcwy.sign.Job51Sign zhilian (spiderJobs/platforms/zhilian/): - client.py: HTTPClient+ZhilianSign from crawler_core - api.py: add _parse_zhilian_response (HTTP 200=success), add _parse()/_request() to all classes (GET fetchers + POST searcher overrides) - main.py: BaseFetcher/BaseSearcher from crawler_core - sign.py: backward-compatible stub re-exporting crawler_core.zhilian.sign.ZhilianSign tests: 34 new mock tests (17 job51 + 17 zhilian) Full regression: 98 passed (job51:17 + zhilian:17 + boss:22 + crawler_core:41 + 1)
282 lines
11 KiB
Python
282 lines
11 KiB
Python
"""
|
||
智联招聘 - 所有 API 接口
|
||
每个类只负责参数构建,HTTP 和算法由 client / core 层处理
|
||
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from typing import Any, Optional
|
||
|
||
from crawler_core.base import BaseFetcher, BaseSearcher, parse_response, Result
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 智联响应解析(覆写默认算法)
|
||
# ─────────────────────────────────────────────
|
||
|
||
def _parse_zhilian_response(http_code: int, raw: Any) -> Result:
    """
    Convert a raw Zhilian (cgate / capi) response into a ``Result``.

    The body is expected to look like ``{"data": {...}}`` or
    ``{"data": {"list": [...]}}``.  Any HTTP 200 response whose body is a
    dict is treated as success; no ``statusCode`` field is inspected here.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body.

    Returns:
        A failed ``Result`` when ``http_code`` is not 200 or ``raw`` is not a
        dict.  Otherwise a successful ``Result`` whose ``data`` is
        ``raw["data"]`` (``{}`` when missing or falsy).  When that payload is
        a dict containing a ``"list"`` key, ``list``, ``count`` and
        ``is_end_page`` are filled in as well.
    """
    if http_code != 200:
        return Result(success=False, status_code=http_code,
                      error=f"HTTP 请求失败: {http_code}")
    if not isinstance(raw, dict):
        return Result(success=False, status_code=http_code, error="响应格式异常")

    payload = raw.get("data") or {}

    # Non-list payloads are passed through untouched.
    if not (isinstance(payload, dict) and "list" in payload):
        return Result(success=True, status_code=200, data=payload)

    # A JSON ``null`` for "list" would otherwise blow up in ``len()``;
    # treat it the same as an empty page.
    items = payload.get("list") or []

    # Likewise "pageInfo" may be present but null / not an object.
    page_info = raw.get("pageInfo")
    if not isinstance(page_info, dict):
        page_info = {}

    # Prefer the top-level pageInfo total, then the payload's own total,
    # and finally fall back to the size of the current page.
    num_found = page_info.get("numFound", 0) or payload.get("numFound", len(items))

    return Result(
        success=True,
        status_code=200,
        data=payload,
        list=items,
        count=num_found,
        is_end_page=len(items) == 0,
    )
|
||
from spiderJobs.platforms.zhilian.client import ZhilianClient, create_cgate_client, create_capi_client
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 1. 职位搜索(POST cgate)
|
||
# ─────────────────────────────────────────────
|
||
|
||
_SEARCH_BODY = {
|
||
"eventScenario": "wxmpZhaopinSearchV2",
|
||
"filterMinSalary": 1,
|
||
"S_SOU_EXPAND": "SOU_COMPANY_ID",
|
||
"sortType": "DEFAULT",
|
||
"resumeNumber": "",
|
||
"version": "8.11.22",
|
||
"identity": 0,
|
||
"anonymous": 1,
|
||
}
|
||
|
||
_FILTER_KEYS = [
|
||
"S_SOU_SALARY", "S_SOU_EDUCATION_LOWESTLEVEL", "S_SOU_REFRESH_DATE",
|
||
"S_SOU_WORK_EXPERIENCE", "S_SOU_POSITION_TYPE", "S_SOU_COMPANY_TYPE",
|
||
"S_SOU_COMPANY_SCALE", "welfareLabels", "S_SOU_JD_INDUSTRY_LEVEL",
|
||
]
|
||
|
||
|
||
class SearchPositions(BaseSearcher):
    """
    Paginated position search (POST to the cgate endpoint).

    Usage::

        api = SearchPositions(keyword="Python", city_code=538)
        result = api.search()
        all_jobs = api.load_all(max_pages=5)
    """

    ENDPOINT = "/positionbusiness/searchrecommend/searchPositions"

    def __init__(
        self,
        *,
        keyword: str = "",
        city_code: int | str = "",
        collected_purpose: Optional[dict] = None,
        filters: Optional[dict] = None,
        page_size: int = 15,
        client: Optional[ZhilianClient] = None,
    ):
        """
        Args:
            keyword: Free-text query, sent as ``S_SOU_FULL_INDEX``.
            city_code: City filter, sent as ``S_SOU_WORK_CITY``.
            collected_purpose: Optional job-intention dict; when given, its
                fields are translated by ``_purpose_params`` and take
                precedence over ``city_code`` (and over ``keyword`` when it
                yields a job-type code).
            filters: Optional extra filters; only keys listed in
                ``_FILTER_KEYS`` with truthy values are forwarded.
            page_size: Number of items requested per page.
            client: HTTP client to use; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(page_size=page_size, http_client=http)
        self.keyword = keyword
        self.city_code = city_code
        self.collected_purpose = collected_purpose
        self.filters = filters if filters else {}

    def _build_params(self, page_index: int) -> dict:
        """Assemble the POST body for the given page."""
        payload = dict(_SEARCH_BODY)
        payload["pageIndex"] = page_index
        payload["pageSize"] = self.page_size

        if self.collected_purpose:
            payload.update(self._purpose_params(self.collected_purpose, page_index))

        # The keyword is only skipped when the purpose supplied a job-type
        # code; otherwise it is written (replacing any purpose job name).
        if self.keyword and "S_SOU_JD_JOB_LEVEL3" not in payload:
            payload["S_SOU_FULL_INDEX"] = self.keyword

        # A city coming from the purpose wins over the explicit city_code.
        if self.city_code and "S_SOU_WORK_CITY" not in payload:
            payload["S_SOU_WORK_CITY"] = self.city_code

        # Forward whitelisted filters that carry a truthy value.
        for key in _FILTER_KEYS:
            if self.filters.get(key):
                payload[key] = self.filters[key]
        return payload

    def _request(self, params: dict):
        """Send the search as a POST request through the HTTP client."""
        return self.http_client.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw) -> "Result":
        """Delegate response parsing to the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)

    @staticmethod
    def _purpose_params(purpose: dict, page_index: int) -> dict:
        """
        Translate a job-intention dict into search-body fields.

        Reads ``pnew_preferred_job_type`` / ``job_type_name``, ``city_id`` /
        ``preferred_location`` and ``preferred_salary_min`` /
        ``preferred_salary_max`` from ``purpose``; the result always contains
        ``pageIndex``.
        """
        out: dict = {"pageIndex": page_index}

        job_type = purpose.get("pnew_preferred_job_type", "")
        job_name = purpose.get("job_type_name", "")
        if job_type:
            out["S_SOU_JD_JOB_LEVEL3"] = job_type
        elif job_name:
            out["S_SOU_FULL_INDEX"] = job_name

        city = purpose.get("city_id", "") or purpose.get("preferred_location", "")
        if city:
            out["S_SOU_WORK_CITY"] = city

        low = purpose.get("preferred_salary_min", "")
        high = purpose.get("preferred_salary_max", "")
        # NOTE(review): only the minimum treats "-1" as "unset"; a maximum of
        # "-1" still produces a salary filter -- confirm this is intended.
        if low in ("", "-1") and high == "":
            return out
        out["S_SOU_SALARY"] = f"{low},{high}"
        return out
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 2. 职位详情(GET cgate)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetPositionDetail(BaseFetcher):
    """
    Fetch the detail of a single position (cgate endpoint).

    Usage::

        detail = GetPositionDetail(number="CC462451910J40881838003").fetch()
    """

    ENDPOINT = "/positionbusiness/position/getPositionModule"

    def __init__(self, *, number: str, identity: int = 0, client: Optional[ZhilianClient] = None):
        """
        Args:
            number: Position number to look up.
            identity: Value sent as the ``identity`` request parameter.
            client: HTTP client to use; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(http_client=http)
        self.number = number
        self.identity = identity

    def _build_params(self) -> dict:
        """Return the query parameters for the detail request."""
        return dict(number=self.number, identity=self.identity, resumeNumber="")

    def _parse(self, http_code: int, raw) -> "Result":
        """Delegate response parsing to the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 3. 企查查(工商)信息(GET cgate)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetCompanyExtDetail(BaseFetcher):
    """
    Fetch a company's business-registration (Qichacha) information.

    Usage::

        detail = GetCompanyExtDetail(company_name="上海有大信息科技", company_number="CZ462451910").fetch()
    """

    ENDPOINT = "/riskstorm/company/getCompanyExtDetail"

    def __init__(self, *, company_name: str, company_number: str, client: Optional[ZhilianClient] = None):
        """
        Args:
            company_name: Sent as the ``companyName`` request parameter.
            company_number: Sent as the ``companyNumber`` request parameter.
            client: HTTP client to use; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(http_client=http)
        self.company_name = company_name
        self.company_number = company_number

    def _build_params(self) -> dict:
        """Return the query parameters identifying the company."""
        return dict(companyName=self.company_name, companyNumber=self.company_number)

    def _parse(self, http_code: int, raw) -> "Result":
        """Delegate response parsing to the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 4. 公司详细信息(GET cgate)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class GetCompanyDetail(BaseFetcher):
    """
    Fetch detailed information about a company (cgate endpoint).

    Usage::

        detail = GetCompanyDetail(number="CZ462451910").fetch()
    """

    ENDPOINT = "/positionbusiness/exposure/companyDetail"

    def __init__(self, *, number: str, client: Optional[ZhilianClient] = None):
        """
        Args:
            number: Company number, sent as the ``number`` request parameter.
            client: HTTP client to use; a cgate client is created when omitted.
        """
        http = client if client else create_cgate_client()
        super().__init__(http_client=http)
        self.number = number

    def _build_params(self) -> dict:
        """Return the query parameters identifying the company."""
        return dict(number=self.number)

    def _parse(self, http_code: int, raw) -> "Result":
        """Delegate response parsing to the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 5. 公司招聘职位列表(GET capi)
|
||
# ─────────────────────────────────────────────
|
||
|
||
class SearchCompanyPositions(BaseSearcher):
    """
    Paginated list of the positions a company is hiring for (GET, capi).

    Usage::

        api = SearchCompanyPositions(company_id="CZ462451910")
        result = api.search()
        all_jobs = api.load_all(max_pages=3)
    """

    ENDPOINT = "/capi/searchrecommend/searchPositionsCompany"

    def __init__(
        self,
        *,
        company_id: str,
        job_level: str = "",
        city_code: str = "",
        page_size: int = 30,
        client: Optional[ZhilianClient] = None,
    ):
        """
        Args:
            company_id: Sent as ``S_SOU_COMPANY_ID``.
            job_level: Optional filter, sent as ``S_SOU_JD_JOB_LEVEL``.
            city_code: Optional filter, sent as ``S_SOU_WORK_CITY``.
            page_size: Number of items requested per page.
            client: HTTP client to use; a capi client is created when omitted.
        """
        # Keep a direct handle on the client: _build_params needs its signer.
        self._client = client if client else create_capi_client()
        super().__init__(page_size=page_size, http_client=self._client)
        self.company_id = company_id
        self.job_level = job_level
        self.city_code = city_code

    def _build_params(self, page_index: int) -> dict:
        """Build the signed query parameters for the given page."""
        # Signature fields go in first; the fixed fields below overwrite
        # any key the signer happens to share with them.
        signed = self._client.signer.sign_params()
        query = {
            **signed,
            "S_SOU_COMPANY_ID": self.company_id,
            "S_SOU_POSITION_SOURCE_TYPE": "1",
            "eventScenario": "wxmpZhaopinSearchPositionsCompany",
            "pageCode": "wxmpZhaopinCompanyDetailPage",
            "pageIndex": page_index,
            "pageSize": self.page_size,
        }

        # Optional filters are only sent when they carry a truthy value.
        optional = (
            ("S_SOU_JD_JOB_LEVEL", self.job_level),
            ("S_SOU_WORK_CITY", self.city_code),
        )
        for key, value in optional:
            if value:
                query[key] = value
        return query

    def _request(self, params: dict) -> tuple[int, Any]:
        """Issue the search as a GET request through the HTTP client."""
        return self.http_client.get(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw) -> "Result":
        """Delegate response parsing to the Zhilian-specific parser."""
        return _parse_zhilian_response(http_code, raw)
|
||
|
||
|
||
# ─────────────────────────────────────────────
|
||
# 使用示例
|
||
# ─────────────────────────────────────────────
|
||
|
||
if __name__ == "__main__":
    import json

    # Manual smoke run: call each API class once with default clients and
    # print a short summary of the outcome.

    print("=== 1. 职位搜索 ===")
    position_search = SearchPositions(keyword="Python", city_code=538)
    r = position_search.search()
    print(f"共 {r.count} 条,本页 {len(r.list)} 条")

    print("\n=== 2. 职位详情 ===")
    position_detail = GetPositionDetail(number="CC462451910J40881838003")
    r = position_detail.fetch()
    print(f"成功: {r.success}")

    print("\n=== 3. 企查查信息 ===")
    company_ext = GetCompanyExtDetail(
        company_name="上海有大信息科技",
        company_number="CZ462451910",
    )
    r = company_ext.fetch()
    print(f"成功: {r.success}")

    print("\n=== 4. 公司详情 ===")
    company_detail = GetCompanyDetail(number="CZ462451910")
    r = company_detail.fetch()
    print(f"成功: {r.success}")

    print("\n=== 5. 公司招聘列表 ===")
    company_positions = SearchCompanyPositions(company_id="CZ462451910")
    r = company_positions.search()
    print(f"共 {r.count} 个职位,本页 {len(r.list)} 条")
|