Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:
            crawler_core   ← shared base library
              ↑        ↑
      spiderJobs      app/services/crawler/
     (standalone)     (FastAPI backend, private layer)
Changes:
- boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc.
- _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
+ fix self._http → self.http_client
- _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient
+ _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign
Full regression: 106 passed in 0.68s
183 lines
6.9 KiB
Python
183 lines
6.9 KiB
Python
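# ⚠️ DEPRECATED — 2026-03-21
# This file is an internal hand-made copy. It is deprecated and is no longer
# referenced by any facade.
# Use spiderJobs.platforms.* or the corresponding modules in crawler_core instead.
# It will be removed in the next milestone.
#
"""
Boss Zhipin - all API endpoints.

Copied from spiderJobs/platforms/boss/api.py — imports changed to local references.
"""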
|
|
|
|
from __future__ import annotations
|
|
|
|
from typing import Any, Optional
|
|
from urllib.parse import urlencode
|
|
|
|
from crawler_core.base import BaseFetcher, BaseSearcher, Result as ApiResult
|
|
from app.services.crawler._boss_client import BossClient, create_client
|
|
|
|
|
|
def _parse_boss_response(http_code: int, raw: Any) -> ApiResult:
    """Convert a raw Boss API response into an ``ApiResult``.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body. A dict carrying ``code``, ``message`` and
            ``zpData`` entries is what the checks below look for.

    Returns:
        A failed ``ApiResult`` when the HTTP status is not 200, when the body
        is not a dict, or when the business ``code`` is non-zero (in that
        case ``status_code`` carries the business code). Otherwise a
        successful result whose ``data`` is the ``zpData`` payload. If the
        payload is a dict holding a ``jobList`` or ``list`` entry, the result
        additionally carries that list, its length as ``count``, and
        ``is_end_page`` derived from the payload's ``hasMore`` flag.
    """
    if http_code != 200:
        return ApiResult(success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}")

    if not isinstance(raw, dict):
        return ApiResult(success=False, status_code=http_code, error="响应格式异常")

    # A missing "code" is treated as a business failure (-1).
    biz_code = raw.get("code", -1)
    if biz_code != 0:
        return ApiResult(
            success=False, status_code=biz_code,
            error=raw.get("message") or f"业务错误: {biz_code}",
        )

    payload = raw.get("zpData") or {}

    if isinstance(payload, dict):
        # "jobList" takes precedence over "list" when both are present.
        for list_key in ("jobList", "list"):
            if list_key not in payload:
                continue
            # The key can be present with a null value; ``or []`` keeps the
            # ``len()`` call below from failing on None.
            items = payload.get(list_key) or []
            has_more = payload.get("hasMore", False)
            return ApiResult(
                success=True, status_code=200, data=payload,
                list=items, count=len(items), is_end_page=not has_more,
            )

    return ApiResult(success=True, status_code=200, data=payload)
|
|
|
|
|
|
class SearchRecJobs(BaseSearcher):
    """Paged searcher for the mini-app home-page ``recjoblist`` endpoint."""

    ENDPOINT = "/wapi/zpgeek/miniapp/homepage/recjoblist.json"

    def __init__(
        self,
        *,
        city_code: str = "101280600",
        sort_type: int = 1,
        district_code: str = "",
        blue_welfare: str = "",
        encrypt_expect_id: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the search filters and set up the HTTP client.

        All arguments are keyword-only. When ``client`` is falsy a new one is
        obtained from ``create_client()``. The remaining arguments are kept on
        the instance and sent as query parameters by ``_build_params``.
        """
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)
        self.city_code = city_code
        self.sort_type = sort_type
        self.district_code = district_code
        self.blue_welfare = blue_welfare
        self.encrypt_expect_id = encrypt_expect_id

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``."""
        # Key order is kept as in the original implementation.
        params = {
            "cityCode": self.city_code,
            "sortType": self.sort_type,
            "page": page_index,
            "pageSize": self.page_size,
            "encryptExpectId": self.encrypt_expect_id,
            "districtCode": self.district_code,
            "blueWelfare": self.blue_welfare,
            "appId": 10002,
        }
        return params

    def _request(self, params: dict) -> tuple[int, Any]:
        """Issue a GET to ``ENDPOINT`` and return the client's response as-is."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Delegate response parsing to the shared ``_parse_boss_response``."""
        return _parse_boss_response(http_code=http_code, raw=raw)
|
|
|
|
|
|
class GetJobDetail(BaseFetcher):
    """Fetch a job's detail and "improvement" data in one batch request.

    ``fetch`` sends two GET sub-requests through ``BossClient.batch`` and
    ``_parse`` merges their ``zpData`` payloads into a single result.
    """

    # NOTE(review): ENDPOINT is not referenced in this class; presumably
    # ``BossClient.batch`` targets it internally — confirm against the client.
    ENDPOINT = "/wapi/batch/requests"

    # Sub-request paths. They are both the paths sent in the batch and the
    # keys looked up in the batch response, so they are defined once here.
    _DETAIL_PATH = "/wapi/zpgeek/miniapp/job/detail.json"
    _IMPROVEMENT_PATH = "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json"

    def __init__(
        self, *, security_id: str, job_id: str, lid: str = "",
        source: int = 10, client: Optional[BossClient] = None,
    ):
        """Store the job identifiers and set up the HTTP client.

        All arguments are keyword-only. When ``client`` is falsy a new one is
        obtained from ``create_client()``.
        """
        super().__init__(http_client=client or create_client())
        self.security_id = security_id
        self.job_id = job_id
        self.lid = lid
        self.source = source

    def _build_params(self) -> dict:
        """Return an empty dict; ``fetch`` builds its own sub-request queries."""
        return {}

    def fetch(self) -> ApiResult:
        """Run the batch request and return the parsed result.

        Returns:
            The ``ApiResult`` produced by ``_parse``. If the batch call itself
            raises, a failed ``ApiResult`` with ``status_code=-1`` and the
            exception text as ``error`` is returned instead.
        """
        detail_query = urlencode({
            "securityId": self.security_id, "jobId": self.job_id,
            "lid": self.lid, "source": self.source,
        })
        improvement_query = urlencode({
            "securityId": self.security_id, "jobId": self.job_id, "lid": self.lid,
        })
        sub_reqs = [
            {"path": self._DETAIL_PATH, "method": "GET", "query": detail_query},
            {"path": self._IMPROVEMENT_PATH, "method": "GET", "query": improvement_query},
        ]
        try:
            client: BossClient = self.http_client
            http_code, data = client.batch(sub_reqs)
        except Exception as e:
            # Deliberate best-effort: any failure of the request is reported
            # as a failed result rather than raised to the caller.
            return ApiResult(success=False, status_code=-1, error=str(e))
        return self._parse(http_code, data)

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Validate the batch response and merge the two sub-responses.

        Returns:
            A failed ``ApiResult`` when the HTTP status is not 200, the body
            or its ``zpData`` is not a dict, or the business ``code`` is
            non-zero. Otherwise a successful result whose ``data`` is
            ``{"detail": ..., "improvement": ...}``: each value is the
            sub-response's own ``zpData`` when the sub-response is a dict,
            and the sub-response itself otherwise.
        """
        if http_code != 200:
            return ApiResult(success=False, status_code=http_code, error=f"HTTP 请求失败: {http_code}")
        if not isinstance(raw, dict):
            return ApiResult(success=False, status_code=http_code, error="响应格式异常")
        biz_code = raw.get("code", -1)
        if biz_code != 0:
            return ApiResult(success=False, status_code=biz_code, error=raw.get("message") or f"业务错误: {biz_code}")
        zp_data = raw.get("zpData") or {}
        if not isinstance(zp_data, dict):
            # A non-dict payload has no sub-responses to look up; report it
            # as malformed instead of letting AttributeError escape fetch().
            return ApiResult(success=False, status_code=http_code, error="响应格式异常")
        detail = zp_data.get(self._DETAIL_PATH, {})
        improvement = zp_data.get(self._IMPROVEMENT_PATH, {})
        merged = {
            "detail": detail.get("zpData") if isinstance(detail, dict) else detail,
            "improvement": improvement.get("zpData") if isinstance(improvement, dict) else improvement,
        }
        return ApiResult(success=True, status_code=200, data=merged)
|
|
|
|
|
|
class GetBrandDetail(BaseFetcher):
    """Fetcher for the mini-app brand detail endpoint, keyed by brand id."""

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/detail.json"

    def __init__(self, *, brand_id: str, client: Optional[BossClient] = None):
        """Store ``brand_id`` and set up the HTTP client.

        When ``client`` is falsy a new one is obtained from
        ``create_client()``.
        """
        http = client or create_client()
        super().__init__(http_client=http)
        self.brand_id = brand_id

    def _build_params(self) -> dict:
        """Return the query parameters identifying the brand."""
        params = {
            "brandId": self.brand_id,
            "appId": 10002,
        }
        return params

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Delegate response parsing to the shared ``_parse_boss_response``."""
        return _parse_boss_response(http_code=http_code, raw=raw)
|
|
|
|
|
|
class SearchBrandJobs(BaseSearcher):
    """Paged searcher for the mini-app brand ``joblist`` endpoint."""

    ENDPOINT = "/wapi/zpgeek/miniapp/brand/joblist.json"

    def __init__(
        self,
        *,
        brand_id: str,
        query: str = "",
        position_lv1: int = 0,
        city: str = "",
        experience: str = "",
        salary: str = "",
        page_size: int = 15,
        client: Optional[BossClient] = None,
    ):
        """Store the brand id and filters and set up the HTTP client.

        All arguments are keyword-only. When ``client`` is falsy a new one is
        obtained from ``create_client()``.
        """
        http = client or create_client()
        super().__init__(page_size=page_size, http_client=http)
        self.brand_id = brand_id
        self.query = query
        self.position_lv1 = position_lv1
        self.city = city
        self.experience = experience
        self.salary = salary

    def _build_params(self, page_index: int) -> dict:
        """Return the query parameters for the page numbered ``page_index``.

        ``page_size`` is forwarded to the base class in ``__init__`` but is
        not sent as a query parameter here.
        """
        # Key order is kept as in the original implementation.
        params = {
            "brandId": self.brand_id,
            "query": self.query,
            "page": page_index,
            "hasMore": "true",
            "positionLv1": self.position_lv1,
            "city": self.city,
            "experience": self.experience,
            "salary": self.salary,
            "appId": 10002,
        }
        return params

    def _request(self, params: dict) -> tuple[int, Any]:
        """Issue a GET to ``ENDPOINT`` and return the client's response as-is."""
        response = self.http_client.get(self.ENDPOINT, params)
        return response

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Delegate response parsing to the shared ``_parse_boss_response``."""
        return _parse_boss_response(http_code=http_code, raw=raw)
|