Plan 01 - facade migration (ARCH-06/07):
- boss.py: import from spiderJobs.platforms.boss.{api,client,sign}
- qcwy.py: import from spiderJobs.platforms.job51.{api,client}
- zhilian.py: import from spiderJobs.platforms.zhilian.{api,client,sign}
- All 3 Service classes: +4 async_* methods via asyncio.to_thread()
Plan 02 - deprecation + cleanup (ARCH-08):
- 11 private copy files (_base, _http_client, _boss/job51/zhilian *): DEPRECATED header
- jobs_spider/ directory: fully deleted (user request)
Full regression: 106 passed in 0.61s
117 lines
3.5 KiB
Python
117 lines
3.5 KiB
Python
# ⚠️ DEPRECATED — 2026-03-21
# 此文件是内部手工复制文件,已废弃,不再由任何 facade 引用。
# 请改用 spiderJobs.platforms.* 或 crawler_core 中的对应模块。
# 将在下一里程碑中删除。
#
"""
通用基类与数据结构

复制自 spiderJobs/core/base.py — import 改为本地引用
"""
|
|
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Callable, Optional
|
|
|
|
from app.services.crawler._http_client import HTTPClient
|
|
|
|
|
|
@dataclass
class ApiResult:
    """Uniform result object returned by the parsing / fetching helpers.

    Only ``success`` and ``status_code`` are required; every other field
    has a default so that failure results can be built with just an
    ``error`` message.
    """

    # True when both the transport and the business status were accepted.
    success: bool
    # Status code reported for the call; the fetchers in this module use -1
    # when the request itself raised.
    status_code: int
    # Raw payload carried along with a successful result.
    data: Any = None
    # Items of the current page.  NOTE: the field name shadows the builtin
    # ``list`` inside the class body; the ``list[dict]`` annotation is only
    # safe because this module postpones annotation evaluation
    # (``from __future__ import annotations``).
    list: list[dict] = field(default_factory=list)
    # Item count reported by the payload.
    count: int = 0
    # Whether the current page is the last one; defaults to True so that a
    # result without paging information ends pagination.
    is_end_page: bool = True
    # Human-readable failure description, ``None`` when not set.
    error: Optional[str] = None
|
|
|
|
|
|
def parse_response(http_code: int, raw: Any) -> ApiResult:
    """Turn an HTTP status plus decoded body into an ``ApiResult``.

    The business status is read from ``raw["statusCode"]`` when ``raw`` is a
    dict; for any other body type the HTTP status doubles as the business
    status.  The call is considered successful only when both are 200.

    Args:
        http_code: HTTP status code of the reply.
        raw: Decoded reply body (a dict is expected, anything else is
            tolerated).

    Returns:
        A failed ``ApiResult`` carrying the offending status code and a
        message taken from ``statusDescription`` / ``message`` (or a generic
        fallback), or a successful ``ApiResult`` whose ``data`` is the
        ``data`` member of the body.  When that member is a dict containing
        a ``list`` key, ``list``, ``count`` and ``is_end_page`` are filled
        from it as well.
    """
    is_mapping = isinstance(raw, dict)
    biz_code = raw.get("statusCode") if is_mapping else http_code

    if http_code != 200 or biz_code != 200:
        # When the body claims 200 but the transport failed, the HTTP status
        # is the code that actually signals the failure; reporting 200 on a
        # failed result would be misleading.
        failed_code = http_code if biz_code == 200 else biz_code
        if is_mapping:
            message = (
                raw.get("statusDescription")
                or raw.get("message")
                or f"请求失败: {failed_code}"
            )
        else:
            message = f"请求失败: {http_code}"
        return ApiResult(
            success=False,
            # A missing / falsy business code falls back to the HTTP status.
            status_code=failed_code or http_code,
            error=message,
        )

    # A null or absent ``data`` member is normalised to an empty dict.
    payload = (raw.get("data") or {}) if is_mapping else {}

    if isinstance(payload, dict) and "list" in payload:
        # ``list`` / ``count`` may be present but null; never hand ``None``
        # to consumers that iterate or extend with the page items.
        items = payload.get("list")
        return ApiResult(
            success=True,
            status_code=200,
            data=payload,
            list=items if isinstance(items, list) else [],
            count=payload.get("count") or 0,
            is_end_page=payload.get("isEndPage", True),
        )

    return ApiResult(success=True, status_code=200, data=payload)
|
|
|
|
|
|
class BaseFetcher:
    """Base class for helpers that issue one GET request and parse the reply.

    Subclasses are expected to set ``ENDPOINT`` and implement
    ``_build_params``; ``_parse`` can be overridden when the default
    ``parse_response`` handling does not fit.
    """

    # First argument handed to the client's ``get``; empty in the base class.
    ENDPOINT: str = ""

    def __init__(self, http_client: HTTPClient):
        """Remember the client that :meth:`fetch` will use."""
        self._http = http_client

    def _build_params(self) -> dict:
        """Return the request parameters; must be supplied by subclasses."""
        raise NotImplementedError

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Convert the reply into an ``ApiResult`` through ``parse_response``."""
        return parse_response(http_code, raw)

    def fetch(self) -> ApiResult:
        """Perform the GET request and return the parsed result.

        Any exception raised while building the parameters, calling the
        client or unpacking its ``(http_code, data)`` reply is turned into a
        failed ``ApiResult`` with ``status_code=-1``.  Note that this
        includes the ``NotImplementedError`` of the base ``_build_params``,
        because the parameters are built inside the ``try``.  Exceptions
        raised by ``_parse`` are not caught.
        """
        try:
            reply = self._http.get(self.ENDPOINT, self._build_params())
            status, body = reply
        except Exception as exc:
            return ApiResult(success=False, status_code=-1, error=str(exc))
        else:
            return self._parse(status, body)
|
|
|
|
|
|
class BaseSearcher:
    """Base class for paged searches: one request per page plus a page loop.

    Subclasses are expected to set ``ENDPOINT`` and implement
    ``_build_params``; ``_request`` and ``_parse`` can be overridden when the
    default POST / ``parse_response`` handling does not fit.
    """

    # First argument handed to the client's ``post``; empty in the base class.
    ENDPOINT: str = ""

    def __init__(self, page_size: int = 15, http_client: Optional[HTTPClient] = None):
        """Store the page size and the (optional) HTTP client.

        Args:
            page_size: Stored on the instance; not used by this base class
                itself (presumably read by subclasses' ``_build_params`` —
                confirm against the subclasses).
            http_client: Client used by the default ``_request``.  May be
                omitted, in which case the default ``_request`` cannot work.
        """
        self.page_size = page_size
        self._http = http_client

    def _build_params(self, page_index: int) -> dict:
        """Return the request parameters for ``page_index``; subclass hook."""
        raise NotImplementedError

    def _request(self, params: dict) -> tuple[int, Any]:
        """POST ``params`` to ``ENDPOINT`` and return ``(http_code, data)``."""
        return self._http.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Convert the reply into an ``ApiResult`` through ``parse_response``."""
        return parse_response(http_code, raw)

    def search(self, page_index: int = 1) -> ApiResult:
        """Request a single page and return the parsed result.

        Exceptions raised by ``_build_params`` propagate to the caller.
        Exceptions raised by ``_request`` (or while unpacking its reply) are
        reported as a failed ``ApiResult`` with ``status_code=-1``.
        """
        params = self._build_params(page_index)
        try:
            http_code, data = self._request(params)
        except Exception as e:
            # Deliberate best-effort: a failed request becomes a failed result.
            return ApiResult(success=False, status_code=-1, error=str(e))
        return self._parse(http_code, data)

    def load_all(
        self,
        max_pages: int = 10,
        on_page: Optional[Callable[[ApiResult, int], None]] = None,
    ) -> list[dict]:
        """Collect the items of consecutive pages, starting at page 1.

        Paging stops at the first unsuccessful page (its items are not
        collected), at the first page flagged ``is_end_page``, or after
        ``max_pages`` pages.

        Args:
            max_pages: Upper bound on the number of pages requested; a value
                below 1 requests nothing.
            on_page: Optional callback invoked with ``(result, page_index)``
                after each successful page has been collected.

        Returns:
            The concatenated items of all collected pages.
        """
        all_list: list[dict] = []
        for page_index in range(1, max_pages + 1):
            result = self.search(page_index=page_index)
            if not result.success:
                break
            # A successful page may carry ``list=None`` (e.g. a null list in
            # the payload); treat it as an empty page instead of crashing.
            all_list.extend(result.list or [])
            if on_page is not None:
                on_page(result, page_index)
            if result.is_end_page:
                break
        return all_list
|