"""
core.base - Shared base classes and data structures.

Provides the pieces common to every recruitment platform:
ApiResult, BaseFetcher, BaseSearcher.
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Callable, Optional

from spiderJobs.core.http_client import HTTPClient


# ─────────────────────────────────────────────
# Shared data structures
# ─────────────────────────────────────────────

@dataclass
class ApiResult:
    """Uniform return structure for every API call in this module."""

    # True when both the HTTP status and the business status were 200.
    success: bool
    # Business status code when available, otherwise the HTTP status code;
    # fetch()/search() use -1 when the request itself raised.
    status_code: int
    # The response's "data" payload (an empty dict when absent).
    data: Any = None
    # Items of a list-style payload. The field name shadows the builtin
    # ``list`` inside this class body only; it is kept because it is part of
    # the public interface.
    list: list[dict] = field(default_factory=list)
    # Value of the payload's "count" key (0 when absent).
    count: int = 0
    # Value of the payload's "isEndPage" key (True when absent).
    is_end_page: bool = True
    # Human-readable failure description; None on success.
    error: Optional[str] = None


def _error_text(exc: BaseException) -> str:
    """Return a non-empty description of *exc*.

    ``str(exc)`` is empty for exceptions raised without a message (for
    example a bare ``NotImplementedError``); fall back to the class name so a
    failed ApiResult never carries an empty ``error``.
    """
    return str(exc) or type(exc).__name__


# ─────────────────────────────────────────────
# Default response parsing (overridable)
# ─────────────────────────────────────────────

def parse_response(http_code: int, raw: Any) -> ApiResult:
    """Convert an HTTP status and decoded body into an ApiResult.

    This is the default parsing algorithm; platforms whose response format
    differs can override ``_parse`` in their subclass.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body. When it is a dict, the keys
            ``statusCode``, ``statusDescription``, ``message`` and ``data``
            are consulted; any other type is treated as having no body.

    Returns:
        A failed ApiResult when ``http_code`` or the business ``statusCode``
        is not 200, otherwise a successful one. When the ``data`` payload is
        a dict containing ``list``, the list/count/is_end_page fields are
        filled in from it.
    """
    is_dict = isinstance(raw, dict)
    # A non-dict body carries no business code, so reuse the HTTP status.
    biz_code = raw.get("statusCode") if is_dict else http_code

    if http_code != 200 or biz_code != 200:
        # A dict body without "statusCode" yields biz_code None; report the
        # HTTP status in that case, both as the code and in the message.
        effective_code = biz_code or http_code
        if is_dict:
            error = (
                raw.get("statusDescription")
                or raw.get("message")
                or f"请求失败: {effective_code}"
            )
        else:
            error = f"请求失败: {http_code}"
        return ApiResult(success=False, status_code=effective_code, error=error)

    payload = (raw.get("data") or {}) if is_dict else {}

    if isinstance(payload, dict) and "list" in payload:
        return ApiResult(
            success=True,
            status_code=200,
            data=payload,
            # ``or`` also covers an explicit null, which would otherwise
            # break callers that extend/iterate the list.
            list=payload.get("list") or [],
            count=payload.get("count") or 0,
            is_end_page=payload.get("isEndPage", True),
        )

    return ApiResult(success=True, status_code=200, data=payload)


# ─────────────────────────────────────────────
# Base Fetcher (GET, detail-style endpoints)
# ─────────────────────────────────────────────

class BaseFetcher:
    """Base class for single-object endpoints (GET requests).

    Subclasses must provide:
        ENDPOINT: path of the endpoint
        _build_params(): builds the query parameters

    Subclasses may override:
        _parse(): custom response parsing
    """

    ENDPOINT: str = ""

    def __init__(self, http_client: HTTPClient):
        self._http = http_client

    def _build_params(self) -> dict:
        """Return the query parameters for the request (subclass hook)."""
        raise NotImplementedError

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Parse the response; delegates to module-level parse_response."""
        return parse_response(http_code, raw)

    def fetch(self) -> ApiResult:
        """Issue the GET request and return the parsed result.

        Any exception raised while building the parameters or performing the
        request is converted into a failed ApiResult with status_code -1;
        exceptions raised by ``_parse`` propagate.
        """
        try:
            http_code, data = self._http.get(self.ENDPOINT, self._build_params())
        except Exception as e:
            return ApiResult(success=False, status_code=-1, error=_error_text(e))
        return self._parse(http_code, data)


# ─────────────────────────────────────────────
# Base Searcher (search + pagination)
# ─────────────────────────────────────────────

class BaseSearcher:
    """Base class for list endpoints (with pagination support).

    Subclasses must provide:
        ENDPOINT: path of the endpoint
        _build_params(page_index): builds the request parameters

    Subclasses may override:
        _request(params): POST by default, can be switched to GET
        _parse(): custom response parsing
    """

    ENDPOINT: str = ""

    def __init__(self, page_size: int = 15, http_client: Optional[HTTPClient] = None):
        self.page_size = page_size
        # The default _request() calls self._http.post, so a client must be
        # supplied unless _request is overridden.
        self._http = http_client

    def _build_params(self, page_index: int) -> dict:
        """Return the request parameters for *page_index* (subclass hook)."""
        raise NotImplementedError

    def _request(self, params: dict) -> tuple[int, Any]:
        """Send the request; returns whatever the client's post() returns."""
        return self._http.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Parse the response; delegates to module-level parse_response."""
        return parse_response(http_code, raw)

    def search(self, page_index: int = 1) -> ApiResult:
        """Request one page and return the parsed result.

        Exceptions raised by ``_request`` become a failed ApiResult with
        status_code -1. Exceptions from ``_build_params`` (including the
        default NotImplementedError) and from ``_parse`` propagate.
        """
        params = self._build_params(page_index)
        try:
            http_code, data = self._request(params)
        except Exception as e:
            return ApiResult(success=False, status_code=-1, error=_error_text(e))
        return self._parse(http_code, data)

    def load_all(
        self,
        max_pages: int = 10,
        on_page: Optional[Callable[[ApiResult, int], None]] = None,
    ) -> list[dict]:
        """Load pages 1..max_pages and return all items accumulated.

        Stops early on the first failed page (the failure is printed and the
        items gathered so far are returned) or once a page reports
        ``is_end_page``.

        Args:
            max_pages: Upper bound on the number of pages requested.
            on_page: Optional callback invoked as ``on_page(result,
                page_index)`` after each successful page.

        Returns:
            The concatenated ``list`` entries of every successful page.
        """
        all_list: list[dict] = []
        for page_index in range(1, max_pages + 1):
            result = self.search(page_index=page_index)
            if not result.success:
                print(f"第 {page_index} 页失败: {result.error}")
                break
            # Guard against a custom _parse that leaves ``list`` as None.
            all_list.extend(result.list or [])
            if on_page:
                on_page(result, page_index)
            if result.is_end_page:
                break
        return all_list