"""
core.base - Shared base classes and data structures.

Provides the pieces common to every recruitment platform:
ApiResult, BaseFetcher, BaseSearcher.
"""
|
||
|
||
from __future__ import annotations
|
||
|
||
from dataclasses import dataclass, field
|
||
from typing import Any, Callable, Optional
|
||
|
||
from spiderJobs.core.http_client import HTTPClient
|
||
|
||
|
||
# ─────────────────────────────────────────────
# Shared data structures
# ─────────────────────────────────────────────
|
||
|
||
@dataclass
class ApiResult:
    """Uniform return structure shared by every API call.

    The field order is part of the generated ``__init__`` signature, so it
    must stay as declared here.
    """

    # Whether the call is considered successful.
    success: bool
    # Status code reported for the call.
    status_code: int
    # Payload of the response; None when nothing was attached.
    data: Any = None
    # Items of a list-style response.
    # NOTE: this field name shadows the builtin ``list`` inside the class
    # body. The right-hand side is evaluated before the name is rebound, so
    # ``default_factory=list`` still refers to the builtin, and the
    # annotation is never evaluated because the module enables postponed
    # annotations (``from __future__ import annotations``). The name is kept
    # because it is part of the public interface.
    list: list[dict] = field(default_factory=list)
    # Count value accompanying a list-style response.
    count: int = 0
    # True when no further page should be requested.
    is_end_page: bool = True
    # Human-readable failure description; None when there is none.
    error: Optional[str] = None
|
||
|
||
|
||
# ─────────────────────────────────────────────
# Default response parsing (overridable)
# ─────────────────────────────────────────────
|
||
|
||
def parse_response(http_code: int, raw: Any) -> ApiResult:
    """Turn an HTTP status code and a decoded response body into an ApiResult.

    This is the default parsing algorithm. Platforms whose response format
    differs can override the ``_parse`` method of their fetcher/searcher.

    Args:
        http_code: HTTP status code of the response.
        raw: Decoded response body. Only ``dict`` bodies are inspected.

    Returns:
        * A failed result when ``http_code`` is not 200 or the business code
          is not 200. The business code is ``raw["statusCode"]`` for a dict
          body and ``http_code`` otherwise, so a dict body without
          ``"statusCode"`` counts as a failure.
        * A successful list-style result (``list``, ``count``,
          ``is_end_page`` filled in) when ``raw["data"]`` is a dict that
          contains a ``"list"`` key.
        * Otherwise a successful result carrying the payload in ``data``.
    """
    body_is_dict = isinstance(raw, dict)
    # Without a dict body there is no business code; the HTTP status stands in.
    biz_code = raw.get("statusCode") if body_is_dict else http_code

    if http_code != 200 or biz_code != 200:
        if body_is_dict:
            error = (
                raw.get("statusDescription")
                or raw.get("message")
                or f"请求失败: {biz_code}"
            )
        else:
            error = f"请求失败: {http_code}"
        return ApiResult(
            success=False,
            # A missing/zero business code falls back to the HTTP status.
            status_code=biz_code or http_code,
            error=error,
        )

    # A falsy "data" value (missing, null, empty) is normalised to {}.
    payload = (raw.get("data") or {}) if body_is_dict else {}

    if isinstance(payload, dict) and "list" in payload:
        # dict.get() only applies its default when the key is absent, so a
        # present-but-null "list"/"count" would otherwise leak None into the
        # result and break consumers that iterate over ``ApiResult.list``.
        items = payload["list"]
        if items is None:
            items = []
        count = payload.get("count", 0)
        if count is None:
            count = 0
        return ApiResult(
            success=True,
            status_code=200,
            data=payload,
            list=items,
            count=count,
            is_end_page=payload.get("isEndPage", True),
        )

    return ApiResult(success=True, status_code=200, data=payload)
|
||
|
||
|
||
# ─────────────────────────────────────────────
# Base Fetcher (GET, detail-style endpoints)
# ─────────────────────────────────────────────
|
||
|
||
class BaseFetcher:
    """Base class for single-object endpoints queried with a GET request.

    Subclasses must provide:
        ENDPOINT: endpoint path handed to the HTTP client.
        _build_params(): builds the query parameters.

    Subclasses may override:
        _parse(): custom response parsing (delegates to ``parse_response``
            by default).
    """

    ENDPOINT: str = ""

    def __init__(self, http_client: HTTPClient):
        """Keep the HTTP client that ``fetch`` sends the request through."""
        self._http = http_client

    def _build_params(self) -> dict:
        """Return the query parameters; must be implemented by subclasses."""
        raise NotImplementedError

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Convert the HTTP status code and response body into an ApiResult."""
        return parse_response(http_code, raw)

    def fetch(self) -> ApiResult:
        """Send the GET request and return the parsed result.

        Any ``Exception`` raised while building the parameters or performing
        the request is converted into a failed ApiResult with
        ``status_code=-1``. Exceptions raised by ``_parse`` propagate.
        """
        try:
            http_code, data = self._http.get(self.ENDPOINT, self._build_params())
        except Exception as e:
            # Some exceptions (e.g. a bare NotImplementedError) stringify to
            # ""; fall back to the type name so a failure is never reported
            # with a blank error message.
            return ApiResult(
                success=False,
                status_code=-1,
                error=str(e) or type(e).__name__,
            )
        return self._parse(http_code, data)
|
||
|
||
|
||
# ─────────────────────────────────────────────
# Base Searcher (search + pagination endpoints)
# ─────────────────────────────────────────────
|
||
|
||
class BaseSearcher:
    """Base class for list endpoints with pagination support.

    Subclasses must provide:
        ENDPOINT: endpoint path handed to the HTTP client.
        _build_params(page_index): builds the request parameters for a page.

    Subclasses may override:
        _request(params): sends the request; POST by default, can be
            switched to GET.
        _parse(): custom response parsing (delegates to ``parse_response``
            by default).
    """

    ENDPOINT: str = ""

    def __init__(self, page_size: int = 15, http_client: Optional[HTTPClient] = None):
        """Store the page size and the HTTP client used by ``_request``."""
        self.page_size = page_size
        self._http = http_client

    def _build_params(self, page_index: int) -> dict:
        """Return the request parameters for ``page_index``; subclasses must implement this."""
        raise NotImplementedError

    def _request(self, params: dict) -> tuple[int, Any]:
        """Send ``params`` to ENDPOINT via POST and return ``(http_code, body)``."""
        return self._http.post(self.ENDPOINT, params)

    def _parse(self, http_code: int, raw: Any) -> ApiResult:
        """Convert the HTTP status code and response body into an ApiResult."""
        return parse_response(http_code, raw)

    def search(self, page_index: int = 1) -> ApiResult:
        """Request a single page and return the parsed result.

        Exceptions from ``_build_params`` and ``_parse`` propagate. Any
        ``Exception`` raised by ``_request`` is converted into a failed
        ApiResult with ``status_code=-1``.
        """
        params = self._build_params(page_index)
        try:
            http_code, data = self._request(params)
        except Exception as e:
            # Exceptions without a message stringify to ""; fall back to the
            # type name so the failure is never reported with a blank error.
            return ApiResult(
                success=False,
                status_code=-1,
                error=str(e) or type(e).__name__,
            )
        return self._parse(http_code, data)

    def load_all(
        self,
        max_pages: int = 10,
        on_page: Optional[Callable[[ApiResult, int], None]] = None,
    ) -> list[dict]:
        """Load pages 1..max_pages and return all of their items in order.

        Loading stops early when a page fails (a message is printed and the
        items collected so far are returned) or when a page reports
        ``is_end_page``.

        Args:
            max_pages: Upper bound on the number of pages requested.
            on_page: Optional callback invoked as ``on_page(result, page_index)``
                after each successful page.

        Returns:
            The accumulated items of every successfully loaded page.
        """
        all_list: list[dict] = []
        for page_index in range(1, max_pages + 1):
            result = self.search(page_index=page_index)
            if not result.success:
                print(f"第 {page_index} 页失败: {result.error}")
                break
            # A custom ``_parse`` (or a null "list" in the payload) may leave
            # ``result.list`` as None; treat that as an empty page rather than
            # crashing in extend().
            if result.list is not None:
                all_list.extend(result.list)
            if on_page:
                on_page(result, page_index)
            if result.is_end_page:
                break
        return all_list
|