From fe9a6d14039f5876394589c429ca3744591069ac Mon Sep 17 00:00:00 2001 From: win Date: Sat, 21 Mar 2026 17:53:13 +0800 Subject: [PATCH] docs(phase-1): create plans (2 plans, 2 waves) with checker revision --- .planning/phases/01-shared-core/01-01-PLAN.md | 283 ++++++++++++++---- .planning/phases/01-shared-core/01-02-PLAN.md | 47 +-- .planning/phases/01-shared-core/1-CONTEXT.md | 2 +- 3 files changed, 222 insertions(+), 110 deletions(-) diff --git a/.planning/phases/01-shared-core/01-01-PLAN.md b/.planning/phases/01-shared-core/01-01-PLAN.md index c1e27df..c72214b 100644 --- a/.planning/phases/01-shared-core/01-01-PLAN.md +++ b/.planning/phases/01-shared-core/01-01-PLAN.md @@ -23,9 +23,9 @@ requirements: must_haves: truths: - "`pip install -e ./crawler_core` succeeds without errors" - - "`from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient` imports cleanly" + - "`from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient` imports cleanly" - "HTTPClient retries failed requests up to 3 times with exponential backoff (minimum 10s wait)" - - "All HTTP errors are logged to stderr via stdlib logging with level, url, and error message" + - "All HTTP errors are logged to stderr via stdlib logging.getLogger('crawler_core.*') in place; loguru bridge deferred to Phase 5" - "Old spiderJobs/ and jobs_spider/ code is NOT modified — feature flag isolation holds" artifacts: - path: "crawler_core/pyproject.toml" @@ -33,31 +33,32 @@ must_haves: contains: "name = \"crawler_core\"" - path: "crawler_core/__init__.py" provides: "Public API surface" - exports: ["BaseFetcher", "BaseSearcher", "ApiResult", "HTTPClient"] + exports: ["BaseFetcher", "BaseSearcher", "Result", "HTTPClient"] - path: "crawler_core/http_client.py" provides: "TLS-fingerprinted HTTP client with retry and logging" exports: ["HTTPClient"] - path: "crawler_core/base.py" - provides: "Template-method base classes" - exports: ["ApiResult", "BaseFetcher", "BaseSearcher", "parse_response"] + 
provides: "Template-method base classes with generic Result[T] return type" + exports: ["Result", "BaseFetcher", "BaseSearcher", "parse_response"] key_links: - from: "crawler_core/__init__.py" to: "crawler_core/http_client.py" via: "from crawler_core.http_client import HTTPClient" - from: "crawler_core/__init__.py" to: "crawler_core/base.py" - via: "from crawler_core.base import BaseFetcher, BaseSearcher, ApiResult" + via: "from crawler_core.base import BaseFetcher, BaseSearcher, Result" - from: "crawler_core/base.py" to: "crawler_core/http_client.py" via: "from crawler_core.http_client import HTTPClient" --- + Create the crawler_core/ installable shared package with its core infrastructure: HTTP client with TLS fingerprint, retry logic, stdlib logging, and the BaseFetcher/BaseSearcher template-method base classes. Purpose: This is the foundation everything else depends on. Once installed with `pip install -e ./crawler_core`, Phase 2/3 platform rewrites can import from it instead of copying code. -Output: A working Python package at crawler_core/ that installs cleanly and exposes BaseFetcher, BaseSearcher, ApiResult, and HTTPClient. +Output: A working Python package at crawler_core/ that installs cleanly and exposes BaseFetcher, BaseSearcher, Result[T], and HTTPClient. @@ -72,7 +73,7 @@ Output: A working Python package at crawler_core/ that installs cleanly and expo - + From spiderJobs/core/http_client.py: ```python @@ -86,10 +87,10 @@ class HTTPClient: def get(self, path, params=None, headers=None) -> tuple[int, Any]: ... ``` -From spiderJobs/core/base.py: +From spiderJobs/core/base.py (reference only — DO NOT copy ApiResult; use Result[T] instead per D-07): ```python @dataclass -class ApiResult: +class ApiResult: # <-- OLD: replaced by Result[T] in crawler_core/base.py success: bool status_code: int data: Any = None @@ -103,14 +104,14 @@ def parse_response(http_code: int, raw: Any) -> ApiResult: ... 
class BaseFetcher: ENDPOINT: str = "" def __init__(self, http_client: HTTPClient): ... - def _build_params(self) -> dict: raise NotImplementedError + def _build_params(self) -> dict: raise NotImplementedError # template method (required) def _parse(self, http_code, raw) -> ApiResult: ... def fetch(self) -> ApiResult: ... class BaseSearcher: ENDPOINT: str = "" def __init__(self, page_size=15, http_client=None): ... - def _build_params(self, page_index) -> dict: raise NotImplementedError + def _build_params(self, page_index) -> dict: raise NotImplementedError # template method (required) def _request(self, params) -> tuple[int, Any]: ... def _parse(self, http_code, raw) -> ApiResult: ... def search(self, page_index=1) -> ApiResult: ... @@ -337,7 +338,7 @@ print('HTTPClient OK') - Task 3: Create crawler_core/base.py and crawler_core/__init__.py + Task 3: Create crawler_core/base.py with Result[T] and 4 template methods, plus crawler_core/__init__.py - /Users/win/2025/AICoding/JobData/spiderJobs/core/base.py (source to port — read every line) - /Users/win/2025/AICoding/JobData/.planning/research/ARCHITECTURE.md (abstract base class hierarchy section) @@ -348,56 +349,186 @@ print('HTTPClient OK') crawler_core/__init__.py -Port `spiderJobs/core/base.py` to `crawler_core/base.py` and create the public `__init__.py`. +Create `crawler_core/base.py` with the new `Result[T]` generic dataclass (replacing ApiResult per D-07) and four template methods (per D-06), then create the public `__init__.py`. 
**crawler_core/base.py:** -Port the full file from `spiderJobs/core/base.py` with ONE import change: - -Change: -```python -from spiderJobs.core.http_client import HTTPClient -``` - -To: -```python -from crawler_core.http_client import HTTPClient -``` - -Everything else stays identical to `spiderJobs/core/base.py`: -- `ApiResult` dataclass with fields: `success`, `status_code`, `data`, `list`, `count`, `is_end_page`, `error` -- `parse_response(http_code, raw)` function -- `BaseFetcher` class with `ENDPOINT`, `__init__`, `_build_params`, `_parse`, `fetch` -- `BaseSearcher` class with `ENDPOINT`, `__init__`, `_build_params`, `_request`, `_parse`, `search`, `load_all` - Add module docstring at the top: ```python """ crawler_core.base — 通用基类与数据结构 -提供所有招聘平台共用的: ApiResult, BaseFetcher, BaseSearcher, parse_response +提供所有招聘平台共用的: Result, BaseFetcher, BaseSearcher, parse_response 不依赖任何平台特定代码。 """ ``` -Replace the existing inline print in `load_all`: -```python -# Change this: -print(f"第 {page_index} 页失败: {result.error}") +**Step 1: Generic Result[T] dataclass (replaces ApiResult — per D-07)** -# To this (use stdlib logging, not print): -import logging as _logging -_log = _logging.getLogger("crawler_core.base") -_log.warning("第 %d 页失败: %s", page_index, result.error) -``` - -Actually, define the logger at module level (not inside the method): ```python +from __future__ import annotations + import logging +from dataclasses import dataclass, field +from typing import Any, Generic, Optional, TypeVar + +from crawler_core.http_client import HTTPClient + +T = TypeVar("T") + _logger = logging.getLogger("crawler_core.base") + + +@dataclass +class Result(Generic[T]): + """Typed result wrapper returned by all BaseFetcher and BaseSearcher methods. + + Replaces the untyped ApiResult. Callers annotate as Result[MyJobModel] etc. 
+ """ + success: bool + status_code: int + data: Optional[T] = None + list: list[T] = field(default_factory=list) + count: int = 0 + is_end_page: bool = True + error: Optional[str] = None ``` -Then in `load_all`, replace `print(...)` with `_logger.warning(...)`. +**Step 2: parse_response — adapt from spiderJobs/core/base.py but return Result[Any]** + +Port `parse_response(http_code, raw)` from `spiderJobs/core/base.py` verbatim, changing only the return type annotation from `ApiResult` to `Result[Any]`. + +**Step 3: BaseFetcher — 4 template methods (per D-06)** + +```python +class BaseFetcher: + """Template-method base class for single-item fetchers. + + Required overrides: _build_params(), _parse() + Optional overrides: _build_headers(), _check_blocked() + """ + ENDPOINT: str = "" + + def __init__(self, http_client: HTTPClient) -> None: + self.http_client = http_client + + # --- Required template methods --- + + def _build_params(self) -> dict: + """Build query/body parameters for the request. MUST be overridden.""" + raise NotImplementedError(f"{type(self).__name__} must implement _build_params()") + + def _parse(self, http_code: int, raw: Any) -> Result: + """Parse the HTTP response into a Result. MUST be overridden.""" + raise NotImplementedError(f"{type(self).__name__} must implement _parse()") + + # --- Optional template methods --- + + def _build_headers(self) -> dict: + """Build extra request headers. Override to add platform-specific headers. + + Default: returns {} (no extra headers beyond HTTPClient defaults). + """ + return {} + + def _check_blocked(self, status_code: int, body: str) -> bool: + """Detect platform-specific anti-crawl blocks. + + Override to inspect response body/status for block signals. + Default: returns False (assume not blocked). 
+ """ + return False + + # --- Orchestration --- + + def fetch(self) -> Result: + """Execute the fetch: build params → request → check blocked → parse.""" + params = self._build_params() + extra_headers = self._build_headers() + http_code, raw = self.http_client.get( + self.ENDPOINT, params=params, headers=extra_headers or None + ) + raw_str = str(raw) if not isinstance(raw, str) else raw + if self._check_blocked(http_code, raw_str): + return Result(success=False, status_code=http_code, error="blocked") + return self._parse(http_code, raw) +``` + +**Step 4: BaseSearcher — 4 template methods (per D-06)** + +```python +class BaseSearcher: + """Template-method base class for paginated list searchers. + + Required overrides: _build_params(), _parse() + Optional overrides: _build_headers(), _check_blocked() + """ + ENDPOINT: str = "" + + def __init__(self, page_size: int = 15, http_client: Optional[HTTPClient] = None) -> None: + self.page_size = page_size + self.http_client = http_client + + # --- Required template methods --- + + def _build_params(self, page_index: int) -> dict: + """Build pagination query params. MUST be overridden.""" + raise NotImplementedError(f"{type(self).__name__} must implement _build_params()") + + def _parse(self, http_code: int, raw: Any) -> Result: + """Parse the HTTP response into a Result. MUST be overridden.""" + raise NotImplementedError(f"{type(self).__name__} must implement _parse()") + + # --- Optional template methods --- + + def _build_headers(self) -> dict: + """Build extra request headers. Override for platform-specific headers. + + Default: returns {} (no extra headers beyond HTTPClient defaults). + """ + return {} + + def _check_blocked(self, status_code: int, body: str) -> bool: + """Detect platform-specific anti-crawl blocks. + + Override to inspect response body/status for block signals. + Default: returns False (assume not blocked). 
+ """ + return False + + # --- Orchestration --- + + def _request(self, params: dict) -> tuple[int, Any]: + """Execute a single HTTP request. Uses _build_headers() for extra headers.""" + extra_headers = self._build_headers() + return self.http_client.get( + self.ENDPOINT, params=params, headers=extra_headers or None + ) + + def search(self, page_index: int = 1) -> Result: + """Fetch a single page: build params → request → check blocked → parse.""" + params = self._build_params(page_index) + http_code, raw = self._request(params) + raw_str = str(raw) if not isinstance(raw, str) else raw + if self._check_blocked(http_code, raw_str): + return Result(success=False, status_code=http_code, error="blocked") + return self._parse(http_code, raw) + + def load_all(self, max_pages: int = 10, on_page=None) -> list: + """Iterate pages until is_end_page=True or max_pages reached.""" + all_items: list = [] + for page_index in range(1, max_pages + 1): + result = self.search(page_index) + if not result.success: + _logger.warning("第 %d 页失败: %s", page_index, result.error) + break + all_items.extend(result.list) + if on_page: + on_page(page_index, result) + if result.is_end_page: + break + return all_items +``` **crawler_core/__init__.py:** @@ -406,14 +537,14 @@ Then in `load_all`, replace `print(...)` with `_logger.warning(...)`. 
crawler_core — 招聘爬虫共享核心包 安装方式: pip install -e ./crawler_core -使用方式: from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient +使用方式: from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient """ -from crawler_core.base import ApiResult, BaseFetcher, BaseSearcher, parse_response +from crawler_core.base import Result, BaseFetcher, BaseSearcher, parse_response from crawler_core.http_client import HTTPClient __all__ = [ - "ApiResult", + "Result", "BaseFetcher", "BaseSearcher", "HTTPClient", @@ -424,7 +555,7 @@ __version__ = "0.1.0" ``` **Do NOT:** -- Change the logic of `BaseFetcher.fetch()`, `BaseSearcher.search()`, or `BaseSearcher.load_all()` beyond the logger swap +- Keep the old `ApiResult` name anywhere in crawler_core (it's fully replaced by `Result[T]`) - Import from `spiderJobs.*` or `app.*` - Import loguru - Add any platform-specific code to base.py or __init__.py @@ -433,26 +564,40 @@ __version__ = "0.1.0" cd /Users/win/2025/AICoding/JobData && python -c " import sys sys.path.insert(0, '.') -from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient, parse_response -import dataclasses -fields = {f.name for f in dataclasses.fields(ApiResult)} -assert fields == {'success','status_code','data','list','count','is_end_page','error'}, f'ApiResult fields wrong: {fields}' +from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient, parse_response +import dataclasses, typing +fields = {f.name for f in dataclasses.fields(Result)} +assert fields == {'success','status_code','data','list','count','is_end_page','error'}, f'Result fields wrong: {fields}' assert hasattr(BaseFetcher, 'fetch'), 'BaseFetcher.fetch missing' +assert hasattr(BaseFetcher, '_build_headers'), 'BaseFetcher._build_headers missing' +assert hasattr(BaseFetcher, '_check_blocked'), 'BaseFetcher._check_blocked missing' +assert BaseFetcher._build_headers(object()) == {}, '_build_headers default must return {}' +assert 
BaseFetcher._check_blocked(object(), 200, '') == False, '_check_blocked default must return False' assert hasattr(BaseSearcher, 'load_all'), 'BaseSearcher.load_all missing' -print('All imports OK, ApiResult fields OK') +assert hasattr(BaseSearcher, '_build_headers'), 'BaseSearcher._build_headers missing' +assert hasattr(BaseSearcher, '_check_blocked'), 'BaseSearcher._check_blocked missing' +print('All imports OK, Result fields OK, 4 template methods verified') " - - `from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient` succeeds (with repo root on sys.path) + - `from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient` succeeds (with repo root on sys.path) + - `crawler_core/base.py` defines `Result` as a generic dataclass using `TypeVar` and `Generic[T]` + - `crawler_core/base.py` does NOT contain `ApiResult` anywhere: `grep "ApiResult" crawler_core/base.py` returns empty - `crawler_core/base.py` does NOT contain `from spiderJobs` anywhere: `grep "from spiderJobs" crawler_core/base.py` returns empty - `crawler_core/base.py` does NOT contain `print(` anywhere: `grep "print(" crawler_core/base.py` returns empty - - `crawler_core/__init__.py` contains `__all__` with all 5 exports + - `BaseFetcher._build_headers(self)` exists and returns `{}` by default + - `BaseFetcher._check_blocked(self, status_code, body)` exists and returns `False` by default + - `BaseFetcher.fetch()` calls `_build_headers()` and `_check_blocked()` in its implementation + - `BaseSearcher._build_headers(self)` exists and returns `{}` by default + - `BaseSearcher._check_blocked(self, status_code, body)` exists and returns `False` by default + - `BaseSearcher.search()` calls `_check_blocked()` in its implementation + - `crawler_core/__init__.py` exports `Result` (not `ApiResult`) in `__all__` - `crawler_core/__init__.py` contains `__version__ = "0.1.0"` - - `ApiResult` dataclass has exactly 7 fields: success, status_code, data, list, count, is_end_page, error + - 
`Result` dataclass has exactly 7 fields: success, status_code, data, list, count, is_end_page, error - `BaseFetcher._build_params` raises `NotImplementedError` - `BaseSearcher._build_params` raises `NotImplementedError` - base.py ported (no spiderJobs imports, no print statements), __init__.py exposes clean public API. + base.py uses Result[T] generic (no ApiResult), 4 template methods wired into fetch()/search(), __init__.py exports clean public API. @@ -465,10 +610,10 @@ cd /Users/win/2025/AICoding/JobData python -c " import sys sys.path.insert(0, '.') -from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient, parse_response +from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient, parse_response -# Verify ApiResult structure -r = ApiResult(success=True, status_code=200) +# Verify Result structure +r = Result(success=True, status_code=200) assert r.success and r.list == [] and r.error is None # Verify BaseFetcher requires _build_params @@ -476,6 +621,13 @@ class TestFetcher(BaseFetcher): ENDPOINT = '/test' def _build_params(self): return {'q': 'test'} + def _parse(self, http_code, raw): + return Result(success=True, status_code=http_code) + +# Verify default template method overrides +tf = TestFetcher(http_client=None) +assert tf._build_headers() == {}, '_build_headers default failed' +assert tf._check_blocked(200, '') == False, '_check_blocked default failed' # Verify parse_response with dict input result = parse_response(200, {'statusCode': 200, 'data': {'list': [{'id': 1}], 'count': 1, 'isEndPage': False}}) @@ -492,11 +644,12 @@ Also confirm no cross-contamination: grep -r "from spiderJobs" /Users/win/2025/AICoding/JobData/crawler_core/ && echo "FAIL: found spiderJobs import" || echo "OK: no spiderJobs imports" grep -r "from app" /Users/win/2025/AICoding/JobData/crawler_core/ && echo "FAIL: found app import" || echo "OK: no app imports" grep -r "loguru" /Users/win/2025/AICoding/JobData/crawler_core/ && echo "FAIL: found loguru" 
|| echo "OK: no loguru" +grep -r "ApiResult" /Users/win/2025/AICoding/JobData/crawler_core/ && echo "FAIL: ApiResult still present" || echo "OK: ApiResult fully replaced by Result[T]" ``` -1. `python -c "from crawler_core import BaseFetcher, BaseSearcher, ApiResult, HTTPClient"` exits 0 (with repo root on sys.path) +1. `python -c "from crawler_core import BaseFetcher, BaseSearcher, Result, HTTPClient"` exits 0 (with repo root on sys.path) 2. `crawler_core/pyproject.toml` passes `python -c "import tomllib; tomllib.load(open('crawler_core/pyproject.toml','rb'))"` 3. `grep "requests_go" Pipfile` has output — dependency declared 4. `grep "tenacity" Pipfile` has output — dependency declared @@ -504,13 +657,15 @@ grep -r "loguru" /Users/win/2025/AICoding/JobData/crawler_core/ && echo "FAIL: f 6. `grep -r "from spiderJobs" crawler_core/` has NO output 7. `grep -r "loguru" crawler_core/` has NO output 8. `grep "min=10" crawler_core/http_client.py` has output — anti-detection delay preserved -9. `spiderJobs/` and `jobs_spider/` directories are UNCHANGED (no files modified) +9. `grep -r "ApiResult" crawler_core/` has NO output — fully replaced by Result[T] +10. `BaseFetcher._build_headers` and `BaseFetcher._check_blocked` exist and are wired into `fetch()` +11. 
`spiderJobs/` and `jobs_spider/` directories are UNCHANGED (no files modified) After completion, create `.planning/phases/01-shared-core/01-01-SUMMARY.md` with: - What was created (file list with line counts) - Key decisions made (pyproject.toml structure, tenacity config values, logging approach) -- Interface contracts (the public exports from crawler_core/__init__.py) +- Interface contracts (the public exports from crawler_core/__init__.py, Result[T] field list, 4 template method signatures) - Any deviations from this plan and why diff --git a/.planning/phases/01-shared-core/01-02-PLAN.md b/.planning/phases/01-shared-core/01-02-PLAN.md index 83eedc0..f96b266 100644 --- a/.planning/phases/01-shared-core/01-02-PLAN.md +++ b/.planning/phases/01-shared-core/01-02-PLAN.md @@ -125,7 +125,7 @@ class ZhilianSign: - + Task 1: Port sign algorithms to crawler_core/ platform directories - /Users/win/2025/AICoding/JobData/spiderJobs/platforms/boss/sign.py (source — read every line before writing) @@ -138,20 +138,6 @@ class ZhilianSign: crawler_core/qcwy/sign.py crawler_core/zhilian/sign.py - - - BossSign.generate_traceid("M-W") returns a 25-char string starting with "M-W" - - BossSign.generate_traceid("M-W") result matches regex r'^M-W[0-9a-f]{13}[0-9a-zA-Z]{6}[0-9a-zA-Z]{3}$' - - _compute_checksum produces exactly 3 characters from the _CHARS set - - _generate_uuid produces exactly 19 characters (13 hex + 6 base62) - - Job51Sign().build_sign_path("open/test", "GET") returns tuple of length 2 - - Job51Sign().build_sign_path("open/test", "GET")[0] starts with "/open/test?api_key=51job&timestamp=" - - Job51Sign().build_sign_path("open/test", "GET")[1] is 64-char hex string (HMAC-SHA256) - - Job51Sign().build_sign_path("open/test", "POST", body={"k": "v"})[1] != Job51Sign().build_sign_path("open/test", "GET")[1] — method affects signature - - ZhilianSign().sign_headers() returns dict with exactly 9 keys - - ZhilianSign().sign_headers()["x-zp-business-system"] == "73" - - 
ZhilianSign().sign_params() returns dict with exactly 6 keys: at, rt, channel, platform, version, d - - ZhilianSign(at="tok123").sign_params()["at"] == "tok123" - Copy the three sign algorithm files to their new locations under crawler_core/, making only one change per file: update the module docstring to reference crawler_core. @@ -238,7 +224,7 @@ print('All sign algorithms imported and validated') Three sign.py files in crawler_core/ — pure functions, no HTTP, no cross-imports from app or spiderJobs. - + Task 2: Write sign algorithm unit tests - /Users/win/2025/AICoding/JobData/crawler_core/boss/sign.py (just created — read to understand exact exports) @@ -253,35 +239,6 @@ print('All sign algorithms imported and validated') tests/crawler_core/test_qcwy_sign.py tests/crawler_core/test_zhilian_sign.py - - Boss sign tests: - - test_traceid_format: generate_traceid() matches regex r'^M-W[0-9a-f]{13}[0-9a-zA-Z]{6}[0-9a-zA-Z]{3}$' - - test_traceid_length: generate_traceid() is exactly 25 chars - - test_traceid_custom_prefix: generate_traceid("X-Y") starts with "X-Y" - - test_traceid_uniqueness: two calls return different values - - test_compute_checksum_length: _compute_checksum(any 19-char string) returns 3 chars - - test_compute_checksum_chars: all 3 chars are in _CHARS (base62) - - test_compute_checksum_deterministic: same input → same output - - test_generate_uuid_length: _generate_uuid() returns 19 chars - - Job51 sign tests: - - test_build_sign_path_get_format: GET path starts with "/{endpoint}?api_key=51job&timestamp=" - - test_build_sign_path_returns_tuple: returns tuple of (str, str) - - test_sign_hex_length: sign is 64-char hex string matching r'^[0-9a-f]{64}$' - - test_get_vs_post_different_sign: same endpoint, different method → different sign - - test_sign_with_params_includes_params_in_path: GET with params={'k':'v'} → path contains "k=v" - - test_sign_key_in_path: path contains "api_key=51job" - - test_generate_uuid_length: generate_uuid() returns string of 
length 23 (13+10) - - Zhilian sign tests: - - test_sign_headers_keys: returns dict with exactly these 9 keys: x-zp-at, x-zp-rt, x-zp-action-id, x-zp-page-code, x-zp-version, x-zp-channel, x-zp-platform, x-zp-device-id, x-zp-business-system - - test_sign_headers_business_system: x-zp-business-system == "73" - - test_sign_headers_tokens: x-zp-at and x-zp-rt reflect constructor args - - test_sign_params_keys: returns dict with exactly these 6 keys: at, rt, channel, platform, version, d - - test_sign_params_device_id_matches: d == device_id from constructor - - test_generate_uuid_format: matches UUID4 pattern r'^[0-9A-F]{8}-[0-9A-F]{4}-4[0-9A-F]{3}-[89AB][0-9A-F]{3}-[0-9A-F]{12}$' - - test_action_id_unique_per_call: two sign_headers() calls produce different x-zp-action-id values - Create `tests/crawler_core/__init__.py` (empty file) and three test files. diff --git a/.planning/phases/01-shared-core/1-CONTEXT.md b/.planning/phases/01-shared-core/1-CONTEXT.md index 517bdf6..4b13050 100644 --- a/.planning/phases/01-shared-core/1-CONTEXT.md +++ b/.planning/phases/01-shared-core/1-CONTEXT.md @@ -16,7 +16,7 @@ ### 包结构和安装方式 - **D-01:** 包放在项目根目录 `crawler_core/`,与 `app/` 和 `spiderJobs/` 平级 - **D-02:** 使用 `pyproject.toml` 管理包元数据,支持 `pip install -e ./crawler_core` -- **D-03:** 最小依赖范围 — 只依赖 `requests_go` + Python 标准库,不拉入 FastAPI/Tortoise/loguru +- **D-03:** 最小依赖范围 — 只依赖 `requests_go` + `tenacity` + Python 标准库,不拉入 FastAPI/Tortoise/loguru - **D-04:** 包名为 `crawler_core` ### 基类接口设计