win 2b94f15b56 fix(04): correct architecture — private files use crawler_core directly
Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:

  crawler_core   ← shared base library
    ↑        ↑
spiderJobs  app/services/crawler/
(standalone) (FastAPI backend, private layer)

Changes:
- boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc.
- _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
  + fix self._http → self.http_client
- _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient
  + _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign

Full regression: 106 passed in 0.68s
2026-03-21 19:39:30 +08:00

140 lines
5.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Boss Zhipin service — a wrapper built on the new algorithm files.
Keeps the public interface unchanged (cleaning.py / company_cleaner.py depend on it).
"""
from __future__ import annotations

import asyncio
from typing import Any, Callable, Dict, List, Optional

from loguru import logger

from app.services.crawler._boss_api import (
    GetBrandDetail,
    GetJobDetail,
    SearchBrandJobs,
    SearchRecJobs,
)
from app.services.crawler._boss_client import BossClient, create_client
from app.services.crawler._boss_sign import BossSign


class BossService:
    """Facade over the private Boss Zhipin crawler API.

    Holds one ``BossSign`` signer and one client built by ``create_client``.
    Each blocking lookup returns the ``data`` of the underlying result when the
    result reports success, and ``None`` otherwise; failures and exceptions
    raised while building or running the request are logged, not propagated.
    The ``async_*`` methods run the blocking lookups via ``asyncio.to_thread``.
    """

    def __init__(self, proxy_pool: Optional[List[Dict[str, str]]] = None):
        """Create the signer and the client.

        Args:
            proxy_pool: Optional list of proxy mappings. Only the first entry
                is consulted; its ``"https"`` value is preferred, with
                ``"http"`` as the fallback. An empty or missing pool means no
                proxy is passed to ``create_client``.
        """
        self._signer = BossSign()
        proxy = None
        if proxy_pool:
            first_entry = proxy_pool[0]
            proxy = first_entry.get("https") or first_entry.get("http")
        self._client = create_client(signer=self._signer, proxy=proxy)
        # Public on purpose: external code (cleaning.py) checks
        # boss_service.login_data.get("mpt").
        self.login_data: Dict[str, str] = {
            "mpt": "",
            "wt2": "",
            "openId": "",
        }

    def set_login_data(self, mpt: str, wt2: str, open_id: str = "") -> None:
        """Store login credentials and push ``mpt`` / ``wt2`` to the signer.

        Args:
            mpt: Value stored under ``login_data["mpt"]`` and on the signer.
            wt2: Value stored under ``login_data["wt2"]`` and on the signer.
            open_id: Value stored under ``login_data["openId"]`` only.
        """
        self.login_data.update({"mpt": mpt, "wt2": wt2, "openId": open_id})
        self._signer.mpt = mpt
        self._signer.wt2 = wt2

    def set_proxy(self, proxy: Optional[str]) -> None:
        """Rebuild the client with a new proxy, or none at all.

        Args:
            proxy: Proxy string; surrounding whitespace and backticks are
                stripped. A falsy value (including one that becomes empty
                after stripping) is passed to ``create_client`` as ``None``.
        """
        if proxy:
            proxy = proxy.strip().strip("`")
        self._client = create_client(signer=self._signer, proxy=proxy or None)
        logger.info(f"BossService proxy set to: {proxy or 'direct'}")

    @staticmethod
    def _run(label: str, call: Callable[[], Any]) -> Optional[Dict]:
        """Execute ``call`` and unwrap its result, logging any failure.

        Args:
            label: Operation name interpolated into the log messages.
            call: Zero-argument callable that builds and executes the request
                and returns an object with ``success``, ``data`` and ``error``.

        Returns:
            ``result.data`` when ``result.success`` is truthy, else ``None``.
        """
        # depth=1 attributes the log record to the public method that called
        # this helper rather than to the helper itself.
        caller_logger = logger.opt(depth=1)
        try:
            result = call()
            if result.success:
                return result.data
            caller_logger.warning(f"Boss {label} failed: {result.error}")
            return None
        except Exception as e:
            caller_logger.error(f"Boss {label} exception: {e}")
            return None

    def get_job_detail_by_id(
        self, job_id: str, lid: str = "", security_id: str = ""
    ) -> Optional[Dict]:
        """Fetch the detail of a job posting by its job ID.

        Args:
            job_id: Job identifier forwarded to ``GetJobDetail``.
            lid: Forwarded unchanged to ``GetJobDetail``.
            security_id: Forwarded unchanged to ``GetJobDetail``.

        Returns:
            The result data on success, ``None`` on failure or exception.
        """
        logger.info(f"获取招聘详情: {job_id}")
        return self._run(
            "get_job_detail",
            lambda: GetJobDetail(
                security_id=security_id, job_id=job_id, lid=lid,
                client=self._client,
            ).fetch(),
        )

    def get_company_detail_by_id(self, company_id: str) -> Optional[Dict]:
        """Fetch the detail of a company by its ID.

        Args:
            company_id: Passed to ``GetBrandDetail`` as ``brand_id``.

        Returns:
            The result data on success, ``None`` on failure or exception.
        """
        logger.info(f"获取公司详情: {company_id}")
        return self._run(
            "get_company_detail",
            lambda: GetBrandDetail(
                brand_id=company_id, client=self._client
            ).fetch(),
        )

    def get_company_jobs_by_id(
        self, company_id: str, page: int = 1
    ) -> Optional[Dict]:
        """Fetch one page of a company's job list.

        Args:
            company_id: Passed to ``SearchBrandJobs`` as ``brand_id``.
            page: Passed to ``search`` as ``page_index``.

        Returns:
            The result data on success, ``None`` on failure or exception.
        """
        logger.info(f"获取公司职位列表: {company_id}, page={page}")
        return self._run(
            "get_company_jobs",
            lambda: SearchBrandJobs(
                brand_id=company_id, page_size=15, client=self._client,
            ).search(page_index=page),
        )

    def search_jobs(
        self, keyword: str, city_code: str = "101010100", page: int = 1
    ) -> Optional[Dict]:
        """Search jobs for a city.

        Args:
            keyword: Search keyword; currently only written to the log.
            city_code: Passed to ``SearchRecJobs`` as ``city_code``.
            page: Passed to ``search`` as ``page_index``.

        Returns:
            The result data on success, ``None`` on failure or exception.
        """
        logger.info(f"Boss search_jobs: keyword={keyword}, city={city_code}, page={page}")
        # NOTE(review): `keyword` is not forwarded to SearchRecJobs, so it does
        # not influence the request — confirm whether that is intentional.
        return self._run(
            "search_jobs",
            lambda: SearchRecJobs(
                city_code=city_code, page_size=15, client=self._client,
            ).search(page_index=page),
        )

    # ── asyncio.to_thread bridges (ARCH-06) ────────────────────────
    async def async_get_job_detail(
        self, job_id: str, lid: str = "", security_id: str = ""
    ) -> Optional[Dict]:
        """Run ``get_job_detail_by_id`` via ``asyncio.to_thread``."""
        return await asyncio.to_thread(
            self.get_job_detail_by_id, job_id, lid, security_id
        )

    async def async_get_company_detail(self, company_id: str) -> Optional[Dict]:
        """Run ``get_company_detail_by_id`` via ``asyncio.to_thread``."""
        return await asyncio.to_thread(self.get_company_detail_by_id, company_id)

    async def async_get_company_jobs(
        self, company_id: str, page: int = 1
    ) -> Optional[Dict]:
        """Run ``get_company_jobs_by_id`` via ``asyncio.to_thread``."""
        return await asyncio.to_thread(
            self.get_company_jobs_by_id, company_id, page
        )

    async def async_search_jobs(
        self, keyword: str, city_code: str = "101010100", page: int = 1
    ) -> Optional[Dict]:
        """Run ``search_jobs`` via ``asyncio.to_thread``."""
        return await asyncio.to_thread(self.search_jobs, keyword, city_code, page)