win 2b94f15b56 fix(04): correct architecture — private files use crawler_core directly
Architecture clarification from user: spiderJobs/ is standalone execution,
NOT meant to be imported by app/. Correct dependency graph:

  crawler_core   ← shared base library
    ↑        ↑
spiderJobs  app/services/crawler/
(standalone) (FastAPI backend, private layer)

Changes:
- boss.py/qcwy.py/zhilian.py: revert import back to private _boss_api etc.
- _boss/job51/zhilian_api.py: use crawler_core.base.Result/BaseFetcher/BaseSearcher
  + fix self._http → self.http_client
- _boss/job51/zhilian_client.py: use crawler_core.http_client.HTTPClient
  + _boss_client uses crawler_core.boss.sign.BossSign directly
- _boss/job51/zhilian_sign.py: backward-compat stubs → crawler_core.*.sign

Full regression: 106 passed in 0.68s
2026-03-21 19:39:30 +08:00

128 lines
4.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
前程无忧 (51Job) Service — 基于新算法文件的封装
保持对外公开接口不变cleaning.py / company_cleaner.py 依赖)
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from loguru import logger
from app.services.crawler._job51_api import (
GetCompanyInfo,
GetJobDetail,
SearchCompanyJobs,
SearchRecommendJobs,
)
from app.services.crawler._job51_client import Job51Client, create_client
class QcwyService:
    """Synchronous facade over the 51Job fetchers/searchers, plus async bridges.

    Each synchronous method logs the call, builds a fetcher or searcher around
    the shared client, and returns an empty container (``{}`` or ``[]``)
    instead of raising when the result is unsuccessful or an exception occurs.
    The ``async_*`` methods run the synchronous ones through
    ``asyncio.to_thread``.
    """

    # Optional filters of ``get_company_jobs_by_id`` that
    # ``async_get_company_jobs`` forwards from its ``**kwargs``.
    _COMPANY_JOB_FILTERS = ("job_area", "function", "salary_type")

    def __init__(self, proxy_url: Optional[str] = None):
        """Build the underlying client.

        Args:
            proxy_url: Proxy handed to ``create_client``. ``None`` and the
                empty string are both passed on as ``None``.
        """
        self._client = create_client(proxy=proxy_url or None)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Replace the client with a new one built for ``proxy_url``.

        Args:
            proxy_url: New proxy; a falsy value is passed to ``create_client``
                as ``None`` and logged as ``direct``.
        """
        self._client = create_client(proxy=proxy_url or None)
        logger.info(f"QcwyService proxy set to: {proxy_url or 'direct'}")

    def get_job_detail(self, job_id: str) -> Dict[str, Any]:
        """Fetch the detail of a single job.

        Args:
            job_id: Identifier passed to ``GetJobDetail``.

        Returns:
            ``result.data`` on success (``{}`` if it is falsy); ``{}`` when the
            result is unsuccessful or any exception is raised.
        """
        logger.info(f"Qcwy get_job_detail: {job_id}")
        try:
            fetcher = GetJobDetail(job_id=job_id, client=self._client)
            result = fetcher.fetch()
            if result.success:
                return result.data or {}
            logger.warning(f"Qcwy get_job_detail failed: {result.error}")
            return {}
        except Exception as e:
            # Best-effort contract: callers get an empty dict, never an exception.
            logger.error(f"Qcwy get_job_detail exception: {e}")
            return {}

    def get_company_info(self, company_id: str) -> Dict[str, Any]:
        """Fetch information about a company.

        Args:
            company_id: Identifier passed to ``GetCompanyInfo``.

        Returns:
            ``result.data`` on success (``{}`` if it is falsy); ``{}`` when the
            result is unsuccessful or any exception is raised.
        """
        logger.info(f"Qcwy get_company_info: {company_id}")
        try:
            fetcher = GetCompanyInfo(company_id=company_id, client=self._client)
            result = fetcher.fetch()
            if result.success:
                return result.data or {}
            logger.warning(f"Qcwy get_company_info failed: {result.error}")
            return {}
        except Exception as e:
            # Best-effort contract: callers get an empty dict, never an exception.
            logger.error(f"Qcwy get_company_info exception: {e}")
            return {}

    def search_jobs(
        self, keyword: str, job_area: str = "020000", page: int = 1
    ) -> List[Dict[str, Any]]:
        """Search jobs and return them as a list.

        Args:
            keyword: Search keyword. NOTE(review): it is only logged here and
                is not passed to ``SearchRecommendJobs`` — confirm whether the
                searcher is meant to receive it.
            job_area: Area code passed to the searcher.
            page: Page index passed to ``searcher.search``.

        Returns:
            ``result.list`` on success (``[]`` if it is falsy); ``[]`` when the
            result is unsuccessful or any exception is raised.
        """
        logger.info(f"Qcwy search_jobs: keyword={keyword}, area={job_area}, page={page}")
        try:
            searcher = SearchRecommendJobs(
                job_area=job_area, page_size=20, client=self._client,
            )
            result = searcher.search(page_index=page)
            if result.success:
                return result.list or []
            logger.warning(f"Qcwy search_jobs failed: {result.error}")
            return []
        except Exception as e:
            # Best-effort contract: callers get an empty list, never an exception.
            logger.error(f"Qcwy search_jobs exception: {e}")
            return []

    def get_company_jobs_by_id(
        self,
        company_id: str,
        page: int = 1,
        page_size: int = 30,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
    ) -> Dict[str, Any]:
        """Fetch one page of the jobs published by a company.

        Args:
            company_id: Identifier passed to ``SearchCompanyJobs``.
            page: Page index passed to ``searcher.search``.
            page_size: Page size passed to the searcher.
            job_area: Optional filter forwarded unchanged to the searcher.
            function: Optional filter forwarded unchanged to the searcher.
            salary_type: Optional filter forwarded unchanged to the searcher.

        Returns:
            ``result.data`` on success (``{}`` if it is falsy); ``{}`` when the
            result is unsuccessful or any exception is raised.
        """
        logger.info(f"Qcwy get_company_jobs: company={company_id}, page={page}")
        try:
            searcher = SearchCompanyJobs(
                company_id=company_id,
                job_area=job_area,
                function=function,
                salary_type=salary_type,
                page_size=page_size,
                client=self._client,
            )
            result = searcher.search(page_index=page)
            if result.success:
                return result.data or {}
            logger.warning(f"Qcwy get_company_jobs failed: {result.error}")
            return {}
        except Exception as e:
            # Best-effort contract: callers get an empty dict, never an exception.
            logger.error(f"Qcwy get_company_jobs exception: {e}")
            return {}

    # ── asyncio.to_thread bridges (ARCH-06) ────────────────────────

    async def async_get_job_detail(self, job_id: str) -> Dict[str, Any]:
        """Await ``get_job_detail`` executed through ``asyncio.to_thread``."""
        import asyncio

        return await asyncio.to_thread(self.get_job_detail, job_id)

    async def async_get_company_info(self, company_id: str) -> Dict[str, Any]:
        """Await ``get_company_info`` executed through ``asyncio.to_thread``."""
        import asyncio

        return await asyncio.to_thread(self.get_company_info, company_id)

    async def async_get_company_jobs(
        self, company_id: str, page: int = 1, page_size: int = 30, **kwargs
    ) -> Dict[str, Any]:
        """Await ``get_company_jobs_by_id`` executed through ``asyncio.to_thread``.

        Args:
            company_id: Forwarded to ``get_company_jobs_by_id``.
            page: Forwarded to ``get_company_jobs_by_id``.
            page_size: Forwarded to ``get_company_jobs_by_id``.
            **kwargs: ``job_area``, ``function`` and ``salary_type`` are
                forwarded; any other key is ignored with a warning so that
                callers which passed extra keys keep working.

        Returns:
            Whatever ``get_company_jobs_by_id`` returns.
        """
        import asyncio

        # Forward the supported filters; they used to be dropped silently.
        filters = {
            name: kwargs[name]
            for name in self._COMPANY_JOB_FILTERS
            if name in kwargs
        }
        ignored = sorted(set(kwargs) - set(filters))
        if ignored:
            logger.warning(
                f"Qcwy async_get_company_jobs ignoring unsupported arguments: {ignored}"
            )
        return await asyncio.to_thread(
            self.get_company_jobs_by_id, company_id, page, page_size, **filters
        )

    async def async_search_jobs(
        self, keyword: str, job_area: str = "020000", page: int = 1
    ) -> List[Dict[str, Any]]:
        """Await ``search_jobs`` executed through ``asyncio.to_thread``."""
        import asyncio

        return await asyncio.to_thread(self.search_jobs, keyword, job_area, page)