# File listing metadata: 234 lines, 9.1 KiB, Python

import httpx
import time
import random
import json
import os
from typing import Dict, Any, Optional, List
from urllib.parse import quote
from loguru import logger
from app.core.algorithms.signature import SignatureGenerator
from jobs_spider.qcwy import search_company_jobs as qcwy_spider
class QcwyService:
    """HTTP client for the 51job ("Qcwy") job API at ``cupid.51job.com``.

    Requests are sent with headers that identify the caller as the 51job
    WeChat mini-program and are signed with ``SignatureGenerator``.  Company
    lookups are delegated to the ``qcwy_spider`` module.

    The instance owns an ``httpx.Client``; call :meth:`close` (or use the
    instance as a context manager) to release it.
    """

    # Key handed to SignatureGenerator for request signing.
    _SIGN_KEY = "abfc8f9dcf8c3f3d8aa294ac5f2cf2cc7767e5592590f39c3f503271dd68562b"

    # Header names (lower-case) whose values must never reach the logs.
    # "user-token" and "account-id" carry this service's own credentials.
    _SENSITIVE_HEADERS = frozenset(
        {"authorization", "cookie", "set-cookie", "user-token", "account-id"}
    )

    # Values of the response "status" field that are treated as success.
    _SUCCESS_STATUSES = ("1", 1)

    def __init__(self, proxy_url: Optional[str] = None):
        """Build the signer, the default headers and the HTTP client.

        Args:
            proxy_url: Optional proxy URL passed to ``httpx.Client``; a falsy
                value means no proxy is configured.

        The ``account-id`` and ``user-token`` headers are taken from the
        ``QCWY_ACCOUNT_ID`` / ``QCWY_USER_TOKEN`` environment variables
        (whitespace-stripped) and default to the empty string.
        """
        self.signature_generator = SignatureGenerator(self._SIGN_KEY)
        self.base_url = "https://cupid.51job.com"
        self.api_key = "51job"

        now_ms = str(int(time.time() * 1000))
        # NOTE(review): "uuid" and "timestamp" are computed once here and then
        # reused for every request made by this instance -- confirm the server
        # does not expect a fresh timestamp per request.
        self.base_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.10(0x13080a10) XWEB/1227",
            "Connection": "keep-alive",
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Content-Type": "application/json",
            "account-id": os.getenv("QCWY_ACCOUNT_ID", "").strip(),
            "From-Domain": "51job_weixin_wxapp",
            "xweb_xhr": "1",
            "user-token": os.getenv("QCWY_USER_TOKEN", "").strip(),
            "uuid": now_ms + str(random.randint(10000000, 99999999)),
            "partner": "",
            "timestamp": now_ms,
            "Sec-Fetch-Site": "cross-site",
            "Sec-Fetch-Mode": "cors",
            "Sec-Fetch-Dest": "empty",
            "Referer": "https://servicewechat.com/wx1131e5c71e668b5d/391/page-frame.html",
            "Accept-Language": "zh-CN,zh;q=0.9",
        }
        self.client = self._build_client(proxy_url)

    @staticmethod
    def _build_client(proxy_url: Optional[str]) -> "httpx.Client":
        """Create an ``httpx.Client`` with the service's fixed settings.

        The client uses a 30 s timeout, verifies TLS certificates and ignores
        proxy/CA settings from the environment (``trust_env=False``).
        """
        client_kwargs: Dict[str, Any] = {
            "timeout": 30.0,
            "verify": True,
            "trust_env": False,
        }
        if proxy_url:
            client_kwargs["proxy"] = proxy_url
        return httpx.Client(**client_kwargs)

    def set_proxy(self, proxy_url: Optional[str]) -> None:
        """Replace the HTTP client with one that uses ``proxy_url``.

        The new client is created first; only then is the previous one
        closed, so a failure to build the new client leaves the old one in
        place.  Errors while closing the old client are logged and ignored.
        """
        old_client = getattr(self, "client", None)
        self.client = self._build_client(proxy_url)
        if old_client is not None:
            try:
                old_client.close()
            except Exception as exc:
                # Best effort: the replacement client is already active.
                logger.debug(f"[Qcwy] failed to close previous HTTP client: {exc}")

    def close(self) -> None:
        """Close the underlying HTTP client, if one has been created."""
        client = getattr(self, "client", None)
        if client is not None:
            client.close()

    def __enter__(self) -> "QcwyService":
        return self

    def __exit__(self, exc_type, exc, tb) -> None:
        self.close()

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` that is safe to log.

        Values of headers listed in ``_SENSITIVE_HEADERS`` (compared
        case-insensitively) are replaced with ``"***"``; all other entries
        are copied unchanged.  Original key spelling is preserved.
        """
        return {
            name: "***" if str(name).lower() in self._SENSITIVE_HEADERS else value
            for name, value in headers.items()
        }

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: "Optional[httpx.Response]" = None,
    ) -> None:
        """Log a request and, when given, its response at INFO level.

        Headers are passed through :meth:`_sanitize_headers` first, and at
        most the first 1000 characters of the response body are logged.
        """
        safe_headers = self._sanitize_headers(headers)
        logger.info(
            f"[Qcwy-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={params} json={json_body}"
        )
        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            # Reading/decoding the body must never break the caller.
            text_sample = "<unreadable>"
        logger.info(
            f"[Qcwy-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def build_property(self, page_code: str = "home|hotjob|jobfxlist") -> str:
        """Build the URL-quoted JSON value for the ``property`` header.

        Args:
            page_code: Value used for both ``pageCode`` and ``shortPageCode``.

        Returns:
            Compact JSON (non-ASCII kept as-is) percent-encoded with
            ``urllib.parse.quote``.  ``distinct_id`` is the current time in
            milliseconds followed by six random digits.
        """
        distinct_id = str(int(time.time() * 1000)) + str(random.randint(100000, 999999))
        property_data = {
            "frompageUrl": "",
            "pageUrl": "pages/index/index",
            "isLogin": "",
            "accountid": "",
            "resumeId": "",
            "firstFrompageUrl": "",
            "distinct_id": distinct_id,
            "pageCode": page_code,
            "shortPageCode": page_code,
            "policyType": "推荐",
        }
        return quote(json.dumps(property_data, ensure_ascii=False, separators=(',', ':')))

    def _make_request(
        self,
        url: str,
        data: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
        method: str = "POST",
    ) -> Optional[Dict[str, Any]]:
        """Send one request and return the decoded JSON body.

        Args:
            url: Absolute URL to call.
            data: JSON body; sent only when ``method`` is not GET.
            headers: Request headers; ``None`` means no extra headers.
            method: ``"GET"`` (case-insensitive) issues a GET, anything else
                a POST.

        Returns:
            The parsed JSON on HTTP 200, otherwise ``None``.  Any exception
            (transport error, invalid JSON, ...) is logged and also yields
            ``None`` -- this method never raises.
        """
        try:
            local_headers: Dict[str, str] = headers or {}
            verb = method.upper()
            is_get = verb == "GET"
            if is_get:
                response = self.client.get(url, headers=local_headers)
            else:
                response = self.client.post(url, headers=local_headers, json=data)
            self._log_request_response(
                "request",
                verb,
                url,
                local_headers,
                params=None,
                json_body=None if is_get else data,
                response=response,
            )
            if response.status_code == 200:
                return response.json()
            logger.warning(f"Request failed: {response.status_code} - {response.text}")
            return None
        except Exception as e:
            # Deliberate best-effort boundary: callers treat None as "no data".
            logger.error(f"Request exception: {e}")
            return None

    @classmethod
    def _result_body(cls, response: Optional[Dict[str, Any]]) -> Dict[str, Any]:
        """Extract ``resultbody`` from a successful API response.

        Returns an empty dict when ``response`` is not a dict, its ``status``
        is not one of ``_SUCCESS_STATUSES``, or ``resultbody`` is missing,
        null or not a dict.
        """
        if not isinstance(response, dict):
            return {}
        if response.get("status") not in cls._SUCCESS_STATUSES:
            return {}
        body = response.get("resultbody")
        return body if isinstance(body, dict) else {}

    def get_job_detail(self, job_id: str) -> Dict[str, Any]:
        """Fetch the base detail record of one job.

        Args:
            job_id: Job identifier, inserted into the request path.

        Returns:
            The response's ``resultbody`` dict, or ``{}`` when the request
            fails or the API does not report success.
        """
        timestamp = int(time.time())
        api_path = f"open/noauth/jobs/detail/base/{job_id}"
        # The signature covers the path *including* the query string.
        url_path = f"/{api_path}?api_key={self.api_key}&timestamp={timestamp}"
        full_url = f"{self.base_url}{url_path}"

        headers = self.base_headers.copy()
        headers["sign"] = self.signature_generator.generate_signature(url_path)
        headers["property"] = self.build_property(page_code="pages/jobs/jobdetail/jobdetail")
        headers["Content-Type"] = "application/x-www-form-urlencoded"

        response = self._make_request(full_url, None, headers, method="GET")
        return self._result_body(response)

    def get_company_info(self, company_id: str) -> Dict[str, Any]:
        """Return company information via ``qcwy_spider.get_company_info``.

        Any exception raised by the spider is logged and ``{}`` is returned.
        """
        try:
            return qcwy_spider.get_company_info(company_id)
        except Exception as e:
            logger.error(f"Qcwy get_company_info failed: {e}")
            return {}

    def search_jobs(
        self,
        keyword: str,
        job_area: str = "020000",
        page: int = 1,
        page_size: int = 20,
    ) -> List[Dict[str, Any]]:
        """Return one page of jobs from the recommendation endpoint.

        This calls ``open/noauth/recommend/job-tab-dynamic-wx-mini`` with
        ``type="recommend"``; ``keyword`` is included in the payload, but no
        dedicated keyword-search endpoint is used here.

        Args:
            keyword: Sent as the ``keyword`` field of the payload.
            job_area: Sent as ``jobArea``.
            page: Sent as ``pageNo``.
            page_size: Sent as ``pageSize``.

        Returns:
            ``resultbody.jobList.items`` from the response, or ``[]`` when the
            request fails, the API does not report success, or any level of
            that path is missing or null.
        """
        timestamp = int(time.time())
        data = {
            "pageNo": page,
            "pageSize": page_size,
            "keyword": keyword,
            "jobArea": job_area,
            "type": "recommend",
            "isTouristMode": True,
            "specialPageCode": True,
        }
        api_path = "open/noauth/recommend/job-tab-dynamic-wx-mini"
        url_path = f"/{api_path}?api_key={self.api_key}&timestamp={timestamp}"
        full_url = f"{self.base_url}{url_path}"

        # NOTE(review): the signature is computed over the payload *before*
        # booleans are turned into "true"/"false" strings, while the body that
        # is sent carries the strings.  This ordering is kept as it was --
        # confirm it matches what the server verifies.
        signature = self.signature_generator.generate_signature(url_path, data)

        headers = self.base_headers.copy()
        headers["sign"] = signature
        headers["property"] = self.build_property()

        # Booleans are sent as the strings "true"/"false".  A new dict is
        # built instead of rewriting `data` while iterating over it.
        body = {
            key: ("true" if value else "false") if isinstance(value, bool) else value
            for key, value in data.items()
        }

        response = self._make_request(full_url, body, headers, method="POST")
        job_list = self._result_body(response).get("jobList")
        items = job_list.get("items") if isinstance(job_list, dict) else None
        return items if isinstance(items, list) else []

    def get_company_jobs_by_id(
        self,
        company_id: str,
        page: int = 1,
        page_size: int = 30,
        job_area: str = "",
        function: str = "",
        salary_type: str = "",
    ) -> Dict[str, Any]:
        """Return a company's jobs via ``qcwy_spider.company_jobs_by_id``.

        Args:
            company_id: Forwarded as ``co_id``.
            page: Forwarded as ``page``.
            page_size: Forwarded as ``size``.
            job_area: Accepted but not forwarded to the spider.
            function: Accepted but not forwarded to the spider.
            salary_type: Accepted but not forwarded to the spider.

        Returns:
            Whatever the spider returns, or ``{}`` if it raises (the error is
            logged).
        """
        # NOTE(review): job_area / function / salary_type are currently
        # ignored -- confirm whether the spider supports these filters.
        try:
            return qcwy_spider.company_jobs_by_id(
                co_id=company_id,
                page=page,
                size=page_size,
            )
        except Exception as e:
            logger.error(f"Qcwy get_company_jobs_by_id failed: {e}")
            return {}