# 373 lines
# 14 KiB
# Python

import requests
import time
import json
import uuid
from typing import Dict, Any, Optional, List
from app.core.algorithms.antispider import IPStrategyConfig, IPAnomalyDetector, SmartIPManager, generate_boss_trace_id, generate_token
from loguru import logger
import os
from urllib.parse import urlencode
class BossService:
    """Client for the zhipin.com ``/wapi`` mini-program endpoints.

    Holds one ``requests.Session`` pre-loaded with default headers and cookies,
    routes it through the proxy chosen by ``SmartIPManager`` (if any), and
    exposes best-effort fetchers that return parsed JSON or ``None`` on failure.
    """

    # Lower-cased header names whose values must never reach the logs.
    _SENSITIVE_HEADER_NAMES = frozenset(
        {"mpt", "wt2", "cookie", "set-cookie", "authorization", "proxy-authorization"}
    )
    # Placeholder written to the logs in place of a sensitive value.
    _REDACTED = "***"
    # Timeout, in seconds, applied to every outgoing request.
    _REQUEST_TIMEOUT = 30

    def __init__(self, proxy_pool: Optional[List[Dict[str, str]]] = None):
        """Create the session, select the initial route and seed headers/cookies.

        Args:
            proxy_pool: Optional list of proxy mappings handed to
                ``SmartIPManager``; ``None`` leaves the choice to the manager.
        """
        self.app_id = 10002
        self.zp_product_id = 10002
        self.serve_domain = "https://www.zhipin.com"
        self.api_domain = "https://wxapp.zhipin.com"

        self.session = requests.Session()
        # Do not let requests pick up proxy settings from the environment.
        self.session.trust_env = False
        # NOTE(review): this adds an HTTP request header literally named
        # "no_proxy"; it is not a proxy bypass rule. Kept as-is - confirm intent.
        self.session.headers.update({'no_proxy': '10.0.0.0/16,example.com,.example.com'})

        # Initialize IP strategy.
        self.ip_cfg = IPStrategyConfig()
        self.ip_detector = IPAnomalyDetector(self.ip_cfg)
        self.ip_manager = SmartIPManager(proxy_pool, self.ip_cfg)

        # Initial route: only install proxies when the manager selects one.
        route_mode, route_cfg = self.ip_manager.current_route()
        if route_mode == 'proxy' and route_cfg:
            self.session.proxies = route_cfg

        self.device_id = str(uuid.uuid4())
        self.wx_version = "8.0.43"
        self.mini_version = "1.0.0"
        self.scene = 1001

        self.default_headers = {
            "Accept": "*/*",
            "Accept-Language": "zh-CN,zh;q=0.9",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "Content-Type": "application/x-www-form-urlencoded",
            "Host": "www.zhipin.com",
            "Referer": "https://servicewechat.com/wxa8da525af05281f3/571/page-frame.html",
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/107.0.0.0 Safari/537.36 MicroMessenger/6.8.0(0x16080000) NetType/WIFI MiniProgramEnv/Mac MacWechat/WMPF MacWechat/3.8.10(0x13080a10) XWEB/1227",
            "X-Requested-With": "XMLHttpRequest",
            "platform": "zhipin/mac",
            "zp_app_id": str(self.app_id),
            "ver": "100.0000",
            "mini_ver": "100.0000",
            "ua": json.dumps({"model": "Mac16,8", "platform": "mac"}),
            "zp_product_id": str(self.zp_product_id),
            "scene": "1006",
            "xweb_xhr": "1",
            "sec-fetch-site": "cross-site",
            "sec-fetch-mode": "cors",
            "sec-fetch-dest": "empty",
        }

        self.login_data = {
            "mpt": "",  # Needs to be filled via login/token logic if required
            "wt2": "",
            "openId": "",
            "traceid": "F-77d05bnXuMVrHIB3",
        }
        self.current_token_id: Optional[int] = None
        self.init_cookies()

    def init_cookies(self):
        """Seed the session cookie jar with the baseline ``.zhipin.com`` cookies."""
        now = time.time()
        now_seconds = str(int(now))
        cookies = {
            '__zp_stoken__': generate_token(),
            'Hm_lvt_194df3105ad7148dcf2b98a91b5e727a': now_seconds,
            'Hm_lpvt_194df3105ad7148dcf2b98a91b5e727a': now_seconds,
            '__c': self.device_id[:8],
            '__g': '-',
            '__l': 'l=%2Fwww.zhipin.com%2F&r=&friend_source=0&s=3&friend_source=0',
            'lastCity': '101010100',
            'cityName': '%E5%8C%97%E4%BA%AC',
            '__zp_sseed__': 'btHZ0bjBq8m//WNwlVrPUnVcIvini5J5P5LQUbflM24=',
            '__zp_sname__': '3998243a',
            '__zp_sts__': str(int(now * 1000)),
        }
        for name, value in cookies.items():
            self.session.cookies.set(name, value, domain='.zhipin.com')

    def set_login_data(self, mpt: str, wt2: str, open_id: str = "") -> None:
        """Store login credentials and mirror the non-empty ones as cookies.

        Args:
            mpt: Value stored under ``login_data["mpt"]`` and sent as a header.
            wt2: Value stored under ``login_data["wt2"]`` and sent as a header.
            open_id: Value stored under ``login_data["openId"]``.
        """
        self.login_data.update({"mpt": mpt, "wt2": wt2, "openId": open_id})
        # Empty values are not written to the jar; existing cookies are left alone.
        for cookie_name, cookie_value in (("wt2", wt2), ("mpt", mpt)):
            if cookie_value:
                self.session.cookies.set(cookie_name, cookie_value, domain=".zhipin.com")

    def set_proxy(self, proxy: Optional[str]) -> None:
        """Route the session through ``proxy`` or fall back to the default route.

        Args:
            proxy: Proxy URL used for both http and https. ``None``, an empty
                string, or a string that is blank once surrounding whitespace
                and backticks are removed resets the session to the route
                reported by ``SmartIPManager``.
        """
        # Normalise before the emptiness check so "  " or "``" cannot install
        # an empty proxy URL on the session.
        cleaned = proxy.strip().strip("`").strip() if proxy else ""
        if not cleaned:
            self.session.proxies = {}
            route_mode, route_cfg = self.ip_manager.current_route()
            if route_mode == "proxy" and route_cfg:
                self.session.proxies = route_cfg
            logger.info("BossService proxy reset to default route")
            return

        proxies = {"http": cleaned, "https": cleaned}
        self.session.proxies = proxies
        logger.info(f"BossService using user proxy: {proxies}")

    def build_request_headers(self, custom_headers: Optional[Dict] = None) -> Dict[str, str]:
        """Return the headers for one request.

        Starts from a copy of ``default_headers``, adds the login values, a
        fresh trace id and a millisecond timestamp, then applies
        ``custom_headers`` last so the caller's values win.

        Args:
            custom_headers: Optional per-request overrides.

        Returns:
            A new header dict; ``default_headers`` is not modified.
        """
        headers = self.default_headers.copy()
        headers.update({
            "mpt": self.login_data.get("mpt", ""),
            "scene": "1006",
            "wt2": self.login_data.get("wt2", ""),
            "Traceid": generate_boss_trace_id(),
        })
        headers["timestamp"] = str(int(time.time() * 1000))
        if custom_headers:
            headers.update(custom_headers)
        return headers

    def _sanitize_headers(self, headers: Dict[str, Any]) -> Dict[str, Any]:
        """Return a copy of ``headers`` that is safe to write to the logs.

        Non-empty values of credential-bearing headers (matched
        case-insensitively against ``_SENSITIVE_HEADER_NAMES``) are replaced by
        ``_REDACTED``. Empty values are kept so the log still shows that the
        credential was not set. The input mapping is never mutated.

        Args:
            headers: Request or response headers; ``None`` is treated as empty.

        Returns:
            A new dict with the same keys and masked sensitive values.
        """
        sanitized: Dict[str, Any] = {}
        for name, value in (headers or {}).items():
            is_sensitive = str(name).lower() in self._SENSITIVE_HEADER_NAMES
            sanitized[name] = self._REDACTED if (is_sensitive and value) else value
        return sanitized

    @staticmethod
    def _shell_single_quote(value: Any) -> str:
        """Escape ``value`` for embedding inside a single-quoted shell string."""
        return str(value).replace("'", "'\"'\"'")

    def _build_curl_command(
        self,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
    ) -> str:
        """Render the request as a ``curl`` command line for debugging."""
        curl_url = url
        if params and isinstance(params, dict):
            query_string = urlencode(params)
            if query_string:
                separator = "&" if "?" in curl_url else "?"
                curl_url = f"{curl_url}{separator}{query_string}"

        header_parts = [
            f"-H '{name}: {self._shell_single_quote(value)}'"
            for name, value in headers.items()
        ]

        data_part = ""
        if json_body is not None:
            body_str = self._shell_single_quote(json.dumps(json_body, ensure_ascii=False))
            data_part = f" --data '{body_str}'"

        return f"curl -X {method} '{curl_url}' " + " ".join(header_parts) + data_part

    def _log_request_response(
        self,
        label: str,
        method: str,
        url: str,
        headers: Dict[str, Any],
        params: Optional[Dict[str, Any]] = None,
        json_body: Optional[Dict[str, Any]] = None,
        response: "Optional[requests.Response]" = None,
    ) -> None:
        """Log a request, an equivalent curl command and the response summary.

        Headers are passed through ``_sanitize_headers`` first, so the logged
        curl command carries masked credentials and cannot be replayed as-is.

        Args:
            label: Short tag placed in the ``[Boss-<label>]`` log prefix.
            method: HTTP method name.
            url: Request URL without the query string.
            headers: Headers that were sent.
            params: Query parameters, if any.
            json_body: JSON payload, if any.
            response: Response to summarise; nothing is logged for ``None``.
        """
        safe_headers = self._sanitize_headers(headers)
        proxy_info = getattr(self.session, "proxies", None) or {}
        # Only record whether credentials exist, never their values.
        login_flags = {
            "mpt_set": bool(self.login_data.get("mpt")),
            "wt2_set": bool(self.login_data.get("wt2")),
        }
        logger.info(
            f"[Boss-{label}] request method={method} url={url} headers={safe_headers} "
            f"params={params} json={json_body} proxies={proxy_info} login={login_flags}"
        )

        # The curl line is a debugging aid; a failure here must not affect the call.
        try:
            curl_cmd = self._build_curl_command(method, url, safe_headers, params, json_body)
            logger.info(f"[Boss-{label}] curl_debug {curl_cmd}")
        except Exception as e:
            logger.debug(f"[Boss-{label}] build curl error: {e}")

        if response is None:
            return

        try:
            text_sample = (response.text or "")[:1000]
        except Exception:
            text_sample = "<unreadable>"
        logger.info(
            f"[Boss-{label}] response status={response.status_code} "
            f"headers={self._sanitize_headers(dict(response.headers))} "
            f"body_sample={text_sample}"
        )

    def build_request_data(self, data: Optional[Dict] = None) -> Dict[str, Any]:
        """Return the common request parameters merged with ``data``.

        Args:
            data: Extra parameters; they override ``appId``, ``scene`` and
                ``timestamp`` when the same key is present.

        Returns:
            A new dict containing the base parameters plus ``data``.
        """
        request_data = {
            "appId": self.app_id,
            "scene": self.scene,
            "timestamp": int(time.time() * 1000),
        }
        if data:
            request_data.update(data)
        return request_data

    def _get_json(
        self,
        label: str,
        path: str,
        params: Dict[str, Any],
        referer: str,
        error_prefix: str,
    ) -> Optional[Dict]:
        """GET ``serve_domain + path`` and return the decoded JSON body.

        Any exception (header building, network, HTTP status, JSON decoding) is
        logged as ``"<error_prefix>: <exception>"`` and turned into ``None``.
        """
        url = f"{self.serve_domain}{path}"
        try:
            headers = self.build_request_headers({"Referer": referer})
            request_data = self.build_request_data(params)
            response = self.session.get(
                url,
                headers=headers,
                params=request_data,
                timeout=self._REQUEST_TIMEOUT,
            )
            self._log_request_response(
                label,
                "GET",
                url,
                headers,
                params=request_data,
                json_body=None,
                response=response,
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            logger.error(f"{error_prefix}: {e}")
            return None

    def get_job_detail_by_id(self, job_id: str, lid: str = "", security_id: str = "") -> Optional[Dict]:
        """Fetch a job posting's detail by its job id.

        Sends one batch POST containing the job-detail and the
        improvement-query sub-requests.

        Args:
            job_id: Value sent as ``jobId``.
            lid: Value sent as ``lid``.
            security_id: Value sent as ``securityId``.

        Returns:
            The job-detail entry of ``zpData`` when the response has
            ``code == 0`` and contains it; otherwise the whole decoded
            response. ``None`` on any failure.
        """
        logger.info(f"🔍 获取招聘详情: {job_id}")
        job_detail_path = "/wapi/zpgeek/miniapp/job/detail.json"
        url = f"{self.serve_domain}/wapi/batch/requests"

        # Batch request simulation.
        sub_reqs = [
            {
                "path": job_detail_path,
                "method": "GET",
                "query": urlencode({
                    "securityId": security_id,
                    "jobId": job_id,
                    "lid": lid,
                    "source": "10",
                }),
            },
            {
                "path": "/wapi/zpgeek/miniapp/jobdetail/improvement/query.json",
                "method": "GET",
                "query": urlencode({
                    "securityId": security_id,
                    "jobId": job_id,
                    "lid": lid,
                }),
            },
        ]
        post_data = {"subReqs": sub_reqs, "appId": self.app_id}

        try:
            headers = self.build_request_headers({
                "Content-Type": "application/json",
                "Referer": "https://servicewechat.com/wxa8da525af05281f3/585/page-frame.html",
            })
            response = self.session.post(
                url,
                json=post_data,
                headers=headers,
                timeout=self._REQUEST_TIMEOUT,
            )
            self._log_request_response(
                "job-detail",
                "POST",
                url,
                headers,
                params=None,
                json_body=post_data,
                response=response,
            )
            response.raise_for_status()
            data = response.json()
            # Prefer the job-detail part of the batch response when present.
            if data.get("code") == 0 and data.get("zpData"):
                if job_detail_path in data["zpData"]:
                    return data["zpData"][job_detail_path]
            return data
        except Exception as e:
            logger.error(f"Failed to fetch job detail: {e}")
            return None

    def get_company_detail_by_id(self, company_id: str) -> Optional[Dict]:
        """Fetch a company's detail by its company (brand) id.

        Args:
            company_id: Value sent as ``brandId``.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        logger.info(f"🏢 获取公司详情: {company_id}")
        params = {
            "brandId": company_id,
            "appId": "10002",
        }
        return self._get_json(
            "company-detail",
            "/wapi/zpgeek/miniapp/brand/detail.json",
            params,
            "https://servicewechat.com/wxa8da525af05281f3/574/page-frame.html",
            "Failed to fetch company detail",
        )

    def get_company_jobs_by_id(self, company_id: str, page: int = 1) -> Optional[Dict]:
        """Fetch one page of a company's job list by its company (brand) id.

        Args:
            company_id: Value sent as ``brandId``.
            page: Value sent as ``page``.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        logger.info(f"📄 获取公司职位列表: {company_id}, page={page}")
        params = {
            "brandId": company_id,
            "query": "",
            "page": page,
            "hasMore": "true",
            "positionLv1": 0,
            "city": "",
            "experience": "",
            "salary": "",
            "appId": "10002",
        }
        return self._get_json(
            "company-joblist",
            "/wapi/zpgeek/miniapp/brand/joblist.json",
            params,
            "https://servicewechat.com/wxa8da525af05281f3/587/page-frame.html",
            "Failed to fetch company job list",
        )

    def search_jobs(self, keyword: str, city_code: str = "101010100", page: int = 1) -> Optional[Dict]:
        """Search jobs by keyword.

        Args:
            keyword: Value sent as ``query``.
            city_code: Value sent as ``city``.
            page: Value sent as ``page``.

        Returns:
            The decoded JSON response, or ``None`` on any failure.
        """
        params = {
            'pageSize': 15,
            'query': keyword,
            'city': city_code,
            'page': page,
            'appId': '10002',
        }
        return self._get_json(
            "search-jobs",
            "/wapi/zpgeek/miniapp/search/joblist.json",
            params,
            "https://www.zhipin.com/web/geek/job",
            "Search failed",
        )