""" core.http_client - 通用 HTTP 客户端 基于 requests-go,自带 Chrome TLS 指纹伪装 支持代理 IP / 隧道代理 / 代理池轮换 与任何招聘平台无关,纯粹负责发请求 """ from __future__ import annotations import random from typing import Any, Optional import requests_go as requests from requests_go.tls_config import TLS_CHROME_LATEST class HTTPClient: """ 通用 HTTP 客户端 Args: base_url: API 基础地址 default_headers: 默认请求头 proxy: 固定代理地址(绑定到 session,复用连接) tunnel_proxy: 隧道代理地址(每次请求新建 session,确保 IP 轮换) proxy_pool: 代理池列表(每次请求随机选一个) timeout: 请求超时秒数(默认 10) 代理优先级: tunnel_proxy > proxy_pool > proxy 三者只用其一即可。 代理格式示例: 普通代理: "http://127.0.0.1:7890" SOCKS5 代理: "socks5://127.0.0.1:1080" 隧道代理: "http://user:pass@tunnel.example.com:12345" 隧道代理(认证): "http://account-zone-xxx:password@proxy.host:port" 隧道代理用法(每次请求自动换 IP): client = HTTPClient( base_url="https://example.com", tunnel_proxy="http://user:pass@tunnel.example.com:12345", ) # 每次 get/post 都会新建 TCP 连接,隧道代理自动分配新 IP """ def __init__( self, base_url: str, default_headers: Optional[dict] = None, proxy: Optional[str] = None, tunnel_proxy: Optional[str] = None, proxy_pool: Optional[list[str]] = None, timeout: int = 10, ): self.base_url = base_url self.default_headers = default_headers or {} self.timeout = timeout # 代理配置 self._proxy = proxy self._tunnel_proxy = tunnel_proxy self._proxy_pool = proxy_pool # 创建 session + TLS 指纹 self._session = requests.Session() self._session.tls_config = TLS_CHROME_LATEST TLS_CHROME_LATEST.random_ja3 = True # 固定代理直接设到 session 上 if proxy and not proxy_pool and not tunnel_proxy: self._session.proxies = {"http": proxy, "https": proxy} def _new_session(self) -> requests.Session: """创建全新 session(用于隧道代理 IP 轮换)""" s = requests.Session() s.tls_config = TLS_CHROME_LATEST TLS_CHROME_LATEST.random_ja3 = True return s def _get_proxies(self) -> Optional[dict]: """获取本次请求的代理配置""" if self._proxy_pool: # 代理池:随机选一个,加 #random_hash 打破连接复用 chosen = random.choice(self._proxy_pool) unique = f"{chosen}#{random.randint(100000, 999999)}" return {"http": unique, "https": unique} return None # 固定代理已在 session 上,不需要每次传 def _merge_headers(self, extra: Optional[dict] = None) -> dict: headers = {**self.default_headers} if extra: headers.update(extra) return headers def post(self, path: str, body: dict, headers: Optional[dict] = None) -> tuple[int, Any]: """发送 POST 请求""" merged_headers = self._merge_headers(headers) # 隧道代理:每次新 session,确保 IP 轮换 if self._tunnel_proxy: s = self._new_session() try: resp = s.post( f"{self.base_url}{path}", json=body, headers=merged_headers, proxies={"http": self._tunnel_proxy, "https": self._tunnel_proxy}, timeout=self.timeout, ) return resp.status_code, resp.json() finally: s.close() kwargs: dict[str, Any] = { "json": body, "headers": merged_headers, "timeout": self.timeout, } proxies = self._get_proxies() if proxies: kwargs["proxies"] = proxies resp = self._session.post(f"{self.base_url}{path}", **kwargs) return resp.status_code, resp.json() def get(self, path: str, params: Optional[dict] = None, headers: Optional[dict] = None) -> tuple[int, Any]: """发送 GET 请求""" merged_headers = self._merge_headers(headers) # 隧道代理:每次新 session,确保 IP 轮换 if self._tunnel_proxy: s = self._new_session() try: resp = s.get( f"{self.base_url}{path}", params=params, headers=merged_headers, proxies={"http": self._tunnel_proxy, "https": self._tunnel_proxy}, timeout=self.timeout, ) return resp.status_code, resp.json() finally: s.close() kwargs: dict[str, Any] = { "params": params, "headers": merged_headers, "timeout": self.timeout, } proxies = self._get_proxies() if proxies: kwargs["proxies"] = proxies resp = self._session.get(f"{self.base_url}{path}", **kwargs) return resp.status_code, resp.json()