# Image for the spider jobs: Python 3.11 on Debian bullseye (slim variant).
FROM python:3.11-slim-bullseye

# All application files live under /opt/spider.
WORKDIR /opt/spider

# Timezone + basic tools.
# The apt package lists are removed in the same layer so they do not end up
# in the image.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime \
    && echo "Asia/Shanghai" > /etc/timezone \
    && apt-get update \
    && apt-get install -y --no-install-recommends curl \
    && rm -rf /var/lib/apt/lists/*

# Python dependencies (copy the dependency file first to take advantage of
# the Docker layer cache).
COPY crawler_core/pyproject.toml /opt/spider/crawler_core/pyproject.toml

# NOTE: the version specifier for tenacity must be quoted. RUN goes through
# /bin/sh, and an unquoted `tenacity>=8.0` is parsed as "install tenacity and
# redirect stdout to a file named '=8.0'", which silently drops the version
# constraint.
RUN pip install --no-cache-dir \
        requests_go==1.0.9 \
        "tenacity>=8.0" \
        requests \
        PySocks \
        six \
        -i https://pypi.tuna.tsinghua.edu.cn/simple

# Copy the application code.
COPY crawler_core/ /opt/spider/crawler_core/
COPY spiderJobs/ /opt/spider/spiderJobs/

# Default environment variables; each can be overridden at `docker run` time.
# NOTE(review): API_TOKEN is baked into the image as a default value. Confirm
# that "dev" is only a placeholder and that real tokens are injected at
# runtime rather than committed here.
ENV API_BASE_URL=http://124.222.106.226:9999 \
    API_TOKEN=dev \
    PLATFORM=boss \
    SLEEP_MIN_SECONDS=5 \
    SLEEP_MAX_SECONDS=12 \
    MAX_PAGES=100 \
    INLINE_COMPANY=0 \
    PYTHONUNBUFFERED=1

# The proxy is built into the code (spiderJobs/__init__.py).
# Set PROXY_TUNNEL=none to disable the proxy.

# Install the entrypoint script and make it executable.
COPY spider-entrypoint.sh /opt/spider/entrypoint.sh
RUN chmod +x /opt/spider/entrypoint.sh

# Exec form, so the entrypoint script runs as PID 1 and receives signals
# directly.
ENTRYPOINT ["/opt/spider/entrypoint.sh"]