42 lines
1.1 KiB
Docker
42 lines
1.1 KiB
Docker
# Base image: the slim Debian bullseye build of Python 3.11.
FROM python:3.11-slim-bullseye

# Everything for the spider lives under /opt/spider; later relative paths
# and the container's default working directory resolve against it.
WORKDIR /opt/spider

# Time zone + base tooling.
# Points /etc/localtime and /etc/timezone at Asia/Shanghai, installs curl,
# and removes the apt package lists in the same layer so they are not
# stored in the image.
RUN ln -sf /usr/share/zoneinfo/Asia/Shanghai /etc/localtime && \
    echo "Asia/Shanghai" > /etc/timezone && \
    apt-get update && \
    apt-get install -y --no-install-recommends \
        curl && \
    rm -rf /var/lib/apt/lists/*

# Python dependencies.
# The dependency manifest is copied on its own, before the rest of the
# source, so that this layer and the install below are only rebuilt when
# pyproject.toml changes (Docker layer cache).
COPY crawler_core/pyproject.toml /opt/spider/crawler_core/pyproject.toml

# NOTE(review): the package list below is spelled out by hand; the install
# command does not read pyproject.toml, so the two must be kept in sync.
#
# The tenacity specifier MUST stay quoted. Shell-form RUN goes through
# /bin/sh, which parses an unquoted `tenacity>=8.0` as the word `tenacity`
# plus a redirection of stdout to a file named "=8.0": the version
# constraint is silently dropped and pip's output lands in that stray file.
RUN pip install --no-cache-dir \
        --index-url https://pypi.tuna.tsinghua.edu.cn/simple \
        PySocks \
        requests \
        requests_go==1.0.9 \
        six \
        "tenacity>=8.0"

# Application code: copy the crawler_core and spiderJobs source trees into
# the image. This comes after the dependency install so that source edits
# do not invalidate the cached pip layer.
COPY crawler_core/ /opt/spider/crawler_core/
COPY spiderJobs/ /opt/spider/spiderJobs/

# Default environment variables. Each one can be overridden when the
# container is started (for example with `docker run -e NAME=value`).

# API endpoint, credentials and target platform.
# NOTE(review): API_TOKEN=dev looks like a development placeholder, and the
# base URL is a hard-coded IP over plain HTTP. Confirm that real values are
# injected at runtime rather than baked into the image.
ENV API_BASE_URL=http://124.222.106.226:9999 \
    API_TOKEN=dev \
    PLATFORM=boss

# Crawl pacing and limits.
ENV SLEEP_MIN_SECONDS=5 \
    SLEEP_MAX_SECONDS=12 \
    MAX_PAGES=100 \
    INLINE_COMPANY=0

# Keep Python's stdout/stderr unbuffered so log lines appear immediately.
ENV PYTHONUNBUFFERED=1

# The proxy is built into the code (spiderJobs/__init__.py).
# Set PROXY_TUNNEL=none to disable the proxy.

# Install the entrypoint script under /opt/spider and make it executable.
COPY spider-entrypoint.sh /opt/spider/entrypoint.sh
RUN chmod +x /opt/spider/entrypoint.sh

# Exec-form entrypoint: the script is started directly, with no wrapping
# shell, so it receives the container's signals.
ENTRYPOINT ["/opt/spider/entrypoint.sh"]