JobData/company_spider/Dockerfile
2026-01-24 17:07:34 +08:00

47 lines
1.4 KiB
Docker

# Start from the slim Python 3.11 image.
FROM python:3.11-slim

# Point apt-get at the Aliyun mirror.
# The debian.sources file is tried first, then the classic sources.list;
# the first file that sed can rewrite wins. This step is best-effort:
# if neither file can be edited the build still continues.
RUN for apt_sources in \
        /etc/apt/sources.list.d/debian.sources \
        /etc/apt/sources.list; \
    do \
        if sed -i 's/deb.debian.org/mirrors.aliyun.com/g' "$apt_sources"; then \
            break; \
        fi; \
    done
# Install system dependencies.
# Node.js is required for PyExecJS; npm is installed alongside it.
# --no-install-recommends keeps optional "recommended" packages out of the
# image; ca-certificates is listed explicitly so HTTPS access does not depend
# on it being pulled in as a recommendation.
# The apt package index is removed in the same layer so it never ships.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ca-certificates \
        nodejs \
        npm \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*
# Configure npm to use the Taobao (npmmirror.com) registry mirror.
RUN npm config set registry https://registry.npmmirror.com
# Working directory for the remaining build steps (relative COPY destinations)
# and for the container's start command.
WORKDIR /app
# Configure pip to use a mainland-China mirror by writing a config file:
# Tsinghua (TUNA) index, that host marked as trusted, and a 120-second timeout.
RUN mkdir -p /root/.pip \
    && printf '%s\n' \
        '[global]' \
        'index-url = https://pypi.tuna.tsinghua.edu.cn/simple' \
        'trusted-host = pypi.tuna.tsinghua.edu.cn' \
        'timeout = 120' \
        > /root/.pip/pip.conf
# Copy only the dependency manifest first so the install layer below is
# reused from cache until requirements.txt itself changes.
COPY requirements.txt ./

# Install the Python dependencies through the configured mirror, without
# keeping pip's download cache in the image.
RUN pip install --no-cache-dir --requirement requirements.txt
# Install the Playwright Chromium browser and the system libraries it needs.
# Only Chromium is required for this project.
# Browser downloads go through the npmmirror.com Playwright mirror.
ENV PLAYWRIGHT_DOWNLOAD_HOST=https://npmmirror.com/mirrors/playwright

# --with-deps installs the OS packages and the browser in a single step.
# Installing the OS packages refreshes the apt package index, so the index is
# deleted in the same layer to keep it out of the final image.
RUN playwright install --with-deps chromium \
    && rm -rf /var/lib/apt/lists/*
# Copy the application source last so that code changes do not invalidate the
# dependency and browser layers built earlier.
# NOTE(review): this copies the entire build context — confirm a .dockerignore
# excludes files that should not ship in the image.
COPY . .
# Expose the port
EXPOSE 8000
# Run the application
# uvicorn serves the `app` object from the `main` module on all interfaces, port 8000.
# NOTE(review): no USER instruction is visible, so the process presumably runs
# as root — confirm whether a dedicated non-root user is feasible.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]