# This is the Dockerfile for ArchiveBox. It bundles the following dependencies:
#     python3, ArchiveBox, curl, wget, git, chromium, youtube-dl, single-file,
#     ffmpeg, ripgrep, nodejs
#
# Usage:
#     docker build . -t archivebox --no-cache
#     docker run -v "$PWD/data":/data archivebox init
#     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
#     docker run -v "$PWD/data":/data -it archivebox manage createsuperuser
#     docker run -v "$PWD/data":/data -p 8000:8000 archivebox server
# Base image: Debian buster "slim" variant shipping Python 3.9.
FROM python:3.9-slim-buster

# Image metadata (queryable with `docker inspect`).
LABEL name="archivebox" \
      description="All-in-one personal internet archiving container" \
      maintainer="Nick Sweeting <archivebox-docker@sweeting.me>" \
      homepage="https://github.com/ArchiveBox/ArchiveBox" \
      documentation="https://github.com/ArchiveBox/ArchiveBox/wiki/Docker#docker"
# System-level base config
# Groups, in order: timezone/locale, Python I/O behaviour, apt behaviour.
# NOTE(review): DEBIAN_FRONTEND is set via ENV, so it is also present in the
# environment of running containers, not only during the build.
ENV TZ="UTC" \
    LANG="C.UTF-8" \
    LC_ALL="C.UTF-8" \
    LANGUAGE="en_US:en" \
    PYTHONIOENCODING="UTF-8" \
    PYTHONUNBUFFERED="1" \
    DEBIAN_FRONTEND="noninteractive" \
    APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE="1"
# Application-level base config
# Filesystem locations used by the instructions below, plus the name of the
# unprivileged account created in the next step.
ENV ARCHIVEBOX_USER="archivebox" \
    CODE_DIR="/app" \
    DATA_DIR="/data" \
    NODE_DIR="/node" \
    VENV_PATH="/venv"
# Create non-privileged user for archivebox and chrome
# A system group and a system user share the name in $ARCHIVEBOX_USER; the user
# gets a home directory and is also added to the audio and video groups.
RUN groupadd --system "${ARCHIVEBOX_USER}" \
    && useradd \
        --system \
        --create-home \
        --gid "${ARCHIVEBOX_USER}" \
        --groups audio,video \
        "${ARCHIVEBOX_USER}"
# Install system dependencies
# Package index is refreshed and removed again within the same layer so the
# apt lists are not stored in the image.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        apt-transport-https \
        ca-certificates \
        cron \
        curl \
        dumb-init \
        gnupg2 \
        gosu \
        unzip \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Install apt dependencies
# Archiving tools first, then font packages; apt lists are removed in the same
# layer.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        chromium \
        curl \
        ffmpeg \
        git \
        ripgrep \
        wget \
        youtube-dl \
        fontconfig \
        fonts-freefont-ttf \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-noto \
        fonts-symbola \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
    && rm -rf /var/lib/apt/lists/*
# Install Node environment
# The NodeSource signing key is downloaded to a temporary file instead of being
# piped into apt-key: the default /bin/sh has no pipefail, so a failing curl on
# the left side of a pipe would not by itself fail this step. `curl -f` also
# turns HTTP error responses into a non-zero exit instead of saving the error
# page as if it were the key.
RUN curl -fsS https://deb.nodesource.com/gpgkey/nodesource.gpg.key -o /tmp/nodesource.gpg.key \
    && apt-key add /tmp/nodesource.gpg.key \
    && rm -f /tmp/nodesource.gpg.key \
    && echo 'deb https://deb.nodesource.com/node_15.x buster main' >> /etc/apt/sources.list \
    && apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        nodejs \
    && rm -rf /var/lib/apt/lists/*
# Disabled step kept for reference (was commented out inside the RUN above):
#     && npm install -g npm
# Install Node dependencies
# Only the manifest and lockfile are copied in before `npm ci`; the rest of the
# project is copied by a later instruction.
WORKDIR "$NODE_DIR"

# Make binaries of the locally installed node packages resolvable by name, and
# limit npm output to errors.
ENV PATH="${PATH}:$NODE_DIR/node_modules/.bin" \
    npm_config_loglevel=error

# COPY rather than ADD: these are plain local files, so ADD's extra behaviours
# (archive extraction, URL fetching) are not wanted here.
COPY ./package.json ./package-lock.json ./

RUN npm ci
# Install Python dependencies
WORKDIR "$CODE_DIR"

# NOTE(review): $VENV_PATH/bin is appended AFTER the existing PATH entries, so
# any `pip`/`python` found earlier on PATH takes precedence over the venv's
# copies -- confirm this ordering is intended.
ENV PATH="${PATH}:$VENV_PATH/bin"

# Create the virtualenv, upgrade the packaging tools, and pre-create the package
# directory that the following COPY and RUN steps write into.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN python -m venv --clear --symlinks "$VENV_PATH" \
    && pip install --no-cache-dir --upgrade --quiet pip setuptools \
    && mkdir -p "$CODE_DIR/archivebox"

# COPY rather than ADD: plain local files, no extraction or URL fetch needed.
COPY "./setup.py" "$CODE_DIR/"
COPY "./package.json" "$CODE_DIR/archivebox/"
# Build and install the Python requirements in a single layer:
#   1. install the compiler toolchain and Python headers,
#   2. write a placeholder README.md for setup.py to use,
#   3. have setup.py report install_requires plus the "sonic" extra into
#      /tmp/requirements.txt,
#   4. pip-install that list (--no-cache-dir keeps pip's download cache out of
#      the layer),
#   5. purge the toolchain again and drop the apt lists so none of it remains
#      in the resulting layer.
RUN apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        build-essential python-dev python3-dev \
    && echo 'empty placeholder for setup.py to use' > "$CODE_DIR/archivebox/README.md" \
    && python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.install_requires + result.extras_require["sonic"]))' > /tmp/requirements.txt \
    && pip install --no-cache-dir --quiet -r /tmp/requirements.txt \
    && apt-get purge -y build-essential python-dev python3-dev \
    && apt-get autoremove -y \
    && rm -rf /var/lib/apt/lists/*
# Install apt development dependencies (disabled; uncomment to enable)
# RUN apt-get update -qq \
#     && apt-get install -qq -y --no-install-recommends \
#         python3 python3-dev python3-pip python3-venv python3-all \
#         dh-python debhelper devscripts dput software-properties-common \
#         python3-distutils python3-setuptools python3-wheel python3-stdeb
# RUN python3 -c 'from distutils.core import run_setup; result = run_setup("./setup.py", stop_after="init"); print("\n".join(result.extras_require["dev"]))' > /tmp/dev_requirements.txt \
#     && pip install --quiet -r /tmp/dev_requirements.txt
# Install ArchiveBox Python package and its dependencies
WORKDIR "$CODE_DIR"

# COPY rather than ADD: the build context is copied as-is, so ADD's archive
# extraction and URL handling are not wanted.
COPY . "$CODE_DIR"

# Editable install of the project from the working directory.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir -e .
# Setup ArchiveBox runtime config
# From here on the working directory is the data directory.
WORKDIR "$DATA_DIR"

# Feature switches first, then the binaries they refer to. The node-based
# tools are addressed by their path under $NODE_DIR/node_modules/.bin.
ENV IN_DOCKER=True \
    CHROME_SANDBOX=False \
    USE_SINGLEFILE=True \
    USE_READABILITY=True \
    USE_MERCURY=True \
    CHROME_BINARY="chromium" \
    SINGLEFILE_BINARY="${NODE_DIR}/node_modules/.bin/single-file" \
    READABILITY_BINARY="${NODE_DIR}/node_modules/.bin/readability-extractor" \
    MERCURY_BINARY="${NODE_DIR}/node_modules/.bin/mercury-parser"
# Print version for nice docker finish summary
# The command is run through the entrypoint script rather than directly
# (previous direct form, disabled: RUN archivebox version).
# $CODE_DIR is /app, so this is the same script the ENTRYPOINT uses.
RUN "${CODE_DIR}/bin/docker_entrypoint.sh" archivebox version
# Open up the interfaces to the outside world
# The data directory is declared as a volume and the web port is documented.
VOLUME "${DATA_DIR}"
EXPOSE 8000/tcp

# Optional health check (disabled; uncomment to enable):
# HEALTHCHECK --interval=30s --timeout=20s --retries=15 \
#     CMD curl --silent 'http://localhost:8000/admin/login/' || exit 1

# dumb-init runs as the container's init process and launches the entrypoint
# script, which receives the CMD below (or the user's override) as arguments.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh"]

# Default command: start the server on all interfaces, port 8000.
CMD ["archivebox", "server", "--quick-init", "0.0.0.0:8000"]