# This is the Dockerfile for ArchiveBox. It bundles the following major pieces:
#     git, curl, wget, python3, youtube-dl, ffmpeg, google-chrome-stable,
#     nodejs, SingleFile, ArchiveBox
# Usage:
#     docker build . -t archivebox
#     docker run -v "$PWD/data":/data archivebox init
#     docker run -v "$PWD/data":/data archivebox add 'https://example.com'
# Documentation:
#     https://github.com/pirate/ArchiveBox/wiki/Docker#docker

# Base image: official Python 3.8 image, "slim" Debian buster variant.
FROM python:3.8-slim-buster

# Image metadata. The original name/maintainer/description keys are kept for
# any tooling that already reads them; the OCI-standard keys are added
# alongside so registries and scanners can pick the same information up.
LABEL name="archivebox" \
      maintainer="Nick Sweeting <archivebox-git@sweeting.me>" \
      description="All-in-one personal internet archiving container" \
      org.opencontainers.image.description="All-in-one personal internet archiving container" \
      org.opencontainers.image.documentation="https://github.com/pirate/ArchiveBox/wiki/Docker#docker"

# Build-time only: consumed by `apt-key` while the Chrome signing key is added.
# Declared as ARG so it is exported to the RUN steps of this build without
# being baked into the environment of running containers.
ARG APT_KEY_DONT_WARN_ON_DANGEROUS_USAGE=1

# Runtime environment shared by every later instruction and by the container:
#   - timezone / locale defaults
#   - Python stdio encoding and unbuffered output
#   - filesystem layout (*_PATH) referenced by WORKDIR, COPY, VOLUME, and the
#     virtualenv / SingleFile setup further down
ENV TZ=UTC \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    LANG=C.UTF-8 \
    PYTHONIOENCODING=UTF-8 \
    PYTHONUNBUFFERED=1 \
    CODE_PATH=/app \
    VENV_PATH=/venv \
    DATA_PATH=/data \
    EXTRA_PATH=/extra

# Docker's default shell form is `/bin/sh -c`, where a pipeline only reports
# the exit status of its last command. Several steps below pipe a download
# into another program, so switch to bash with pipefail to make a failed
# download abort the build instead of being silently ignored.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# First install CLI utils and base deps, then Chrome + Fonts + nodejs.
# Everything happens in one layer so the apt lists removed at the end never
# end up in the image.
RUN echo 'debconf debconf/frontend select Noninteractive' | debconf-set-selections \
    && apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        apt-transport-https ca-certificates apt-utils gnupg gosu gnupg2 libgconf-2-4 zlib1g-dev \
        dumb-init jq git wget curl youtube-dl ffmpeg \
    # Register Google's apt repository for Chrome (-f: fail on HTTP errors
    # rather than piping an error page into apt-key).
    && curl -fsSL "https://dl.google.com/linux/linux_signing_key.pub" | apt-key add - \
    && echo "deb https://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google-chrome.list \
    # Register the NodeSource repository for Node.js 14.x.
    # NOTE(review): this executes a remote script unverified; consider pinning
    # the repository definition and key explicitly.
    && curl -fsSL https://deb.nodesource.com/setup_14.x | bash - \
    && apt-get update -qq \
    && apt-get install -qq -y --no-install-recommends \
        google-chrome-stable \
        fontconfig \
        fonts-ipafont-gothic \
        fonts-wqy-zenhei \
        fonts-thai-tlwg \
        fonts-kacst \
        fonts-symbola \
        fonts-noto \
        fonts-freefont-ttf \
        nodejs \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Download the SingleFile sources into $EXTRA_PATH and install the Node
# dependencies of its CLI. ArchiveBox is pointed at the resulting executable
# through the SINGLEFILE_BINARY variable set later in this file.
# NOTE(review): this tracks the moving `master` branch, so builds are not
# reproducible; consider pinning a tag/commit archive and verifying a checksum.
WORKDIR "$EXTRA_PATH"
RUN wget -qO SingleFile.zip https://github.com/gildas-lormeau/SingleFile/archive/master.zip \
    && unzip -q SingleFile.zip \
    # Drop the archive in the same layer so it does not bloat the image.
    && rm SingleFile.zip \
    # npm output is discarded; a non-zero exit status still fails the build.
    && npm install --prefix SingleFile-master/cli --production > /dev/null 2>&1 \
    && chmod +x SingleFile-master/cli/single-file

# Create the unprivileged `archivebox` system user and group (with a home
# directory, and membership of the audio and video groups).
# NOTE(review): this file contains no USER instruction, so the remaining RUN
# steps still execute as root. Presumably the entrypoint script drops
# privileges to this user at container start (gosu is installed above) -
# confirm against bin/docker_entrypoint.sh.
RUN groupadd --system archivebox \
    && useradd --system --create-home --gid archivebox --groups audio,video archivebox

# Copy the project source into the image. COPY is used instead of ADD because
# this is a plain local copy and needs none of ADD's extra behaviours
# (URL fetching, tar auto-extraction).
COPY . "$CODE_PATH"
WORKDIR "$CODE_PATH"

# Make executables from the virtualenv reachable.
# NOTE(review): the venv directory is appended, so any `python`/`pip` already
# on PATH takes precedence over the venv's copies - confirm this ordering is
# intentional.
ENV PATH="${PATH}:$VENV_PATH/bin"

# Create the virtualenv, refresh the packaging tools, then install ArchiveBox
# from the copied source tree in editable mode. --no-cache-dir keeps pip's
# download cache out of the image layer.
RUN python -m venv --clear --symlinks "$VENV_PATH" \
    && pip install --no-cache-dir --upgrade pip setuptools \
    && pip install --no-cache-dir -e .

# The data directory is a volume and the default working directory, matching
# the `-v "$PWD/data":/data` usage shown in the header of this file.
VOLUME "$DATA_PATH"
WORKDIR "$DATA_PATH"

# Port used by the default `server 0.0.0.0:8000` command (documentation only;
# publish it with `docker run -p`).
EXPOSE 8000

# Tell ArchiveBox which helper binaries to use inside the container.
# NOTE(review): CHROME_SANDBOX=False presumably disables Chrome's sandbox for
# running inside Docker - confirm against the ArchiveBox configuration docs.
ENV CHROME_BINARY=google-chrome \
    CHROME_SANDBOX=False \
    SINGLEFILE_BINARY="$EXTRA_PATH/SingleFile-master/cli/single-file"

# Build-time smoke test: fail the build if the installed `archivebox` command
# cannot run. ALLOW_ROOT=True is set for this one command only, since the
# build executes as root.
RUN env ALLOW_ROOT=True archivebox version

# dumb-init runs as the init process and launches the entrypoint script with
# `archivebox` plus the CMD arguments. The script path is spelled out as /app
# (the value of CODE_PATH) because exec-form instructions do not expand
# environment variables.
ENTRYPOINT ["dumb-init", "--", "/app/bin/docker_entrypoint.sh", "archivebox"]

# Default arguments: serve on all interfaces, port 8000. Override by passing
# another subcommand to `docker run` (e.g. `init`, `add <url>`).
CMD ["server", "0.0.0.0:8000"]