1
0
Fork 0
archivebox/Dockerfile

85 lines
3.5 KiB
Text
Raw Normal View History

2019-02-28 14:04:37 -05:00
# This Dockerfile for ArchiveBox installs the following in a container:
2019-03-30 17:40:55 -04:00
# - curl, wget, python3, youtube-dl, google-chrome-beta
2019-02-28 14:04:37 -05:00
# - ArchiveBox
# Usage:
# docker build github.com/pirate/ArchiveBox -t archivebox
# echo 'https://example.com' | docker run -i --mount type=bind,source=./data,target=/data archivebox /bin/archive
# docker run --mount type=bind,source=./data,target=/data archivebox /bin/archive 'https://example.com/some/rss/feed.xml'
# Documentation:
# https://github.com/pirate/ArchiveBox/wiki/Docker#docker
2020-04-22 21:13:49 -04:00
# Node.js base image: provides npm for the puppeteer install further down.
# The apt-get steps below require this image to be apt-based.
FROM node:13-slim
2018-12-31 20:53:01 -05:00
LABEL maintainer="Nick Sweeting <archivebox-git@sweeting.me>"
2018-10-13 22:47:30 -04:00
# System packages used by later build steps and at runtime, listed one per line
# in alphabetical order. The apt package lists are removed in the same layer.
RUN apt-get update \
    && apt-get install -yq --no-install-recommends \
        curl \
        git \
        gnupg2 \
        jq \
        libgconf-2-4 \
        python3 \
        python3-pip \
        wget \
        youtube-dl \
        zlib1g-dev \
    && rm -rf /var/lib/apt/lists/*
# Install latest chrome package and fonts to support major charsets (Chinese, Japanese, Arabic, Hebrew, Thai and a few others)
2019-03-30 17:35:25 -04:00
# Register Google's apt signing key and repository, then install Chrome beta and
# the fonts in one layer so the apt lists never persist in the image.
# The key is downloaded to a file rather than piped into apt-key, so a failed
# download stops the build at the wget step instead of relying on apt-key to
# reject empty or truncated input (the default /bin/sh has no pipefail).
# NOTE(review): nothing visible in this file writes to /src, so the final
# `rm -rf /src/*.deb` looks like a leftover; it is kept because it is harmless.
RUN wget -q -O /tmp/linux_signing_key.pub https://dl-ssl.google.com/linux/linux_signing_key.pub \
    && apt-key add /tmp/linux_signing_key.pub \
    && rm /tmp/linux_signing_key.pub \
    && echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" > /etc/apt/sources.list.d/google.list \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        fonts-ipafont-gothic \
        fonts-kacst \
        fonts-thai-tlwg \
        fonts-wqy-zenhei \
        google-chrome-beta \
        ttf-freefont \
    && rm -rf /var/lib/apt/lists/* \
    && rm -rf /src/*.deb
2018-10-13 22:47:30 -04:00
# It's a good idea to use dumb-init to help prevent zombie chrome processes.
# Fetch the dumb-init 1.2.0 amd64 binary, then mark it executable using an
# exec-form RUN (no intermediate shell).
# NOTE(review): the download is not checksum-verified; consider pinning a sha256.
ADD https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.2.0_amd64 /usr/local/bin/dumb-init
RUN ["chmod", "+x", "/usr/local/bin/dumb-init"]
2018-10-13 22:47:30 -04:00
# Skip the chromium download when installing puppeteer, since google-chrome-beta
# is already installed above. As a result, you'll need to launch puppeteer with:
2019-03-30 17:40:55 -04:00
# browser.launch({executablePath: 'google-chrome-beta'})
# Set before `npm install puppeteer` so the installer does not fetch its own
# Chromium; the google-chrome-beta package installed earlier is used instead.
ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true
# Install puppeteer so it's available in the container.
2020-04-22 21:13:49 -04:00
# No WORKDIR has been set at this point, so npm installs into the image's
# default working directory; the chown of /node_modules below relies on that
# being `/` (NOTE(review): confirm against the base image).
RUN npm install puppeteer
2018-10-13 22:47:30 -04:00
# Add user so we don't need --no-sandbox.
# Create the system group and user `pptruser` (also a member of audio and video),
# give it a Downloads directory, and hand it ownership of its home directory and
# of the node modules installed above.
RUN groupadd --system pptruser \
    && useradd --system --gid pptruser --groups audio,video pptruser \
    && mkdir -p /home/pptruser/Downloads \
    && chown -R pptruser:pptruser /home/pptruser /node_modules
2020-04-22 21:13:49 -04:00
# Build the Python virtualenv inside the app directory and install the locked
# dependencies into it.
WORKDIR /home/pptruser/app
RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools \
    && python3 -m pip install --no-cache-dir virtualenv \
    && python3 -m virtualenv ".docker-venv"
# Put the virtualenv first on PATH. The path must match the WORKDIR above
# (`pptruser`), otherwise the entry points to a directory that does not exist.
ENV PATH="/home/pptruser/app/.docker-venv/bin:${PATH}"
# Copy only the lock file first so this dependency layer stays cached until the
# lock file itself changes.
COPY ./Pipfile.lock /home/pptruser/app/Pipfile.lock
# Convert the `default` and `develop` sections of Pipfile.lock into
# `name==version` requirement lines and install them with the virtualenv's pip.
# The list goes through a temporary file rather than a pipe, so a jq failure
# aborts the build instead of feeding pip an empty requirements list.
RUN jq -r '.default,.develop | to_entries[] | .key + .value.version' \
        /home/pptruser/app/Pipfile.lock > /tmp/requirements.txt \
    && /home/pptruser/app/.docker-venv/bin/python -m pip install --no-cache-dir -r /tmp/requirements.txt \
    && rm /tmp/requirements.txt /home/pptruser/app/Pipfile.lock
# Install the ArchiveBox repository and pip requirements
2020-04-22 21:13:49 -04:00
# RUN git clone https://github.com/pirate/ArchiveBox /home/pptruser/app \
# Copy the build context into the app directory. COPY is used because this is a
# plain local copy that needs none of ADD's URL or archive handling.
COPY . /home/pptruser/app
# Create /data owned by pptruser and expose it inside the app tree as
# archivebox/output. Symlink every script in the app's bin/ directory into /bin,
# add /bin/archive as an alias for bin/archivebox, and give pptruser ownership
# of the archivebox package directory.
RUN mkdir -p /data \
    && chown -R pptruser:pptruser /data \
    && ln -s /data /home/pptruser/app/archivebox/output \
    && ln -s /home/pptruser/app/bin/* /bin/ \
    && ln -s /home/pptruser/app/bin/archivebox /bin/archive \
    && chown -R pptruser:pptruser /home/pptruser/app/archivebox
2018-10-13 22:47:30 -04:00
2019-01-16 08:05:01 -05:00
# Declared after /data has been created and chowned by the RUN step above.
VOLUME /data
2020-04-22 21:13:49 -04:00
# Port metadata only: EXPOSE documents the port but does not publish it.
EXPOSE 8000
2018-10-13 22:47:30 -04:00
# Runtime environment, in alphabetical order: UTF-8 locale/encoding settings plus
# the Chrome binary name, sandbox flag and output directory
# (presumably read by ArchiveBox at startup; confirm against its config loader).
ENV CHROME_BINARY=google-chrome-beta \
    CHROME_SANDBOX=False \
    LANG=C.UTF-8 \
    LANGUAGE=en_US:en \
    LC_ALL=C.UTF-8 \
    OUTPUT_DIR=/data \
    PYTHONIOENCODING=UTF-8
2018-10-13 22:47:30 -04:00
# Run everything from here on out as non-privileged user
USER pptruser
# Same directory as the earlier WORKDIR; restated after switching to the unprivileged user.
WORKDIR /home/pptruser/app
2018-10-13 22:47:30 -04:00
# dumb-init is the container's entrypoint; CMD (or the arguments given to `docker run`) is passed to it after `--`.
ENTRYPOINT ["dumb-init", "--"]
2020-04-22 21:13:49 -04:00
# Default command. /bin/archivebox is expected to be one of the symlinks created
# from /home/pptruser/app/bin/* earlier in this file.
CMD ["/bin/archivebox"]