From 76f9b91ed3138613b16006f69d78e5c1c296e37f Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Fri, 20 Oct 2023 04:08:38 -0700
Subject: [PATCH] dockerfile fixes

---
Review notes (free-text area, not part of the commit):
  * The line structure of this patch was restored from a whitespace-collapsed
    copy. Blank context lines were placed so every hunk's line counts add up;
    leading indentation is conventional, not recovered. If context fails to
    match, apply with `git apply --ignore-whitespace` -- TODO confirm against
    the target tree.
  * The added backports source line now names bookworm-backports so that it
    agrees with the `-t bookworm-backports` installs in the same Dockerfile.

 Dockerfile                  | 54 ++++++++++++++++++++++---------------
 archivebox/core/settings.py | 13 ++++-----
 bin/build_docker.sh         |  3 ++-
 pyproject.toml              |  3 +++
 4 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/Dockerfile b/Dockerfile
index adcbc36f..db9986b7 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -49,17 +49,19 @@ ENV PATH="$PATH:$GLOBAL_VENV/bin:$APP_VENV/bin:$NODE_MODULES/.bin"
 
 
 # Create non-privileged user for archivebox and chrome
-RUN groupadd --system $ARCHIVEBOX_USER \
+RUN echo "[*] Setting up system environment..." \
+    && groupadd --system $ARCHIVEBOX_USER \
     && useradd --system --create-home --gid $ARCHIVEBOX_USER --groups audio,video $ARCHIVEBOX_USER \
     && mkdir -p /etc/apt/keyrings
 
 # Install system apt dependencies (adding backports to access more recent apt updates)
-RUN echo 'deb https://deb.debian.org/debian bullseye-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
+RUN echo "[+] Installing system dependencies..." \
+    && echo 'deb https://deb.debian.org/debian bookworm-backports main contrib non-free' >> /etc/apt/sources.list.d/backports.list \
     && apt-get update -qq \
     && apt-get install -qq -y \
         apt-transport-https ca-certificates gnupg2 curl wget \
         zlib1g-dev dumb-init gosu cron unzip \
-        nano iputils-ping dnsutils \
+        nano iputils-ping dnsutils htop procps \
         # 1. packaging dependencies
         # 2. docker and init system dependencies
         # 3. frivolous CLI helpers to make debugging failed archiving easier
@@ -70,7 +72,8 @@ RUN echo 'deb https://deb.debian.org/debian bullseye-backports main contrib non-
 ######### Language Environments ####################################
 
 # Install Node environment
-RUN echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main' >> /etc/apt/sources.list.d/nodejs.list \
+RUN echo "[+] Installing Node environment..." \
+    && echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_20.x nodistro main' >> /etc/apt/sources.list.d/nodejs.list \
     && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | gpg --dearmor | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \
     && apt-get update -qq \
     && apt-get install -qq -y nodejs \
@@ -79,18 +82,21 @@ RUN echo 'deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesourc
     && npm --version
 
 # Install Python environment
-RUN apt-get update -qq \
+RUN echo "[+] Installing Python environment..." \
+    && apt-get update -qq \
     && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
         python3 python3-pip python3-venv python3-setuptools python3-wheel python-dev-is-python3 \
+        python3-ldap libldap2-dev libsasl2-dev libssl-dev \
     && rm /usr/lib/python3*/EXTERNALLY-MANAGED \
-    && python3 -m venv $GLOBAL_VENV \
-    && $GLOBAL_VENV/bin/pip install --upgrade pip pdm setuptools wheel \
+    && python3 -m venv --system-site-packages --symlinks $GLOBAL_VENV \
+    && $GLOBAL_VENV/bin/pip install --upgrade pip pdm setuptools wheel python-ldap \
     && rm -rf /var/lib/apt/lists/*
 
 ######### Extractor Dependencies ##################################
 
 # Install apt dependencies
-RUN apt-get update -qq \
+RUN echo "[+] Installing extractor APT dependencies..." \
+    && apt-get update -qq \
     && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
         curl wget git yt-dlp ffmpeg ripgrep \
         # Packages we have also needed in the past:
@@ -99,8 +105,9 @@ RUN apt-get update -qq \
     && rm -rf /var/lib/apt/lists/*
 
 # Install chromium browser using playwright
-ENV PLAYWRIGHT_BROWSERS_PATH=/browsers
-RUN apt-get update -qq \
+ENV PLAYWRIGHT_BROWSERS_PATH="/browsers"
+RUN echo "[+] Installing extractor Chromium dependency..." \
+    && apt-get update -qq \
     && $GLOBAL_VENV/bin/pip install playwright \
     && $GLOBAL_VENV/bin/playwright install --with-deps chromium \
     && CHROME_BINARY="$($GLOBAL_VENV/bin/python -c 'from playwright.sync_api import sync_playwright; print(sync_playwright().start().chromium.executable_path)')" \
@@ -110,21 +117,22 @@ RUN apt-get update -qq \
 
 # Install Node dependencies
 WORKDIR "$CODE_DIR"
-ADD "package.json" "package-lock.json" "$CODE_DIR/"
-RUN npm ci --prefer-offline --no-audit
-RUN "$NODE_MODULES/.bin/readability-extractor" --version
+COPY --chown=root:root --chmod=755 "package.json" "package-lock.json" "$CODE_DIR/"
+RUN echo "[+] Installing extractor Node dependencies..." \
+    && npm ci --prefer-offline --no-audit \
+    && npm version
 
 ######### Build Dependencies ####################################
 
-WORKDIR "$CODE_DIR"
-COPY --chown=root:root . "$CODE_DIR/"
-
-# Install Python Build dependencies & build ArchiveBox package
-# RUN apt-get update -qq \
+# # Installing Python dependencies to build from source
+# WORKDIR "$CODE_DIR"
+# COPY --chown=root:root --chmod=755 "./pyproject.toml" "./pdm.lock" "$CODE_DIR/"
+# RUN echo "[+] Installing project Python dependencies..." \
+#     && apt-get update -qq \
 #     && apt-get install -qq -y -t bookworm-backports --no-install-recommends \
 #         build-essential libssl-dev libldap2-dev libsasl2-dev \
-#     && pdm venv create \
-#     && pdm install --fail-fast --no-lock --group :all \
+#     && pdm use -f $GLOBAL_VENV \
+#     && pdm install --fail-fast --no-lock --group :all --no-self \
 #     && pdm build \
 #     && apt-get purge -y \
 #         build-essential libssl-dev libldap2-dev libsasl2-dev \
@@ -132,14 +140,16 @@ COPY --chown=root:root . "$CODE_DIR/"
 #     && apt-get autoremove -y \
 #     && rm -rf /var/lib/apt/lists/*
 
-
 # Install ArchiveBox Python package from source
-RUN apt-get update -qq \
+COPY --chown=root:root --chmod=755 "." "$CODE_DIR/"
+RUN echo "[*] Installing ArchiveBox package from /app..." \
+    && apt-get update -qq \
     && $GLOBAL_VENV/bin/pip install -e "$CODE_DIR"[sonic,ldap]
 
 ####################################################
 
 # Setup ArchiveBox runtime config
+WORKDIR "$DATA_DIR"
 ENV IN_DOCKER=True \
     WGET_BINARY="wget" \
     YOUTUBEDL_BINARY="yt-dlp" \
diff --git a/archivebox/core/settings.py b/archivebox/core/settings.py
index 222b13e9..11fd649d 100644
--- a/archivebox/core/settings.py
+++ b/archivebox/core/settings.py
@@ -99,22 +99,23 @@ if LDAP:
         from django_auth_ldap.config import LDAPSearch
 
         global AUTH_LDAP_SERVER_URI
-        AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
-
         global AUTH_LDAP_BIND_DN
-        AUTH_LDAP_BIND_DN = LDAP_BIND_DN
-
         global AUTH_LDAP_BIND_PASSWORD
+        global AUTH_LDAP_USER_SEARCH
+        global AUTH_LDAP_USER_ATTR_MAP
+
+        AUTH_LDAP_SERVER_URI = LDAP_SERVER_URI
+        AUTH_LDAP_BIND_DN = LDAP_BIND_DN
         AUTH_LDAP_BIND_PASSWORD = LDAP_BIND_PASSWORD
 
-        global AUTH_LDAP_USER_SEARCH
+        assert AUTH_LDAP_SERVER_URI and LDAP_USERNAME_ATTR and LDAP_USER_FILTER, 'LDAP_* config options must all be set if LDAP=True'
+
         AUTH_LDAP_USER_SEARCH = LDAPSearch(
             LDAP_USER_BASE,
             ldap.SCOPE_SUBTREE,
             '(&(' + LDAP_USERNAME_ATTR + '=%(user)s)' + LDAP_USER_FILTER + ')',
         )
 
-        global AUTH_LDAP_USER_ATTR_MAP
         AUTH_LDAP_USER_ATTR_MAP = {
             'username': LDAP_USERNAME_ATTR,
             'first_name': LDAP_FIRSTNAME_ATTR,
diff --git a/bin/build_docker.sh b/bin/build_docker.sh
index 1b9d32fc..65b55d0b 100755
--- a/bin/build_docker.sh
+++ b/bin/build_docker.sh
@@ -65,7 +65,8 @@ check_platforms || (recreate_builder && check_platforms) || exit 1
 
 
 echo "[+] Building archivebox:$VERSION docker image..."
-#docker build . \
+# docker builder prune
+# docker build . --no-cache -t archivebox-dev \
 docker buildx build --platform "$REQUIRED_PLATFORMS" --load . \
                -t archivebox \
                -t archivebox:$TAG_NAME \
diff --git a/pyproject.toml b/pyproject.toml
index 4f3135b4..aee8ac84 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,9 +85,12 @@ doc = [
 
 [project.optional-dependencies]
 sonic = [
+    # echo "deb [signed-by=/usr/share/keyrings/valeriansaliou_sonic.gpg] https://packagecloud.io/valeriansaliou/sonic/debian/ bookworm main" > /etc/apt/sources.list.d/valeriansaliou_sonic.list
+    # curl -fsSL https://packagecloud.io/valeriansaliou/sonic/gpgkey | gpg --dearmor -o /usr/share/keyrings/valeriansaliou_sonic.gpg
     "sonic-client>=0.0.5",
 ]
 ldap = [
+    # apt install libldap2-dev libsasl2-dev
     "django-auth-ldap>=4.1.0",
 ]
 