From a167d2a1f47879c7be489818c51c244e9b343a5e Mon Sep 17 00:00:00 2001
From: Nick Sweeting
Date: Wed, 5 Jul 2017 16:33:51 -0500
Subject: [PATCH] colorize output and add progress bar

---
 archive.py |  55 +++++++++++-----------
 config.py  | 136 +++++++++++++++++++++++++++++++++++++++++++++++------
 fetch.py   |  89 ++++++++++++++++++++---------------
 index.py   |  19 +++++++-
 parse.py   |  16 +++----
 5 files changed, 226 insertions(+), 89 deletions(-)

diff --git a/archive.py b/archive.py
index 138ae9d4..eb6f76fb 100755
--- a/archive.py
+++ b/archive.py
@@ -7,13 +7,13 @@ import os
 import sys
 
 from datetime import datetime
-from subprocess import run
 
 from parse import parse_export
 from index import dump_index
 from fetch import dump_website
 from config import (
     ARCHIVE_PERMISSIONS,
+    ANSI,
     check_dependencies,
 )
 
@@ -24,23 +24,23 @@ __DOCUMENTATION__ = 'https://github.com/pirate/bookmark-archiver'
 
 def create_archive(export_file, service=None, resume=None):
     """update or create index.html and download archive of all links"""
 
-    with open(export_file, 'r', encoding='utf-8') as f:
-        print('[+] [{}] Starting archive from {} export file.'.format(
-            datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-            export_file
-        ))
+    print('[*] [{}] Starting archive from {} export file.'.format(
+        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+        export_file,
+    ))
 
+    with open(export_file, 'r', encoding='utf-8') as f:
         links, service = parse_export(f, service=service)
 
-        if resume:
-            try:
-                links = [
-                    link
-                    for link in links
-                    if float(link['timestamp']) >= float(resume)
-                ]
-            except TypeError:
-                print('Resume value and all timestamp values must be valid numbers.')
+    if resume:
+        try:
+            links = [
+                link
+                for link in links
+                if float(link['timestamp']) >= float(resume)
+            ]
+        except TypeError:
+            print('Resume value and all timestamp values must be valid numbers.')
 
     if not links or not service:
         print('[X] No links found in {}, is it a {} export file?'.format(export_file, service))
@@ -53,20 +53,21 @@ def create_archive(export_file, service=None, resume=None):
         os.makedirs(os.path.join(service, 'archive'))
 
     dump_index(links, service)
-
-    run(['chmod', '-R', ARCHIVE_PERMISSIONS, service], timeout=30)
-
-    print('[*] [{}] Created archive index with {} links.'.format(
-        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
-        len(links),
-    ))
 
     check_dependencies()
 
+    try:
+        for link in links:
+            dump_website(link, service)
+    except (KeyboardInterrupt, SystemExit, Exception):
+        print('{red}[X] Archive creation stopped.{reset}'.format(**ANSI))
+        print('    Continue where you left off by running:')
+        print('        ./archive.py {} {} {}'.format(
+            export_file,
+            service,
+            link['timestamp'],
+        ))
+        raise SystemExit(1)
 
-    for link in links:
-        dump_website(link, service)
-
-    print('[√] [{}] Archive update complete.'.format(datetime.now().strftime('%Y-%m-%d %H:%M:%S')))
+    print('{}[√] [{}] Archive update complete.{}'.format(ANSI['green'], datetime.now().strftime('%Y-%m-%d %H:%M:%S'), ANSI['reset']))
 
 
 if __name__ == '__main__':
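A note on the resume flow above: the filter runs over the already-parsed link list, so resuming an interrupted run only needs the timestamp printed on exit. A standalone sketch of the same filter, using hypothetical link dicts shaped like parse_export() output:

    # keep only links bookmarked at or after the resume point, mirroring
    # the float() comparison in create_archive() above
    links = [
        {'url': 'https://example.com/a', 'timestamp': '1499287000'},
        {'url': 'https://example.com/b', 'timestamp': '1499288000.5'},
    ]
    resume = '1499288000'
    links = [link for link in links if float(link['timestamp']) >= float(resume)]
    assert [link['url'] for link in links] == ['https://example.com/b']
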
diff --git a/config.py b/config.py
index 5cd8d5c4..9933a86e 100644
--- a/config.py
+++ b/config.py
@@ -1,7 +1,10 @@
 import os
 import sys
+import time
+import shutil
 
-from subprocess import run, PIPE
+from subprocess import run, PIPE, DEVNULL
+from multiprocessing import Process
 
 # os.getenv('VARIABLE', 'DEFAULT') gets the value of environment
 # variable "VARIABLE" and if it is not set, sets it to 'DEFAULT'
 # for boolean values, check to see if the string is 'true', and
 # if so, the python variable will be True
 
+IS_TTY = sys.stdout.isatty()
+
+USE_COLOR = os.getenv('USE_COLOR', str(IS_TTY) ).lower() == 'true'
+SHOW_PROGRESS = os.getenv('SHOW_PROGRESS', str(IS_TTY) ).lower() == 'true'
 FETCH_WGET = os.getenv('FETCH_WGET', 'True' ).lower() == 'true'
 FETCH_WGET_REQUISITES = os.getenv('FETCH_WGET_REQUISITES', 'True' ).lower() == 'true'
-FETCH_AUDIO = os.getenv('FETCH_AUDIO', 'False' ).lower() == 'true'
-FETCH_VIDEO = os.getenv('FETCH_VIDEO', 'False' ).lower() == 'true'
+FETCH_AUDIO = os.getenv('FETCH_AUDIO', 'False' ).lower() == 'true'
+FETCH_VIDEO = os.getenv('FETCH_VIDEO', 'False' ).lower() == 'true'
 FETCH_PDF = os.getenv('FETCH_PDF', 'True' ).lower() == 'true'
 FETCH_SCREENSHOT = os.getenv('FETCH_SCREENSHOT', 'True' ).lower() == 'true'
 FETCH_FAVICON = os.getenv('FETCH_FAVICON', 'True' ).lower() == 'true'
@@ -25,44 +32,143 @@ TIMEOUT = int(os.getenv('TIMEOUT', '60'))
 INDEX_TEMPLATE = os.getenv('INDEX_TEMPLATE', 'templates/index.html')
 INDEX_ROW_TEMPLATE = os.getenv('INDEX_ROW_TEMPLATE', 'templates/index_row.html')
 
+TERM_WIDTH = shutil.get_terminal_size((100, 10)).columns
+ANSI = {
+    'reset': '\033[00;00m',
+    'lightblue': '\033[01;30m',
+    'lightyellow': '\033[01;33m',
+    'lightred': '\033[01;35m',
+    'red': '\033[01;31m',
+    'green': '\033[01;32m',
+    'blue': '\033[01;34m',
+    'white': '\033[01;37m',
+    'black': '\033[01;30m',
+}
+if not USE_COLOR:
+    # don't show colors if USE_COLOR is False
+    ANSI = {k: '' for k in ANSI.keys()}
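The ANSI dict above is consumed two ways throughout this patch: unpacked wholesale into format() for named placeholders, or indexed per color. Both degrade gracefully to plain text when USE_COLOR is false, since every value becomes the empty string. A self-contained sketch (the dict here is a trimmed copy, just to keep it runnable):

    # trimmed copy of the ANSI table above
    ANSI = {'green': '\033[01;32m', 'red': '\033[01;31m', 'reset': '\033[00;00m'}

    # style 1: unpack the whole dict and use named placeholders
    print('{green}[√] Saved.{reset}'.format(**ANSI))
    # style 2: index individual colors positionally
    print('{}Failed: TimeoutExpired{}'.format(ANSI['red'], ANSI['reset']))
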
+
+
+### Util Functions
 
 def check_dependencies():
+    """Check that all necessary dependencies are installed, and have valid versions"""
+    print('[*] Checking Dependencies:')
     python_vers = float('{}.{}'.format(sys.version_info.major, sys.version_info.minor))
     if python_vers < 3.5:
-        print('[X] Python version is not new enough: {} (>3.5 is required)'.format(python_vers))
+        print('{}[X] Python version is not new enough: {} (>3.5 is required){}'.format(ANSI['red'], python_vers, ANSI['reset']))
         print('    See https://github.com/pirate/bookmark-archiver#troubleshooting for help upgrading your Python installation.')
         raise SystemExit(1)
 
     if FETCH_PDF or FETCH_SCREENSHOT:
         if run(['which', CHROME_BINARY]).returncode:
-            print('[X] Missing dependency: {}'.format(CHROME_BINARY))
+            print('{}[X] Missing dependency: {}{}'.format(ANSI['red'], CHROME_BINARY, ANSI['reset']))
+            print('    Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
             print('    See https://github.com/pirate/bookmark-archiver for help.')
             raise SystemExit(1)
 
         # parse chrome --version e.g. Google Chrome 61.0.3114.0 canary / Chromium 59.0.3029.110 built on Ubuntu, running on Ubuntu 16.04
-        result = run([CHROME_BINARY, '--version'], stdout=PIPE)
-        version = result.stdout.decode('utf-8').replace('Google Chrome ', '').replace('Chromium ', '').split(' ', 1)[0].split('.', 1)[0]  # TODO: regex might be better
-        if int(version) < 59:
-            print('[X] Chrome version must be 59 or greater for headless PDF and screenshot saving')
+        try:
+            result = run([CHROME_BINARY, '--version'], stdout=PIPE)
+            version = result.stdout.decode('utf-8').replace('Google Chrome ', '').replace('Chromium ', '').split(' ', 1)[0].split('.', 1)[0]  # TODO: regex might be better
+            if int(version) < 59:
+                print('{red}[X] Chrome version must be 59 or greater for headless PDF and screenshot saving{reset}'.format(**ANSI))
+                print('    See https://github.com/pirate/bookmark-archiver for help.')
+                raise SystemExit(1)
+        except (TypeError, OSError):
+            print('{red}[X] Failed to parse Chrome version, is it installed properly?{reset}'.format(**ANSI))
+            print('    Run ./setup.sh, then confirm it was installed with: {} --version'.format(CHROME_BINARY))
             print('    See https://github.com/pirate/bookmark-archiver for help.')
             raise SystemExit(1)
 
     if FETCH_WGET:
-        if run(['which', 'wget']).returncode:
-            print('[X] Missing dependency: wget')
+        if run(['which', 'wget']).returncode or run(['wget', '--version'], stdout=DEVNULL).returncode:
+            print('{red}[X] Missing dependency: wget{reset}'.format(**ANSI))
+            print('    Run ./setup.sh, then confirm it was installed with: {} --version'.format('wget'))
             print('    See https://github.com/pirate/bookmark-archiver for help.')
             raise SystemExit(1)
 
     if FETCH_FAVICON or SUBMIT_ARCHIVE_DOT_ORG:
-        if run(['which', 'curl']).returncode:
-            print('[X] Missing dependency: curl')
+        if run(['which', 'curl']).returncode or run(['curl', '--version'], stdout=DEVNULL).returncode:
+            print('{red}[X] Missing dependency: curl{reset}'.format(**ANSI))
+            print('    Run ./setup.sh, then confirm it was installed with: {} --version'.format('curl'))
             print('    See https://github.com/pirate/bookmark-archiver for help.')
             raise SystemExit(1)
 
     if FETCH_AUDIO or FETCH_VIDEO:
-        if run(['which', 'youtube-dl']).returncode:
-            print('[X] Missing dependency: youtube-dl')
+        if run(['which', 'youtube-dl']).returncode or run(['youtube-dl', '--version'], stdout=DEVNULL).returncode:
+            print('{red}[X] Missing dependency: youtube-dl{reset}'.format(**ANSI))
+            print('    Run ./setup.sh, then confirm it was installed with: {} --version'.format('youtube-dl'))
             print('    See https://github.com/pirate/bookmark-archiver for help.')
             raise SystemExit(1)
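The version parsing above still carries a "TODO: regex might be better" note. A hedged sketch of what that regex could look like (the helper name and sample strings are illustrative, not part of the patch):

    import re

    def parse_major_version(version_output):
        """pull the major version out of arbitrary `--version` output"""
        match = re.search(r'(\d+)\.\d+\.\d+', version_output)
        return int(match.group(1)) if match else None

    assert parse_major_version('Google Chrome 61.0.3114.0 canary') == 61
    assert parse_major_version('Chromium 59.0.3029.110 built on Ubuntu') == 59
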
+
+
+def chmod_file(path, cwd='.', permissions=ARCHIVE_PERMISSIONS, timeout=30):
+    """chmod -R <permissions> <cwd>/<path>"""
+
+    if not os.path.exists(os.path.join(cwd, path)):
+        raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
+
+    chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
+    if chmod_result.returncode == 1:
+        print('    ', chmod_result.stderr.decode())
+        raise Exception('Failed to chmod {}/{}'.format(cwd, path))
+
+
+def progress(seconds=TIMEOUT, prefix=''):
+    """Show a (subprocess-controlled) progress bar with a timeout,
+    returns end() function to instantly finish the progress
+    """
+
+    if not SHOW_PROGRESS:
+        return lambda: None
+
+    chunks = TERM_WIDTH - len(prefix) - 20  # number of progress chunks to show (aka max bar width)
+
+    def progress_bar(seconds=seconds, prefix=prefix):
+        """show timer in the form of progress bar, with percentage and seconds remaining"""
+        try:
+            for s in range(seconds * chunks):
+                progress = s / chunks / seconds * 100
+                bar_width = round(progress/(100/chunks))
+
+                # ████████████████████           0.9% (1/60sec)
+                sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)'.format(
+                    prefix,
+                    ANSI['green'],
+                    ('█' * bar_width).ljust(chunks),
+                    ANSI['reset'],
+                    round(progress, 1),
+                    round(s/chunks),
+                    seconds,
+                ))
+                sys.stdout.flush()
+                time.sleep(1 / chunks)
+
+            # ██████████████████████████████████ 100.0% (60/60sec)
+            sys.stdout.write('\r{0}{1}{2}{3} {4}% ({5}/{6}sec)\n'.format(
+                prefix,
+                ANSI['red'],
+                '█' * chunks,
+                ANSI['reset'],
+                100.0,
+                seconds,
+                seconds,
+            ))
+            sys.stdout.flush()
+        except KeyboardInterrupt:
+            print()
+
+    p = Process(target=progress_bar)
+    p.start()
+
+    def end():
+        """immediately finish progress and clear the progressbar line"""
+        p.terminate()
+        sys.stdout.write('\r{}\r'.format(' ' * TERM_WIDTH))  # clear whole terminal line
+        sys.stdout.flush()
+
+    return end
diff --git a/fetch.py b/fetch.py
index 7978b815..57d545a5 100644
--- a/fetch.py
+++ b/fetch.py
@@ -16,38 +16,35 @@ from config import (
     FETCH_AUDIO,
     FETCH_VIDEO,
     FETCH_FAVICON,
+    TIMEOUT,
+    ANSI,
+    progress,
+    chmod_file,
 )
 
 
-def chmod_file(path, cwd='.', permissions='755', timeout=30):
-    if not os.path.exists(os.path.join(cwd, path)):
-        raise Exception('Failed to chmod: {} does not exist (did the previous step fail?)'.format(path))
-
-    chmod_result = run(['chmod', '-R', permissions, path], cwd=cwd, stdout=DEVNULL, stderr=PIPE, timeout=timeout)
-    if chmod_result.returncode == 1:
-        print('    ', chmod_result.stderr.decode())
-        raise Exception('Failed to chmod {}/{}'.format(cwd, path))
-
 def fetch_wget(out_dir, link, overwrite=False, requisites=True, timeout=60):
     """download full site using wget"""
 
-    domain = link['base_url'].split('/', 1)[0]
-    if not os.path.exists(os.path.join(out_dir, domain)) or overwrite:
-        print('    - Downloading Full Site')
+    if not os.path.exists(os.path.join(out_dir, link['domain'])) or overwrite:
+        print('    - Downloading full site')
         CMD = [
             *'wget --timestamping --adjust-extension --no-parent'.split(' '),  # Docs: https://www.gnu.org/software/wget/manual/wget.html
             *(('--page-requisites', '--convert-links') if requisites else ()),
             link['url'],
         ]
+        end = progress(timeout, prefix='      ')
         try:
-            result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=out_dir, timeout=timeout)  # dom.html
+            result = run(CMD, stdout=PIPE, stderr=PIPE, cwd=out_dir, timeout=timeout + 1)  # index.html
+            end()
             if result.returncode > 0:
-                print('       ', result.stderr.decode().split('\n')[-1])
+                print('\n'.join('          ' + line for line in result.stderr.decode().rsplit('\n', 5)[-3:] if line.strip()))
                 raise Exception('Failed to wget download')
-            chmod_file(domain, cwd=out_dir)
+            chmod_file(link['domain'], cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', 'cd {}; {}'.format(out_dir, ' '.join(CMD)))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping site download')
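The progress() helper above returns a closure rather than an object: callers start the bar before the slow subprocess call, then call end() on both the success and failure paths so the bar line gets cleared. A minimal usage sketch (assuming config.py is importable from the working directory; the sleep stands in for a real run(CMD, ...) call):

    import time
    from config import progress

    end = progress(seconds=5, prefix='      ')
    try:
        time.sleep(2)  # stand-in for the real subprocess call
    finally:
        end()  # terminates the bar process and wipes the terminal line
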
 
 
@@ -63,15 +60,18 @@ def fetch_pdf(out_dir, link, overwrite=False, timeout=60, chrome_binary='chromiu
             *'--headless --disable-gpu --print-to-pdf'.split(' '),
             link['url']
         ]
+        end = progress(timeout, prefix='      ')
         try:
-            result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=timeout)  # output.pdf
+            result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=timeout + 1)  # output.pdf
+            end()
             if result.returncode:
                 print('       ', result.stderr.decode())
                 raise Exception('Failed to print PDF')
             chmod_file('output.pdf', cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', 'cd {}; {}'.format(out_dir, ' '.join(CMD)))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping PDF print')
 
@@ -88,15 +88,18 @@ def fetch_screenshot(out_dir, link, overwrite=False, timeout=60, chrome_binary='
             '--window-size={}'.format(resolution),
             link['url']
         ]
+        end = progress(timeout, prefix='      ')
         try:
-            result = run(CMD, stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=timeout)  # sreenshot.png
+            result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=timeout + 1)  # screenshot.png
+            end()
             if result.returncode:
                 print('       ', result.stderr.decode())
                 raise Exception('Failed to take screenshot')
             chmod_file('screenshot.png', cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', 'cd {}; {}'.format(out_dir, ' '.join(CMD)))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping screenshot')
 
@@ -111,8 +114,10 @@ def archive_dot_org(out_dir, link, overwrite=False, timeout=60):
     success = False
     CMD = ['curl', '-I', submit_url]
+    end = progress(timeout, prefix='      ')
     try:
-        result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=out_dir, timeout=timeout)  # archive.org
+        result = run(CMD, stdout=PIPE, stderr=DEVNULL, cwd=out_dir, timeout=timeout + 1)  # archive.org.txt
+        end()
         headers = result.stdout.splitlines()
         content_location = [h for h in headers if b'Content-Location: ' in h]
         if content_location:
@@ -122,11 +127,12 @@ def archive_dot_org(out_dir, link, overwrite=False, timeout=60):
         else:
             raise Exception('Failed to find "Content-Location" URL header in Archive.org response.')
     except Exception as e:
+        end()
         print('       Visit url to see output:', ' '.join(CMD))
-        print('       Failed: {} {}'.format(e.__class__.__name__, e))
+        print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
 
     if success:
-        with open('{}/archive.org.txt'.format(out_dir), 'w', encoding='utf-8') as f:
+        with open(os.path.join(out_dir, 'archive.org.txt'), 'w', encoding='utf-8') as f:
             f.write(saved_url)
         chmod_file('archive.org.txt', cwd=out_dir)
 
@@ -140,14 +146,17 @@ def fetch_favicon(out_dir, link, overwrite=False, timeout=60):
     if not os.path.exists(path) or overwrite:
         print('    - Fetching Favicon')
-        CMD = 'curl https://www.google.com/s2/favicons?domain={domain}'.format(**link).split(' ')
+        CMD = ['curl', 'https://www.google.com/s2/favicons?domain={domain}'.format(**link)]
         fout = open('{}/favicon.ico'.format(out_dir), 'w')
+        end = progress(timeout, prefix='      ')
         try:
-            run([*CMD], stdout=fout, stderr=DEVNULL, cwd=out_dir, timeout=timeout)  # favicon.ico
+            run(CMD, stdout=fout, stderr=DEVNULL, cwd=out_dir, timeout=timeout + 1)  # favicon.ico
+            end()
             chmod_file('favicon.ico', cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', ' '.join(CMD))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
         fout.close()
     else:
         print('    √ Skipping favicon')
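The archive.org submission above works by reading the Content-Location header out of a `curl -I` response. An isolated sketch of that parsing, fed canned response bytes instead of a live request (the sample response and the final URL join are assumptions of mine, since the hunk does not show how saved_url is assembled):

    # canned `curl -I https://web.archive.org/save/...` response (illustrative)
    response = b'HTTP/1.1 302 Found\r\nContent-Location: /web/20170705000000/https://example.com\r\n\r\n'
    headers = response.splitlines()

    content_location = [h for h in headers if b'Content-Location: ' in h]
    if content_location:
        archive_path = content_location[0].split(b'Content-Location: ', 1)[-1].decode('utf-8')
        print('https://web.archive.org' + archive_path)
    else:
        raise Exception('Failed to find "Content-Location" URL header in Archive.org response.')
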
 
 
@@ -166,15 +175,18 @@ def fetch_audio(out_dir, link, overwrite=False, timeout=60):
             "youtube-dl -x --audio-format mp3 --audio-quality 0 -o '%(title)s.%(ext)s'",
             link['url'],
         ]
+        end = progress(timeout, prefix='      ')
         try:
-            result = run(CMD, stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=timeout)  # screenshot.png
+            result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=timeout + 1)  # audio/audio.mp3
+            end()
             if result.returncode:
                 print('       ', result.stderr.decode())
                 raise Exception('Failed to download audio')
             chmod_file('audio', cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', 'cd {}; {}'.format(out_dir, ' '.join(CMD)))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping audio download')
 
@@ -189,27 +201,30 @@ def fetch_video(out_dir, link, overwrite=False, timeout=60):
     if not os.path.exists(path) or overwrite:
         print('    - Downloading video')
         CMD = [
-            "youtube-dl -x --audio-format mp3 --audio-quality 0 -o '%(title)s.%(ext)s'",
+            "youtube-dl -f mp4 -o '%(title)s.%(ext)s'",
             link['url'],
         ]
+        end = progress(timeout, prefix='      ')
         try:
-            result = run(CMD, stdout=DEVNULL, stderr=DEVNULL, cwd=out_dir, timeout=timeout)  # screenshot.png
+            result = run(CMD, stdout=DEVNULL, stderr=PIPE, cwd=out_dir, timeout=timeout + 1)  # video/movie.mp4
+            end()
             if result.returncode:
                 print('       ', result.stderr.decode())
                 raise Exception('Failed to download video')
             chmod_file('video', cwd=out_dir)
         except Exception as e:
+            end()
             print('       Run to see full output:', 'cd {}; {}'.format(out_dir, ' '.join(CMD)))
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping video download')
 
 
-def dump_link_info(out_dir, link, update=True):
+def dump_link_info(out_dir, link, overwrite=False):
     """write a json file with some info about the link"""
 
     info_file_path = os.path.join(out_dir, 'link.json')
 
-    if (not os.path.exists(info_file_path) or update):
+    if (not os.path.exists(info_file_path) or overwrite):
         print('    - Creating link info file')
         try:
             link_json = derived_link_info(link)
@@ -223,7 +238,7 @@
             chmod_file('link.json', cwd=out_dir)
         except Exception as e:
-            print('       Failed: {} {}'.format(e.__class__.__name__, e))
+            print('       {}Failed: {} {}{}'.format(ANSI['red'], e.__class__.__name__, e, ANSI['reset']))
     else:
         print('    √ Skipping link info file')
 
@@ -231,7 +246,7 @@
 def dump_website(link, service, overwrite=False, permissions=ARCHIVE_PERMISSIONS):
     """download the DOM, PDF, and a screenshot into a folder named after the link's timestamp"""
 
-    print('[+] [{timestamp} ({time})] "{title}": {base_url}'.format(**link))
+    print('[{green}+{reset}] [{timestamp} ({time})] "{title}": {blue}{base_url}{reset}'.format(**link, **ANSI))
 
     out_dir = os.path.join(service, 'archive', link['timestamp'])
     if not os.path.exists(out_dir):
@@ -243,7 +258,7 @@ def dump_website(link, service, overwrite=False, permissions=ARCHIVE_PERMISSIONS):
     print('    i Type: {}'.format(link['type']))
 
     if not (link['url'].startswith('http') or link['url'].startswith('ftp')):
-        print('    X Skipping: invalid link.')
+        print('    {}X Skipping: invalid link.{}'.format(ANSI['red'], ANSI['reset']))
         return
 
     if FETCH_WGET:
@@ -267,4 +282,4 @@ def dump_website(link, service, overwrite=False, permissions=ARCHIVE_PERMISSIONS):
     if FETCH_FAVICON:
         fetch_favicon(out_dir, link, overwrite=overwrite)
 
-    dump_link_info(out_dir, link)
+    dump_link_info(out_dir, link, overwrite=overwrite)
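One pre-existing caveat this patch leaves in place: fetch_audio() still passes its entire youtube-dl command as a single list element, which subprocess.run() cannot execute without shell=True (the OS would look for an executable literally named "youtube-dl -x --audio-format ..."). A hedged sketch of the tokenized argv form (the URL is a hypothetical stand-in for link['url']):

    from subprocess import DEVNULL, PIPE, run

    # each flag and value is its own argv entry, so no shell is needed
    CMD = [
        'youtube-dl',
        '-x', '--audio-format', 'mp3', '--audio-quality', '0',
        '-o', '%(title)s.%(ext)s',
        'https://example.com/video',  # hypothetical link['url']
    ]
    result = run(CMD, stdout=DEVNULL, stderr=PIPE, timeout=60)
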
diff --git a/index.py b/index.py
index ab80a70b..442f8a98 100644
--- a/index.py
+++ b/index.py
@@ -2,8 +2,14 @@ import os
 from datetime import datetime
 from string import Template
 
-from config import INDEX_TEMPLATE, INDEX_ROW_TEMPLATE
 from parse import derived_link_info
+from config import (
+    INDEX_TEMPLATE,
+    INDEX_ROW_TEMPLATE,
+    ARCHIVE_PERMISSIONS,
+    ANSI,
+    chmod_file,
+)
 
 
 def dump_index(links, service):
@@ -28,4 +34,13 @@ def dump_index(links, service):
     }
 
     with open(os.path.join(service, 'index.html'), 'w', encoding='utf-8') as f:
-        f.write(Template(index_html).substitute(template_vars))
+        f.write(Template(index_html).substitute(**template_vars))
+
+    chmod_file(service, permissions=ARCHIVE_PERMISSIONS)
+
+    print('[+] [{}] Created archive index with {}{}{} links.'.format(
+        datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
+        ANSI['green'],
+        len(links),
+        ANSI['reset'],
+    ))
diff --git a/parse.py b/parse.py
index d130b298..8f807346 100644
--- a/parse.py
+++ b/parse.py
@@ -64,8 +64,8 @@ def parse_pocket_export(html_file):
             without_scheme = fixed_url.replace('http://', '').replace('https://', '')
             info = {
                 'url': fixed_url,
-                'domain': without_scheme.split('/')[0],    # without pathname
-                'base_url': without_scheme.split('?')[0],  # without query args
+                'domain': without_scheme.split('/', 1)[0],    # without pathname
+                'base_url': without_scheme.split('?', 1)[0],  # without query args
                 'time': datetime.fromtimestamp(int(match.group(2))).strftime('%Y-%m-%d %H:%M'),
                 'timestamp': match.group(2),
                 'tags': match.group(3),
@@ -84,10 +84,10 @@ def parse_json_export(json_file):
         erg = line
         info = {
             'url': erg['href'],
-            'domain': erg['href'].replace('http://', '').replace('https://', '').split('/')[0],
-            'base_url': erg['href'].replace('https://', '').replace('http://', '').split('?')[0],
-            'time': datetime.fromtimestamp(int(time.mktime(time.strptime(erg['time'].split(',')[0], '%Y-%m-%dT%H:%M:%SZ')))),
-            'timestamp': str(int(time.mktime(time.strptime(erg['time'].split(',')[0], '%Y-%m-%dT%H:%M:%SZ')))),
+            'domain': erg['href'].replace('http://', '').replace('https://', '').split('/', 1)[0],
+            'base_url': erg['href'].replace('https://', '').replace('http://', '').split('?', 1)[0],
+            'time': datetime.fromtimestamp(int(time.mktime(time.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ')))),
+            'timestamp': str(int(time.mktime(time.strptime(erg['time'].split(',', 1)[0], '%Y-%m-%dT%H:%M:%SZ')))),
             'tags': erg['tags'],
             'title': erg['description'].replace(' — Readability', ''),
         }
@@ -108,8 +108,8 @@ def parse_bookmarks_export(html_file):
 
         info = {
             'url': url,
-            'domain': url.replace('http://', '').replace('https://', '').split('/')[0],
-            'base_url': url.replace('https://', '').replace('http://', '').split('?')[0],
+            'domain': url.replace('http://', '').replace('https://', '').split('/', 1)[0],
+            'base_url': url.replace('https://', '').replace('http://', '').split('?', 1)[0],
             'time': dt,
             'timestamp': secs,
             'tags': "",