diff --git a/README.md b/README.md
index b8e135a7..f768c9bd 100644
--- a/README.md
+++ b/README.md
@@ -391,6 +391,10 @@ Not all sites can be effectively archived with each method, that's why it's best
 
 If it seems like more than 10-20% of sites in the archive are broken, open an [issue](https://github.com/pirate/bookmark-archiver/issues) with some of the URLs that failed to be archived and I'll investigate.
 
+**Removing unwanted links from the index:**
+
+If you accidentally added a lot of unwanted links to the index and they slow down your archiving, you can use the `bin/purge` script to remove them. It removes every link whose URL matches one of the Python regexes you pass to it, e.g. `bin/purge -r 'amazon\.com' -r 'google\.com'`. It prompts before removing anything from the index, but for extra safety you may want to back up `index.json` first (or put it under version control).
+
 ### Hosting the Archive
 
 If you're having issues trying to host the archive via nginx, make sure you already have nginx running with SSL.
diff --git a/archiver/purge.py b/archiver/purge.py
new file mode 100755
index 00000000..55ba6fb6
--- /dev/null
+++ b/archiver/purge.py
@@ -0,0 +1,53 @@
+#!/usr/bin/env python3
+import argparse
+import re
+from typing import List
+
+from archive import parse_json_link_index
+from config import OUTPUT_DIR
+from index import write_json_links_index
+
+
+def cleanup_index(patterns: List[str], yes=False):
+    regexes = [re.compile(p) for p in patterns]
+
+    index = parse_json_link_index(OUTPUT_DIR)
+    links = index['links']
+
+    filtered = []
+    remaining = []
+    for link in links:
+        url = link['url']
+        for regex in regexes:
+            if regex.search(url):
+                filtered.append((link, regex))
+                break
+        else:
+            # no regex matched this url (loop finished without break), keep the link
+            remaining.append(link)
+
+    print("Filtered out {}/{} urls:".format(len(filtered), len(links)))
+    for link, regex in filtered:
+        print("  {url} via {regex}".format(url=link['url'], regex=regex.pattern))
+
+    if yes:
+        proceed = True
+    else:
+        res = input("Remove {} entries from index? [y/n] ".format(len(filtered)))
+        proceed = res.strip().lower() in ('y', 'yes')
+
+    if proceed:
+        write_json_links_index(OUTPUT_DIR, remaining)
+    else:
+        exit('Aborting, index was not modified.')
+
+
+if __name__ == '__main__':
+    p = argparse.ArgumentParser(description='Index purging tool')
+    p.add_argument('--regex', '-r', action='append', required=True,
+                   help='Python regex of URLs to remove (can be passed multiple times)')
+    p.add_argument('--yes', action='store_true', default=False,
+                   help='Do not prompt for confirmation')
+
+    args = p.parse_args()
+    cleanup_index(args.regex, yes=args.yes)
diff --git a/bin/purge b/bin/purge
new file mode 120000
index 00000000..ad99fab3
--- /dev/null
+++ b/bin/purge
@@ -0,0 +1 @@
+../archiver/purge.py
\ No newline at end of file
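
For reference, here's roughly what a session with the new tool looks like. The URLs and counts below are made-up examples; the output lines follow the `print` and `input` calls in `purge.py` above:

```shell
# Run interactively first to review what would be removed:
$ bin/purge -r 'amazon\.com'
Filtered out 2/100 urls:
  https://www.amazon.com/gp/product/example via amazon\.com
  https://amazon.com/deals via amazon\.com
Remove 2 entries from index? [y/n] n
Aborting, index was not modified.

# In scripts, pass --yes to skip the confirmation prompt:
$ bin/purge -r 'amazon\.com' -r 'google\.com' --yes
```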