From 3562f2c9aacb4b7cba8a61d95f4d325b6b5887c1 Mon Sep 17 00:00:00 2001
From: Sander van Harmelen <sander@xanzy.io>
Date: Mon, 28 Jan 2019 19:28:51 +0100
Subject: [PATCH] Trim Unicode characters from the start of URLs

---
 CHANGELOG.md |  1 +
 src/url.rs   | 15 +++++++++++----
 2 files changed, 12 insertions(+), 4 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 5a1d8156..2bdd8c68 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Resolved off-by-one issue with erasing characters in the last column
 - Excessive polling every 100ms with `live_config_reload` enabled
+- Multi-byte Unicode characters directly before a URL's scheme are now properly ignored
 
 ## Version 0.2.7
 
diff --git a/src/url.rs b/src/url.rs
index fc2a00f9..0db083be 100644
--- a/src/url.rs
+++ b/src/url.rs
@@ -45,13 +45,19 @@ impl UrlParser {
 
     /// Returns the URL if the parser has found any.
     pub fn url(mut self) -> Option<String> {
-        // Remove non-alphabetical characters before scheme
+        // Remove non-alphabetical characters before the scheme
+        // https://tools.ietf.org/html/rfc3986#section-3.1
         if let Some(index) = self.state.find("://") {
-            for i in (0..index - 1).rev() {
-                match self.state.chars().nth(i).unwrap() {
+            let iter = self
+                .state
+                .char_indices()
+                .rev()
+                .skip_while(|(byte_index, _)| *byte_index >= index);
+            for (byte_index, c) in iter {
+                match c {
                     'a'...'z' | 'A'...'Z' => (),
                     _ => {
-                        self.state = self.state.split_off(i + 1);
+                        self.state = self.state.split_off(byte_index + c.len_utf8());
                         break;
                     }
                 }
@@ -206,6 +212,7 @@ mod tests {
         url_test("complicated:https://example.org", "https://example.org", 15);
         url_test("test.https://example.org", "https://example.org", 10);
         url_test(",https://example.org", "https://example.org", 5);
+        url_test("\u{2502}https://example.org", "https://example.org", 5);
     }
 
     #[test]