From 4102fc994a390cd3aef6c06a1ee3f8117d14ca63 Mon Sep 17 00:00:00 2001 From: Christian Duerr Date: Thu, 4 Oct 2018 21:02:11 +0200 Subject: [PATCH] Add heuristic to remove parentheses While parentheses in URLs are perfectly legal even in positions where they don't make a lot of sense (like `https://github.com)`), they can often lead to parsing errors when URLs are added in parentheses as a side comment (like `(https://github.com)`). To improve the URL parsing when clicking on links, special heuristics have been added which aim to remove parentheses in cases where they are not expected to be part of the URL. This includes removing leading parentheses (like `((https://url.com`), which are always removed before any other heuristic. If the URL ends with closing parentheses, but there are no matching opening parentheses in the URL (after stripping all leading parentheses), the trailing parentheses will also be removed until there are only matching parentheses left. This allows parsing URLs like `https://github.com/de(mo).html)))`. --- src/event.rs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/event.rs b/src/event.rs index 890ca3a7..77cd9dff 100644 --- a/src/event.rs +++ b/src/event.rs @@ -135,6 +135,19 @@ impl<'a, N: Notify + 'a> input::ActionContext for ActionContext<'a, N> { buf.push(cell.c); } + // Heuristic to remove all leading '(' + while buf.starts_with('(') { + buf.remove(0); + } + + // Heuristic to remove all ')' from end of URLs without matching '(' + let str_count = |text: &str, c: char| { + text.chars().filter(|tc| *tc == c).count() + }; + while buf.ends_with(')') && str_count(&buf, '(') < str_count(&buf, ')') { + buf.pop(); + } + // Check if string is valid url match Url::parse(&buf) { Ok(_) => Some(buf),