diff --git a/CHANGELOG.md b/CHANGELOG.md
index 6735d83d..d8f4680d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -32,6 +32,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Fix the `Copy` `mouse_bindings` action ([#1963](https://github.com/jwilm/alacritty/issues/1963))
 - URLs are only launched when left-clicking
 - Removal of extra characters (like `,`) at the end of URLs has been improved
+- Single quotes (`'`) are removed from URLs when there is no matching opening quote
 
 ## Version 0.2.4
 
diff --git a/src/lib.rs b/src/lib.rs
index d6873f96..f99510f2 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -46,6 +46,7 @@ pub mod term;
 pub mod tty;
 pub mod util;
 pub mod window;
+mod url;
 
 use std::ops::Mul;
 
diff --git a/src/term/mod.rs b/src/term/mod.rs
index dcde62c8..dadfaf78 100644
--- a/src/term/mod.rs
+++ b/src/term/mod.rs
@@ -20,7 +20,6 @@ use std::time::{Duration, Instant};
 
 use arraydeque::ArrayDeque;
 use unicode_width::UnicodeWidthChar;
-use url::Url;
 
 use font::{self, Size};
 use crate::ansi::{self, Color, NamedColor, Attr, Handler, CharsetIndex, StandardCharset, CursorStyle};
@@ -32,17 +31,13 @@ use crate::{MouseCursor, Rgb};
 use copypasta::{Clipboard, Load, Store};
 use crate::input::FONT_SIZE_STEP;
 use crate::logging::LoggerProxy;
+use crate::url::UrlParser;
 
 pub mod cell;
 pub mod color;
 pub use self::cell::Cell;
 use self::cell::LineLength;
 
-// See https://tools.ietf.org/html/rfc3987#page-13
-const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
-const URL_DENY_END_CHARS: [char; 7] = ['.', ',', ';', ':', '?', '!', '/'];
-const URL_SCHEMES: [&str; 8] = ["http", "https", "mailto", "news", "file", "git", "ssh", "ftp"];
-
 /// A type that can expand a given point to a region
 ///
 /// Usually this is implemented for some 2-D array type since
@@ -112,52 +107,19 @@ impl Search for Term {
         point.col += 1;
         let mut iterb = self.grid.iter_from(point);
 
-        // Put all characters until separators into a string
-        let mut buf = String::new();
+        // Find URLs
+        let mut url_parser = UrlParser::new();
         while let Some(cell) = iterb.prev() {
-            if URL_SEPARATOR_CHARS.contains(&cell.c) {
+            if url_parser.advance_left(cell.c) {
                 break;
             }
-            buf.insert(0, cell.c);
         }
         for cell in iterf {
-            if URL_SEPARATOR_CHARS.contains(&cell.c) {
+            if url_parser.advance_right(cell.c) {
                 break;
             }
-            buf.push(cell.c);
-        }
-
-        // Remove all leading '('
-        while buf.starts_with('(') {
-            buf.remove(0);
-        }
-
-        // Remove all ')' from end of URLs without matching '('
-        let open_count = buf.chars().filter(|&c| c == '(').count();
-        let closed_count = buf.chars().filter(|&c| c == ')').count();
-        let mut parens_diff = closed_count - open_count;
-
-        // Remove all characters which aren't allowed at the end of a URL
-        while !buf.is_empty()
-            && (URL_DENY_END_CHARS.contains(&buf.chars().last().unwrap())
-                || (parens_diff > 0 && buf.ends_with(')')))
-        {
-            if buf.pop().unwrap() == ')' {
-                parens_diff -= 1;
-            }
-        }
-
-        // Check if string is valid url
-        match Url::parse(&buf) {
-            Ok(url) => {
-                if URL_SCHEMES.contains(&url.scheme()) {
-                    Some(buf)
-                } else {
-                    None
-                }
-            }
-            Err(_) => None,
         }
+        url_parser.url()
     }
 }
 
@@ -1163,6 +1125,12 @@ impl Term {
         &self.grid
     }
 
+    // Mutable access for swapping out the grid during tests
+    #[cfg(test)]
+    pub fn grid_mut(&mut self) -> &mut Grid<Cell> {
+        &mut self.grid
+    }
+
     /// Iterate over the *renderable* cells in the terminal
     ///
     /// A renderable cell is any cell which has content other than the default
@@ -2147,7 +2115,7 @@ mod tests {
     use serde_json;
 
     use super::{Cell, Term, SizeInfo};
-    use crate::term::{cell, Search};
+    use crate::term::cell;
     use crate::grid::{Grid, Scroll};
     use crate::index::{Point, Line, Column, Side};
 
@@ -2387,142 +2355,6 @@ mod tests {
         scrolled_grid.scroll_display(Scroll::Top);
         assert_eq!(term.grid, scrolled_grid);
     }
-
-    // `((ftp://a.de))` -> `Some("ftp://a.de")`
-    #[test]
-    fn url_trim_unmatched_parens() {
-        let size = SizeInfo {
-            width: 21.0,
-            height: 51.0,
-            cell_width: 3.0,
-            cell_height: 3.0,
-            padding_x: 0.0,
-            padding_y: 0.0,
-            dpr: 1.0,
-        };
-        let mut term = Term::new(&Default::default(), size);
-        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
-        grid[Line(0)][Column(0)].c = '(';
-        grid[Line(0)][Column(1)].c = '(';
-        grid[Line(0)][Column(2)].c = 'f';
-        grid[Line(0)][Column(3)].c = 't';
-        grid[Line(0)][Column(4)].c = 'p';
-        grid[Line(0)][Column(5)].c = ':';
-        grid[Line(0)][Column(6)].c = '/';
-        grid[Line(0)][Column(7)].c = '/';
-        grid[Line(0)][Column(8)].c = 'a';
-        grid[Line(0)][Column(9)].c = '.';
-        grid[Line(0)][Column(10)].c = 'd';
-        grid[Line(0)][Column(11)].c = 'e';
-        grid[Line(0)][Column(12)].c = ')';
-        grid[Line(0)][Column(13)].c = ')';
-        mem::swap(&mut term.grid, &mut grid);
-
-        // Search for URL in grid
-        let url = term.url_search(Point::new(0, Column(4)));
-
-        assert_eq!(url, Some("ftp://a.de".into()));
-    }
-
-    // `ftp://a.de/()` -> `Some("ftp://a.de/()")`
-    #[test]
-    fn url_allow_matching_parens() {
-        let size = SizeInfo {
-            width: 21.0,
-            height: 51.0,
-            cell_width: 3.0,
-            cell_height: 3.0,
-            padding_x: 0.0,
-            padding_y: 0.0,
-            dpr: 1.0,
-        };
-        let mut term = Term::new(&Default::default(), size);
-        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
-        grid[Line(0)][Column(0)].c = 'f';
-        grid[Line(0)][Column(1)].c = 't';
-        grid[Line(0)][Column(2)].c = 'p';
-        grid[Line(0)][Column(3)].c = ':';
-        grid[Line(0)][Column(4)].c = '/';
-        grid[Line(0)][Column(5)].c = '/';
-        grid[Line(0)][Column(6)].c = 'a';
-        grid[Line(0)][Column(7)].c = '.';
-        grid[Line(0)][Column(8)].c = 'd';
-        grid[Line(0)][Column(9)].c = 'e';
-        grid[Line(0)][Column(10)].c = '/';
-        grid[Line(0)][Column(11)].c = '(';
-        grid[Line(0)][Column(12)].c = ')';
-        mem::swap(&mut term.grid, &mut grid);
-
-        // Search for URL in grid
-        let url = term.url_search(Point::new(0, Column(4)));
-
-        assert_eq!(url, Some("ftp://a.de/()".into()));
-    }
-
-    // `aze` -> `None`
-    #[test]
-    fn url_skip_invalid() {
-        let size = SizeInfo {
-            width: 21.0,
-            height: 51.0,
-            cell_width: 3.0,
-            cell_height: 3.0,
-            padding_x: 0.0,
-            padding_y: 0.0,
-            dpr: 1.0,
-        };
-        let mut term = Term::new(&Default::default(), size);
-        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(15), 0, Cell::default());
-        grid[Line(0)][Column(0)].c = 'a';
-        grid[Line(0)][Column(1)].c = 'z';
-        grid[Line(0)][Column(2)].c = 'e';
-        mem::swap(&mut term.grid, &mut grid);
-
-        // Search for URL in grid
-        let url = term.url_search(Point::new(0, Column(1)));
-
-        assert_eq!(url, None);
-    }
-
-    // `ftp://a.de.,;:)!/?` -> `Some("ftp://a.de")`
-    #[test]
-    fn url_remove_end_chars() {
-        let size = SizeInfo {
-            width: 21.0,
-            height: 51.0,
-            cell_width: 3.0,
-            cell_height: 3.0,
-            padding_x: 0.0,
-            padding_y: 0.0,
-            dpr: 1.0,
-        };
-        let mut term = Term::new(&Default::default(), size);
-        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(18), 0, Cell::default());
-        grid[Line(0)][Column(0)].c = 'f';
-        grid[Line(0)][Column(1)].c = 't';
-        grid[Line(0)][Column(2)].c = 'p';
-        grid[Line(0)][Column(3)].c = ':';
-        grid[Line(0)][Column(4)].c = '/';
-        grid[Line(0)][Column(5)].c = '/';
-        grid[Line(0)][Column(6)].c = 'a';
-        grid[Line(0)][Column(7)].c = '.';
-        grid[Line(0)][Column(8)].c = 'd';
-        grid[Line(0)][Column(9)].c = 'e';
-        grid[Line(0)][Column(10)].c = '.';
-        grid[Line(0)][Column(11)].c = ',';
-        grid[Line(0)][Column(12)].c = ';';
-        grid[Line(0)][Column(13)].c = ':';
-        grid[Line(0)][Column(14)].c = ')';
-        grid[Line(0)][Column(15)].c = '!';
-        grid[Line(0)][Column(16)].c = '/';
-        grid[Line(0)][Column(17)].c = '?';
-        mem::swap(&mut term.grid, &mut grid);
-
-        // Search for URL in grid
-        let url = term.url_search(Point::new(0, Column(4)));
-
-        assert_eq!(url, Some("ftp://a.de".into()));
-    }
 }
 
 #[cfg(all(test, feature = "bench"))]
diff --git a/src/url.rs b/src/url.rs
new file mode 100644
index 00000000..385b484b
--- /dev/null
+++ b/src/url.rs
@@ -0,0 +1,268 @@
+// Copyright 2016 Joe Wilm, The Alacritty Project Contributors
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+use url::Url;
+
+// See https://tools.ietf.org/html/rfc3987#page-13
+const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
+const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
+const URL_SCHEMES: [&str; 8] = [
+    "http", "https", "mailto", "news", "file", "git", "ssh", "ftp",
+];
+
+/// Parser for streaming inside-out detection of URLs.
+///
+/// Characters are fed in one at a time through `advance_left` and
+/// `advance_right`, which grow the buffered text at its start and end.
+/// `url` consumes the parser and returns the cleaned-up result, if any.
+pub struct UrlParser {
+    /// Text collected so far, in left-to-right order.
+    state: String,
+}
+
+impl UrlParser {
+    /// Create a parser with an empty buffer.
+    pub fn new() -> Self {
+        UrlParser {
+            state: String::new(),
+        }
+    }
+
+    /// Advance the parser one character to the left.
+    ///
+    /// Returns `true` if `c` ends the URL, in which case it is not stored.
+    pub fn advance_left(&mut self, c: char) -> bool {
+        self.advance(c, 0)
+    }
+
+    /// Advance the parser one character to the right.
+    ///
+    /// Returns `true` if `c` ends the URL, in which case it is not stored.
+    pub fn advance_right(&mut self, c: char) -> bool {
+        self.advance(c, self.state.len())
+    }
+
+    /// Returns the URL if the parser has found any.
+    ///
+    /// The collected text is trimmed first: characters in front of the scheme,
+    /// everything from the first unmatched `)` or `]` onwards, and trailing
+    /// punctuation or unmatched quotes are dropped. The result is only returned
+    /// when it parses as a URL whose scheme is listed in `URL_SCHEMES`.
+    pub fn url(mut self) -> Option<String> {
+        // Remove non-alphabetical characters before scheme.
+        //
+        // Everything is done in byte offsets, so multi-byte characters in front
+        // of the scheme and a `://` at the very start are both handled.
+        if let Some(index) = self.state.find("://") {
+            let scheme_start = self.state[..index]
+                .char_indices()
+                .rev()
+                .find(|&(_, c)| !c.is_ascii_alphabetic())
+                .map(|(i, c)| i + c.len_utf8());
+
+            if let Some(scheme_start) = scheme_start {
+                self.state = self.state.split_off(scheme_start);
+            }
+        }
+
+        // Remove non-matching parentheses and brackets
+        let mut open_parens_count: isize = 0;
+        let mut open_bracks_count: isize = 0;
+        let mut unmatched_close = None;
+        for (i, c) in self.state.char_indices() {
+            match c {
+                '(' => open_parens_count += 1,
+                ')' if open_parens_count > 0 => open_parens_count -= 1,
+                '[' => open_bracks_count += 1,
+                ']' if open_bracks_count > 0 => open_bracks_count -= 1,
+                ')' | ']' => {
+                    // `i` is a byte offset, which is what `truncate` expects
+                    unmatched_close = Some(i);
+                    break;
+                }
+                _ => (),
+            }
+        }
+        if let Some(i) = unmatched_close {
+            self.state.truncate(i);
+        }
+
+        // Track number of quotes
+        let mut num_quotes = self.state.chars().filter(|&c| c == '\'').count();
+
+        // Remove all characters which aren't allowed at the end of a URL
+        while !self.state.is_empty()
+            && (URL_DENY_END_CHARS.contains(&self.state.chars().last().unwrap())
+                || (num_quotes % 2 != 0 && self.state.ends_with('\''))
+                || self.state.ends_with("''")
+                || self.state.ends_with("()"))
+        {
+            if self.state.pop().unwrap() == '\'' {
+                num_quotes -= 1;
+            }
+        }
+
+        // Check if string is valid url
+        match Url::parse(&self.state) {
+            Ok(url) => {
+                if URL_SCHEMES.contains(&url.scheme()) {
+                    Some(self.state)
+                } else {
+                    None
+                }
+            }
+            Err(_) => None,
+        }
+    }
+
+    /// Feed `c` into the buffer at byte offset `pos`.
+    ///
+    /// Separators and control characters end the URL: they are not stored and
+    /// `true` is returned. Anything else is inserted and `false` is returned.
+    fn advance(&mut self, c: char, pos: usize) -> bool {
+        // `is_control` matches exactly U+0000..=U+001F and U+007F..=U+009F
+        if URL_SEPARATOR_CHARS.contains(&c) || c.is_control() {
+            true
+        } else {
+            self.state.insert(pos, c);
+            false
+        }
+    }
+}
+
+#[cfg(test)]
+mod test {
+    use std::mem;
+
+    use crate::grid::Grid;
+    use crate::index::{Column, Line, Point};
+    use crate::term::{Cell, Search, SizeInfo, Term};
+
+    fn url_create_term(input: &str) -> Term {
+        let size = SizeInfo {
+            width: 21.0,
+            height: 51.0,
+            cell_width: 3.0,
+            cell_height: 3.0,
+            padding_x: 0.0,
+            padding_y: 0.0,
+            dpr: 1.0,
+        };
+
+        let mut term = Term::new(&Default::default(), size);
+        let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());
+
+        for (i, c) in input.chars().enumerate() {
+            grid[Line(0)][Column(i)].c = c;
+        }
+
+        mem::swap(term.grid_mut(), &mut grid);
+
+        term
+    }
+
+    fn url_test(input: &str, expected: &str, click_index: usize) {
+        let term = url_create_term(input);
+
+        let url = term.url_search(Point::new(0, Column(click_index)));
+
+        assert_eq!(url, Some(expected.into()));
+    }
+
+    #[test]
+    fn url_skip_invalid() {
+        let term = url_create_term("no url here");
+        let url = term.url_search(Point::new(0, Column(4)));
+        assert_eq!(url, None);
+    }
+
+    #[test]
+    fn url_matching_chars() {
+        url_test("(https://example.org/test(ing))", "https://example.org/test(ing)", 5);
+        url_test("https://example.org/test(ing)", "https://example.org/test(ing)", 5);
+        url_test("((https://example.org))", "https://example.org", 5);
+        url_test(")https://example.org(", "https://example.org", 5);
+        url_test("https://example.org)", "https://example.org", 5);
+        url_test("https://example.org(", "https://example.org", 5);
+        url_test("(https://one.org/)(https://two.org/)", "https://one.org", 5);
+
+        url_test("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80", 5);
+        url_test("([(https://example.org/test(ing))])", "https://example.org/test(ing)", 5);
+        url_test("https://example.org/]()", "https://example.org", 5);
+        url_test("[https://example.org]", "https://example.org", 5);
+
+        url_test("'https://example.org/test'ing'''", "https://example.org/test'ing'", 5);
+        url_test("https://example.org/test'ing'", "https://example.org/test'ing'", 5);
+        url_test("'https://example.org'", "https://example.org", 5);
+        url_test("'https://example.org", "https://example.org", 5);
+        url_test("https://example.org'", "https://example.org", 5);
+    }
+
+    #[test]
+    fn url_detect_end() {
+        url_test("https://example.org/test\u{00}ing", "https://example.org/test", 5);
+        url_test("https://example.org/test\u{1F}ing", "https://example.org/test", 5);
+        url_test("https://example.org/test\u{7F}ing", "https://example.org/test", 5);
+        url_test("https://example.org/test\u{9F}ing", "https://example.org/test", 5);
+        url_test("https://example.org/test\ting", "https://example.org/test", 5);
+        url_test("https://example.org/test ing", "https://example.org/test", 5);
+    }
+
+    #[test]
+    fn url_remove_end_chars() {
+        url_test("https://example.org/test?ing", "https://example.org/test?ing", 5);
+        url_test("https://example.org.,;:)'!/?", "https://example.org", 5);
+        url_test("https://example.org'.", "https://example.org", 5);
+    }
+
+    #[test]
+    fn url_remove_start_chars() {
+        url_test("complicated:https://example.org", "https://example.org", 15);
+        url_test("test.https://example.org", "https://example.org", 10);
+        url_test(",https://example.org", "https://example.org", 5);
+    }
+
+    #[test]
+    fn url_unicode() {
+        url_test("https://xn--example-2b07f.org", "https://xn--example-2b07f.org", 5);
+        url_test("https://example.org/\u{2008A}", "https://example.org/\u{2008A}", 5);
+        url_test("https://example.org/\u{f17c}", "https://example.org/\u{f17c}", 5);
+        url_test("https://üñîçøðé.com/ä", "https://üñîçøðé.com/ä", 5);
+    }
+
+    #[test]
+    fn url_multibyte_offsets() {
+        url_test("ähttps://example.org", "https://example.org", 5);
+        url_test("https://example.org/ä)", "https://example.org/ä", 5);
+    }
+
+    #[test]
+    fn url_leading_scheme_separator() {
+        let term = url_create_term("://example.org");
+        let url = term.url_search(Point::new(0, Column(5)));
+        assert_eq!(url, None);
+    }
+
+    #[test]
+    fn url_schemes() {
+        url_test("mailto://example.org", "mailto://example.org", 5);
+        url_test("https://example.org", "https://example.org", 5);
+        url_test("http://example.org", "http://example.org", 5);
+        url_test("news://example.org", "news://example.org", 5);
+        url_test("file://example.org", "file://example.org", 5);
+        url_test("git://example.org", "git://example.org", 5);
+        url_test("ssh://example.org", "ssh://example.org", 5);
+        url_test("ftp://example.org", "ftp://example.org", 5);
+    }
+}