alacritty/src/url.rs

// Copyright 2016 Joe Wilm, The Alacritty Project Contributors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use url::Url;

/// Characters that can never be part of a URL.
///
/// `UrlParser::advance` reports a boundary instead of storing any of these.
/// See https://tools.ietf.org/html/rfc3987#page-13
const URL_SEPARATOR_CHARS: [char; 10] = [
    '<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`',
];

/// Characters that `UrlParser::url` strips from the end of the collected text.
const URL_DENY_END_CHARS: [char; 8] = [
    '.', ',', ';', ':', '?', '!', '/', '(',
];

/// Schemes a parsed URL must use for `UrlParser::url` to return it.
const URL_SCHEMES: [&'static str; 8] = [
    "http",
    "https",
    "mailto",
    "news",
    "file",
    "git",
    "ssh",
    "ftp",
];

/// Parser for streaming inside-out detection of URLs.
///
/// Characters are fed in one at a time with `advance_left` / `advance_right`
/// and accumulated in `state`; `url` then cleans up the collected text and
/// validates it.
#[derive(Debug, Default)]
pub struct UrlParser {
    /// Candidate URL text collected so far.
    state: String,
}

impl UrlParser {
    /// Creates a parser that has not collected any characters yet.
    pub fn new() -> Self {
        UrlParser {
            state: String::new(),
        }
    }

    /// Advance the parser one character to the left.
    ///
    /// `c` is prepended to the collected text. Returns `true`, without storing
    /// `c`, if it is a character that terminates a URL; returns `false` if it
    /// was stored.
    pub fn advance_left(&mut self, c: char) -> bool {
        self.advance(c, 0)
    }

    /// Advance the parser one character to the right.
    ///
    /// `c` is appended to the collected text. Returns `true`, without storing
    /// `c`, if it is a character that terminates a URL; returns `false` if it
    /// was stored.
    pub fn advance_right(&mut self, c: char) -> bool {
        // `String::insert` takes a byte offset, so the byte length addresses the end.
        let end = self.state.len();
        self.advance(c, end)
    }

    /// Returns the URL if the parser has found any.
    ///
    /// The collected text is cleaned up in three steps: everything before the
    /// alphabetic scheme is dropped, the text is cut at the first unmatched
    /// closing parenthesis or bracket, and characters that may not end a URL
    /// are stripped. The result is returned only if `Url::parse` accepts it
    /// and its scheme is listed in `URL_SCHEMES`.
    pub fn url(mut self) -> Option<String> {
        // Remove non-alphabetical characters before the scheme
        // https://tools.ietf.org/html/rfc3986#section-3.1
        if let Some(index) = self.state.find("://") {
            // Walk backwards from the `://` to the closest non-alphabetic
            // character; the scheme starts right after it.
            let scheme_start = self.state[..index]
                .char_indices()
                .rev()
                .find(|&(_, c)| !c.is_ascii_alphabetic())
                .map(|(byte_index, c)| byte_index + c.len_utf8());

            if let Some(start) = scheme_start {
                self.state = self.state.split_off(start);
            }
        }

        // Remove non-matching parenthesis and brackets
        let mut open_parens_count: isize = 0;
        let mut open_bracks_count: isize = 0;
        let mut cut_at = None;
        // `char_indices` is required here: `String::truncate` expects a byte
        // offset, and a character index would be wrong (or panic) as soon as
        // the text contains a multi-byte character.
        for (byte_index, c) in self.state.char_indices() {
            match c {
                '(' => open_parens_count += 1,
                ')' if open_parens_count > 0 => open_parens_count -= 1,
                '[' => open_bracks_count += 1,
                ']' if open_bracks_count > 0 => open_bracks_count -= 1,
                ')' | ']' => {
                    cut_at = Some(byte_index);
                    break;
                }
                _ => (),
            }
        }
        if let Some(byte_index) = cut_at {
            self.state.truncate(byte_index);
        }

        // Track number of quotes
        let mut num_quotes = self.state.chars().filter(|&c| c == '\'').count();

        // Remove all characters which aren't allowed at the end of a URL
        loop {
            let last = match self.state.chars().next_back() {
                Some(c) => c,
                None => break,
            };

            // A trailing quote is dropped while the quotes are unbalanced (odd
            // count) or doubled; an empty `()` pair at the end is dropped too.
            let strip = URL_DENY_END_CHARS.contains(&last)
                || (num_quotes % 2 != 0 && last == '\'')
                || self.state.ends_with("''")
                || self.state.ends_with("()");
            if !strip {
                break;
            }

            self.state.pop();
            if last == '\'' {
                num_quotes -= 1;
            }
        }

        // Check if string is valid url
        let scheme_allowed = Url::parse(&self.state)
            .map(|url| URL_SCHEMES.contains(&url.scheme()))
            .unwrap_or(false);

        if scheme_allowed {
            Some(self.state)
        } else {
            None
        }
    }

    /// Stores `c` at byte offset `pos` unless it terminates a URL.
    ///
    /// Returns `true` (and leaves the collected text untouched) when `c` is
    /// one of `URL_SEPARATOR_CHARS` or a control character (U+0000..=U+001F,
    /// U+007F..=U+009F); returns `false` after inserting `c`.
    fn advance(&mut self, c: char, pos: usize) -> bool {
        if URL_SEPARATOR_CHARS.contains(&c) || c.is_control() {
            true
        } else {
            self.state.insert(pos, c);
            false
        }
    }
}

#[cfg(test)]
mod tests {
    use std::mem;

    use crate::grid::Grid;
    use crate::index::{Column, Line, Point};
    use crate::message_bar::MessageBuffer;
    use crate::term::cell::Cell;
    use crate::term::{Search, SizeInfo, Term};

    /// Builds a terminal whose grid is a single line containing `input`, one
    /// character per cell.
    fn url_create_term(input: &str) -> Term {
        let size = SizeInfo {
            width: 21.0,
            height: 51.0,
            cell_width: 3.0,
            cell_height: 3.0,
            padding_x: 0.0,
            padding_y: 0.0,
            dpr: 1.0,
        };
        let mut term = Term::new(&Default::default(), size, MessageBuffer::new());

        // The line is as wide as the byte length of `input`; cells past the
        // last character keep the default cell content.
        let columns = Column(input.len());
        let mut grid: Grid<Cell> = Grid::new(Line(1), columns, 0, Cell::default());
        input
            .chars()
            .enumerate()
            .for_each(|(column, c)| grid[Line(0)][Column(column)].c = c);

        // Replace the terminal's own grid with the prepared one.
        mem::swap(term.grid_mut(), &mut grid);

        term
    }

    /// Asserts that searching at column `click_index` of a terminal showing
    /// `input` yields exactly `expected`.
    fn url_test(input: &str, expected: &str, click_index: usize) {
        let term = url_create_term(input);
        let click = Point::new(0, Column(click_index));

        let found = term.url_search(click);

        assert_eq!(found, Some(expected.into()));
    }

    #[test]
    fn url_skip_invalid() {
        let term = url_create_term("no url here");
        let click = Point::new(0, Column(4));

        let found = term.url_search(click);

        assert_eq!(found, None);
    }

    #[test]
    fn url_matching_chars() {
        // Parentheses
        url_test("(https://example.org/test(ing))", "https://example.org/test(ing)", 5);
        url_test("https://example.org/test(ing)", "https://example.org/test(ing)", 5);
        url_test("((https://example.org))", "https://example.org", 5);
        url_test(")https://example.org(", "https://example.org", 5);
        url_test("https://example.org)", "https://example.org", 5);
        url_test("https://example.org(", "https://example.org", 5);
        url_test("(https://one.org/)(https://two.org/)", "https://one.org", 5);

        // Brackets
        url_test("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80", 5);
        url_test("([(https://example.org/test(ing))])", "https://example.org/test(ing)", 5);
        url_test("https://example.org/]()", "https://example.org", 5);
        url_test("[https://example.org]", "https://example.org", 5);

        // Single quotes
        url_test("'https://example.org/test'ing'''", "https://example.org/test'ing'", 5);
        url_test("https://example.org/test'ing'", "https://example.org/test'ing'", 5);
        url_test("'https://example.org'", "https://example.org", 5);
        url_test("'https://example.org", "https://example.org", 5);
        url_test("https://example.org'", "https://example.org", 5);
    }

    #[test]
    fn url_detect_end() {
        url_test("https://example.org/test\u{00}ing", "https://example.org/test", 5);
        url_test("https://example.org/test\u{1F}ing", "https://example.org/test", 5);
        url_test("https://example.org/test\u{7F}ing", "https://example.org/test", 5);
        url_test("https://example.org/test\u{9F}ing", "https://example.org/test", 5);
        url_test("https://example.org/test\ting", "https://example.org/test", 5);
        url_test("https://example.org/test ing", "https://example.org/test", 5);
    }

    #[test]
    fn url_remove_end_chars() {
        url_test("https://example.org/test?ing", "https://example.org/test?ing", 5);
        url_test("https://example.org.,;:)'!/?", "https://example.org", 5);
        url_test("https://example.org'.", "https://example.org", 5);
    }

    #[test]
    fn url_remove_start_chars() {
        url_test("complicated:https://example.org", "https://example.org", 15);
        url_test("test.https://example.org", "https://example.org", 10);
        url_test(",https://example.org", "https://example.org", 5);
        url_test("\u{2502}https://example.org", "https://example.org", 5);
    }

    #[test]
    fn url_unicode() {
        url_test("https://xn--example-2b07f.org", "https://xn--example-2b07f.org", 5);
        url_test("https://example.org/\u{2008A}", "https://example.org/\u{2008A}", 5);
        url_test("https://example.org/\u{f17c}", "https://example.org/\u{f17c}", 5);
        url_test("https://üñîçøðé.com/ä", "https://üñîçøðé.com/ä", 5);
    }

    #[test]
    fn url_schemes() {
        url_test("mailto://example.org", "mailto://example.org", 5);
        url_test("https://example.org", "https://example.org", 5);
        url_test("http://example.org", "http://example.org", 5);
        url_test("news://example.org", "news://example.org", 5);
        url_test("file://example.org", "file://example.org", 5);
        url_test("git://example.org", "git://example.org", 5);
        url_test("ssh://example.org", "ssh://example.org", 5);
        url_test("ftp://example.org", "ftp://example.org", 5);
    }
}
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`// Copyright 2016 Joe Wilm, The Alacritty Project Contributors`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// http://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`use url::Url;`

			`// See https://tools.ietf.org/html/rfc3987#page-13`
			const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '\|', '\\', '^', '`'];
			`const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];`
			`const URL_SCHEMES: [&str; 8] = [`
			`"http", "https", "mailto", "news", "file", "git", "ssh", "ftp",`
			`];`

			`// Parser for streaming inside-out detection of URLs.`
			`pub struct UrlParser {`
			`state: String,`
			`}`

			`impl UrlParser {`
			`pub fn new() -> Self {`
			`UrlParser {`
			`state: String::new(),`
			`}`
			`}`

			`/// Advance the parser one character to the left.`
			`pub fn advance_left(&mut self, c: char) -> bool {`
			`self.advance(c, 0)`
			`}`

			`/// Advance the parser one character to the right.`
			`pub fn advance_right(&mut self, c: char) -> bool {`
			`self.advance(c, self.state.len())`
			`}`

			`/// Returns the URL if the parser has found any.`
			`pub fn url(mut self) -> Option<String> {`
Trim unicode characters from the start of URLs 2019-01-28 19:28:51 +01:00			`// Remove non-alphabetical characters before the scheme`
			`// https://tools.ietf.org/html/rfc3986#section-3.1`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`if let Some(index) = self.state.find("://") {`
Trim unicode characters from the start of URLs 2019-01-28 19:28:51 +01:00			`let iter = self`
			`.state`
			`.char_indices()`
			`.rev()`
			`.skip_while(\|(byte_index, _)\| *byte_index >= index);`
			`for (byte_index, c) in iter {`
			`match c {`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`'a'...'z' \| 'A'...'Z' => (),`
			`_ => {`
Trim unicode characters from the start of URLs 2019-01-28 19:28:51 +01:00			`self.state = self.state.split_off(byte_index + c.len_utf8());`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`break;`
			`}`
			`}`
			`}`
			`}`

			`// Remove non-matching parenthesis and brackets`
			`let mut open_parens_count: isize = 0;`
			`let mut open_bracks_count: isize = 0;`
			`for (i, c) in self.state.chars().enumerate() {`
			`match c {`
			`'(' => open_parens_count += 1,`
			`')' if open_parens_count > 0 => open_parens_count -= 1,`
			`'[' => open_bracks_count += 1,`
			`']' if open_bracks_count > 0 => open_bracks_count -= 1,`
			`')' \| ']' => {`
			`self.state.truncate(i);`
			`break;`
			`}`
			`_ => (),`
			`}`
			`}`

			`// Track number of quotes`
			`let mut num_quotes = self.state.chars().filter(\|&c\| c == '\'').count();`

			`// Remove all characters which aren't allowed at the end of a URL`
			`while !self.state.is_empty()`
			`&& (URL_DENY_END_CHARS.contains(&self.state.chars().last().unwrap())`
			`\|\| (num_quotes % 2 != 0 && self.state.ends_with('\''))`
			`\|\| self.state.ends_with("''")`
			`\|\| self.state.ends_with("()"))`
			`{`
			`if self.state.pop().unwrap() == '\'' {`
			`num_quotes -= 1;`
			`}`
			`}`

			`// Check if string is valid url`
			`match Url::parse(&self.state) {`
			`Ok(url) => {`
			`if URL_SCHEMES.contains(&url.scheme()) {`
			`Some(self.state)`
			`} else {`
			`None`
			`}`
			`}`
			`Err(_) => None,`
			`}`
			`}`

			`fn advance(&mut self, c: char, pos: usize) -> bool {`
			`if URL_SEPARATOR_CHARS.contains(&c)`
			`\|\| (c >= '\u{00}' && c <= '\u{1F}')`
			`\|\| (c >= '\u{7F}' && c <= '\u{9F}')`
			`{`
			`true`
			`} else {`
			`self.state.insert(pos, c);`
			`false`
			`}`
			`}`
			`}`

			`#[cfg(test)]`
Make all configuration fields optional All configuration fields now have fallback values which will be used if the field is not present. This includes mouse, key bindings and platform specific differences. The mouse and key bindings are now filled by default, if the user rebinds a default mapping, it will be overwritten. To unbind a default binding, it can be mapped to `chars: ""`. Since all platform differences can now be correctly handled by the `src/config/mod.rs` code, it's no longer necessary to maintain separate configuration files, so the `alacritty_macos.yml` and `alacritty_windows.yml` have been deleted. Fixes #40. Fixes #1923. 2019-01-17 09:17:26 +00:00			`mod tests {`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`use std::mem;`

			`use crate::grid::Grid;`
			`use crate::index::{Column, Line, Point};`
Dynamically resize terminal for errors/warnings The warning and error messages now don't overwrite other terminal content anymore but instead resize the terminal to make sure that text can always be read. Instead of just showing that there is a new error and pointing to the log, errors will now be displayed fully in multiple lines of text, assuming that there is enough space left in the terminal. Explicit mouse click handling has also been added to the message bar, which made it possible to add a simple `close` button in the form of `[X]`. Alacritty's log file location is now stored in the `$ALACRITTY_LOG` environment variable which the shell inherits automatically. Previously there were some issues with the log file only being deleted when certain methods for closing Alacritty were used (like typing `exit`). This has been reworked and now Ctrl+D, exit and signals should all work properly. Before the config is reloaded, all current messages are now dropped. This should help with multiple terminals all getting clogged up at the same time when the config is broken. When one message is removed, all other duplicate messages are automatically removed too. 2019-02-07 22:36:45 +00:00			`use crate::term::{Search, SizeInfo, Term};`
			`use crate::term::cell::Cell;`
			`use crate::message_bar::MessageBuffer;`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00
			`fn url_create_term(input: &str) -> Term {`
			`let size = SizeInfo {`
			`width: 21.0,`
			`height: 51.0,`
			`cell_width: 3.0,`
			`cell_height: 3.0,`
			`padding_x: 0.0,`
			`padding_y: 0.0,`
			`dpr: 1.0,`
			`};`

Dynamically resize terminal for errors/warnings The warning and error messages now don't overwrite other terminal content anymore but instead resize the terminal to make sure that text can always be read. Instead of just showing that there is a new error and pointing to the log, errors will now be displayed fully in multiple lines of text, assuming that there is enough space left in the terminal. Explicit mouse click handling has also been added to the message bar, which made it possible to add a simple `close` button in the form of `[X]`. Alacritty's log file location is now stored in the `$ALACRITTY_LOG` environment variable which the shell inherits automatically. Previously there were some issues with the log file only being deleted when certain methods for closing Alacritty were used (like typing `exit`). This has been reworked and now Ctrl+D, exit and signals should all work properly. Before the config is reloaded, all current messages are now dropped. This should help with multiple terminals all getting clogged up at the same time when the config is broken. When one message is removed, all other duplicate messages are automatically removed too. 2019-02-07 22:36:45 +00:00			`let mut term = Term::new(&Default::default(), size, MessageBuffer::new());`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());`

			`for (i, c) in input.chars().enumerate() {`
			`grid[Line(0)][Column(i)].c = c;`
			`}`

			`mem::swap(term.grid_mut(), &mut grid);`

			`term`
			`}`

			`fn url_test(input: &str, expected: &str, click_index: usize) {`
			`let term = url_create_term(input);`

			`let url = term.url_search(Point::new(0, Column(click_index)));`

			`assert_eq!(url, Some(expected.into()));`
			`}`

			`#[test]`
			`fn url_skip_invalid() {`
			`let term = url_create_term("no url here");`
			`let url = term.url_search(Point::new(0, Column(4)));`
			`assert_eq!(url, None);`
			`}`

			`#[test]`
			`fn url_matching_chars() {`
			`url_test("(https://example.org/test(ing))", "https://example.org/test(ing)", 5);`
			`url_test("https://example.org/test(ing)", "https://example.org/test(ing)", 5);`
			`url_test("((https://example.org))", "https://example.org", 5);`
			`url_test(")https://example.org(", "https://example.org", 5);`
			`url_test("https://example.org)", "https://example.org", 5);`
			`url_test("https://example.org(", "https://example.org", 5);`
			`url_test("(https://one.org/)(https://two.org/)", "https://one.org", 5);`

			`url_test("https://[2001:db8:a0b:12f0::1]:80", "https://[2001:db8:a0b:12f0::1]:80", 5);`
			`url_test("([(https://example.org/test(ing))])", "https://example.org/test(ing)", 5);`
			`url_test("https://example.org/]()", "https://example.org", 5);`
			`url_test("[https://example.org]", "https://example.org", 5);`

			`url_test("'https://example.org/test'ing'''", "https://example.org/test'ing'", 5);`
			`url_test("https://example.org/test'ing'", "https://example.org/test'ing'", 5);`
			`url_test("'https://example.org'", "https://example.org", 5);`
			`url_test("'https://example.org", "https://example.org", 5);`
			`url_test("https://example.org'", "https://example.org", 5);`
			`}`

			`#[test]`
			`fn url_detect_end() {`
			`url_test("https://example.org/test\u{00}ing", "https://example.org/test", 5);`
			`url_test("https://example.org/test\u{1F}ing", "https://example.org/test", 5);`
			`url_test("https://example.org/test\u{7F}ing", "https://example.org/test", 5);`
			`url_test("https://example.org/test\u{9F}ing", "https://example.org/test", 5);`
			`url_test("https://example.org/test\ting", "https://example.org/test", 5);`
			`url_test("https://example.org/test ing", "https://example.org/test", 5);`
			`}`

			`#[test]`
			`fn url_remove_end_chars() {`
			`url_test("https://example.org/test?ing", "https://example.org/test?ing", 5);`
			`url_test("https://example.org.,;:)'!/?", "https://example.org", 5);`
			`url_test("https://example.org'.", "https://example.org", 5);`
			`}`

			`#[test]`
			`fn url_remove_start_chars() {`
			`url_test("complicated:https://example.org", "https://example.org", 15);`
			`url_test("test.https://example.org", "https://example.org", 10);`
			`url_test(",https://example.org", "https://example.org", 5);`
Trim unicode characters from the start of URLs 2019-01-28 19:28:51 +01:00			`url_test("\u{2502}https://example.org", "https://example.org", 5);`
Improve URL detection with special characters Various special characters and character combinations were not handled correctly with URL detection. All these instances have been resolved and covered by various tests to prevent future regressions. Notable fixes include single quotes working more properly now (like `'https://example.org'`) and IPv6 URL support. Since URL detection is now more than just a few lines of code and it's mostly unrelated to the `Term`, it has also been extracted into the `src/url.rs` file together with all URL-related tests. 2019-01-04 16:47:20 +01:00			`}`

			`#[test]`
			`fn url_unicode() {`
			`url_test("https://xn--example-2b07f.org", "https://xn--example-2b07f.org", 5);`
			`url_test("https://example.org/\u{2008A}", "https://example.org/\u{2008A}", 5);`
			`url_test("https://example.org/\u{f17c}", "https://example.org/\u{f17c}", 5);`
			`url_test("https://üñîçøðé.com/ä", "https://üñîçøðé.com/ä", 5);`
			`}`

			`#[test]`
			`fn url_schemes() {`
			`url_test("mailto://example.org", "mailto://example.org", 5);`
			`url_test("https://example.org", "https://example.org", 5);`
			`url_test("http://example.org", "http://example.org", 5);`
			`url_test("news://example.org", "news://example.org", 5);`
			`url_test("file://example.org", "file://example.org", 5);`
			`url_test("git://example.org", "git://example.org", 5);`
			`url_test("ssh://example.org", "ssh://example.org", 5);`
			`url_test("ftp://example.org", "ftp://example.org", 5);`
			`}`
			`}`