Fix URL parsing with double-width characters
Since double-width characters are followed by an empty cell containing only the `WIDE_CHAR_SPACER` flag, the URL parser would stop upon encountering the cell after a double-width character. By skipping cells that contain the `WIDE_CHAR_SPACER` flag and incrementing the URL length by the Unicode width of the character instead of the cell count, this can be resolved for both URL launching and URL highlighting. Fixes #2158.
This commit is contained in:
parent
d29c309007
commit
d8272662db
|
@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
||||||
- Fixes increase/decrease font-size keybindings on international keyboards
|
- Fixes increase/decrease font-size keybindings on international keyboards
|
||||||
- On Wayland, the `--title` flag will set the Window title now
|
- On Wayland, the `--title` flag will set the Window title now
|
||||||
- Parsing issues with URLs starting in the first or ending in the last column
|
- Parsing issues with URLs starting in the first or ending in the last column
|
||||||
|
- URLs stopping at double-width characters
|
||||||
|
|
||||||
## Version 0.2.9
|
## Version 0.2.9
|
||||||
|
|
||||||
|
|
|
@ -447,7 +447,7 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> {
|
||||||
None
|
None
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(Url { text, origin }) = url {
|
if let Some(Url { origin, len, .. }) = url {
|
||||||
let mouse_cursor = if self.ctx.terminal().mode().intersects(mouse_mode) {
|
let mouse_cursor = if self.ctx.terminal().mode().intersects(mouse_mode) {
|
||||||
MouseCursor::Default
|
MouseCursor::Default
|
||||||
} else {
|
} else {
|
||||||
|
@ -473,9 +473,9 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Underline all cells and store their current underline state
|
// Underline all cells and store their current underline state
|
||||||
let mut underlined = Vec::with_capacity(text.len());
|
let mut underlined = Vec::with_capacity(len);
|
||||||
let iter = once(start).chain(start.iter(Column(cols - 1), last_line));
|
let iter = once(start).chain(start.iter(Column(cols - 1), last_line));
|
||||||
for point in iter.take(text.len()) {
|
for point in iter.take(len) {
|
||||||
let cell = &mut self.ctx.terminal_mut().grid_mut()[point.line][point.col];
|
let cell = &mut self.ctx.terminal_mut().grid_mut()[point.line][point.col];
|
||||||
underlined.push(cell.flags.contains(Flags::UNDERLINE));
|
underlined.push(cell.flags.contains(Flags::UNDERLINE));
|
||||||
cell.flags.insert(Flags::UNDERLINE);
|
cell.flags.insert(Flags::UNDERLINE);
|
||||||
|
|
|
@ -120,14 +120,14 @@ impl Search for Term {
|
||||||
let mut url_parser = UrlParser::new();
|
let mut url_parser = UrlParser::new();
|
||||||
while let Some(cell) = iterb.prev() {
|
while let Some(cell) = iterb.prev() {
|
||||||
if (iterb.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
|
if (iterb.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
|
||||||
|| url_parser.advance_left(cell.c)
|
|| url_parser.advance_left(cell)
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(cell) = iterf.next() {
|
while let Some(cell) = iterf.next() {
|
||||||
if url_parser.advance_right(cell.c)
|
if url_parser.advance_right(cell)
|
||||||
|| (iterf.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
|
|| (iterf.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE))
|
||||||
{
|
{
|
||||||
break;
|
break;
|
||||||
|
|
83
src/url.rs
83
src/url.rs
|
@ -14,6 +14,8 @@
|
||||||
|
|
||||||
use url;
|
use url;
|
||||||
|
|
||||||
|
use crate::term::cell::{Cell, Flags};
|
||||||
|
|
||||||
// See https://tools.ietf.org/html/rfc3987#page-13
|
// See https://tools.ietf.org/html/rfc3987#page-13
|
||||||
const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
|
const URL_SEPARATOR_CHARS: [char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`'];
|
||||||
const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
|
const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '('];
|
||||||
|
@ -26,12 +28,14 @@ const URL_SCHEMES: [&str; 8] = [
|
||||||
pub struct Url {
|
pub struct Url {
|
||||||
pub text: String,
|
pub text: String,
|
||||||
pub origin: usize,
|
pub origin: usize,
|
||||||
|
pub len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Parser for streaming inside-out detection of URLs.
|
/// Parser for streaming inside-out detection of URLs.
|
||||||
pub struct UrlParser {
|
pub struct UrlParser {
|
||||||
state: String,
|
state: String,
|
||||||
origin: usize,
|
origin: usize,
|
||||||
|
len: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl UrlParser {
|
impl UrlParser {
|
||||||
|
@ -39,22 +43,40 @@ impl UrlParser {
|
||||||
UrlParser {
|
UrlParser {
|
||||||
state: String::new(),
|
state: String::new(),
|
||||||
origin: 0,
|
origin: 0,
|
||||||
|
len: 0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advance the parser one character to the left.
|
/// Advance the parser one character to the left.
|
||||||
pub fn advance_left(&mut self, c: char) -> bool {
|
pub fn advance_left(&mut self, cell: &Cell) -> bool {
|
||||||
if self.advance(c, 0) {
|
if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
|
||||||
|
self.origin += 1;
|
||||||
|
self.len += 1;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.advance(cell.c, 0) {
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
self.origin += 1;
|
self.origin += 1;
|
||||||
|
self.len += 1;
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Advance the parser one character to the right.
|
/// Advance the parser one character to the right.
|
||||||
pub fn advance_right(&mut self, c: char) -> bool {
|
pub fn advance_right(&mut self, cell: &Cell) -> bool {
|
||||||
self.advance(c, self.state.len())
|
if cell.flags.contains(Flags::WIDE_CHAR_SPACER) {
|
||||||
|
self.len += 1;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
if self.advance(cell.c, self.state.len()) {
|
||||||
|
true
|
||||||
|
} else {
|
||||||
|
self.len += 1;
|
||||||
|
false
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the URL if the parser has found any.
|
/// Returns the URL if the parser has found any.
|
||||||
|
@ -116,8 +138,9 @@ impl UrlParser {
|
||||||
Ok(url) => {
|
Ok(url) => {
|
||||||
if URL_SCHEMES.contains(&url.scheme()) && self.origin > 0 {
|
if URL_SCHEMES.contains(&url.scheme()) && self.origin > 0 {
|
||||||
Some(Url {
|
Some(Url {
|
||||||
text: self.state,
|
|
||||||
origin: self.origin - 1,
|
origin: self.origin - 1,
|
||||||
|
text: self.state,
|
||||||
|
len: self.len,
|
||||||
})
|
})
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -144,10 +167,12 @@ impl UrlParser {
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::mem;
|
use std::mem;
|
||||||
|
|
||||||
|
use unicode_width::UnicodeWidthChar;
|
||||||
|
|
||||||
use crate::grid::Grid;
|
use crate::grid::Grid;
|
||||||
use crate::index::{Column, Line, Point};
|
use crate::index::{Column, Line, Point};
|
||||||
use crate::term::{Search, SizeInfo, Term};
|
use crate::term::{Search, SizeInfo, Term};
|
||||||
use crate::term::cell::Cell;
|
use crate::term::cell::{Cell, Flags};
|
||||||
use crate::message_bar::MessageBuffer;
|
use crate::message_bar::MessageBuffer;
|
||||||
|
|
||||||
fn url_create_term(input: &str) -> Term {
|
fn url_create_term(input: &str) -> Term {
|
||||||
|
@ -161,11 +186,22 @@ mod tests {
|
||||||
dpr: 1.0,
|
dpr: 1.0,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let width = input.chars().map(|c| if c.width() == Some(2) { 2 } else { 1 }).sum();
|
||||||
let mut term = Term::new(&Default::default(), size, MessageBuffer::new());
|
let mut term = Term::new(&Default::default(), size, MessageBuffer::new());
|
||||||
let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default());
|
let mut grid: Grid<Cell> = Grid::new(Line(1), Column(width), 0, Cell::default());
|
||||||
|
|
||||||
for (i, c) in input.chars().enumerate() {
|
let mut i = 0;
|
||||||
|
for c in input.chars() {
|
||||||
grid[Line(0)][Column(i)].c = c;
|
grid[Line(0)][Column(i)].c = c;
|
||||||
|
|
||||||
|
if c.width() == Some(2) {
|
||||||
|
grid[Line(0)][Column(i)].flags.insert(Flags::WIDE_CHAR);
|
||||||
|
grid[Line(0)][Column(i + 1)].flags.insert(Flags::WIDE_CHAR_SPACER);
|
||||||
|
grid[Line(0)][Column(i + 1)].c = ' ';
|
||||||
|
i += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
i += 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
mem::swap(term.grid_mut(), &mut grid);
|
mem::swap(term.grid_mut(), &mut grid);
|
||||||
|
@ -199,6 +235,37 @@ mod tests {
|
||||||
let term = url_create_term("https://example.org");
|
let term = url_create_term("https://example.org");
|
||||||
let url = term.url_search(Point::new(0, Column(0)));
|
let url = term.url_search(Point::new(0, Column(0)));
|
||||||
assert_eq!(url.map(|u| u.origin), Some(0));
|
assert_eq!(url.map(|u| u.origin), Some(0));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(10)));
|
||||||
|
assert_eq!(url.map(|u| u.origin), Some(10));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(8)));
|
||||||
|
assert_eq!(url.map(|u| u.origin), Some(8));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(9)));
|
||||||
|
assert_eq!(url.map(|u| u.origin), Some(9));
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn url_len() {
|
||||||
|
let term = url_create_term(" test https://example.org ");
|
||||||
|
let url = term.url_search(Point::new(0, Column(10)));
|
||||||
|
assert_eq!(url.map(|u| u.len), Some(19));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(0)));
|
||||||
|
assert_eq!(url.map(|u| u.len), Some(14));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(10)));
|
||||||
|
assert_eq!(url.map(|u| u.len), Some(14));
|
||||||
|
|
||||||
|
let term = url_create_term("https://全.org");
|
||||||
|
let url = term.url_search(Point::new(0, Column(9)));
|
||||||
|
assert_eq!(url.map(|u| u.len), Some(14));
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
|
|
Loading…
Reference in New Issue