use std::cmp::max; use std::mem; use std::ops::RangeInclusive; use regex_automata::{dense, DenseDFA, Error as RegexError, DFA}; use crate::grid::{BidirectionalIterator, Dimensions, GridIterator, Indexed}; use crate::index::{Boundary, Column, Direction, Point, Side}; use crate::term::cell::{Cell, Flags}; use crate::term::Term; /// Used to match equal brackets, when performing a bracket-pair selection. const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')]; pub type Match = RangeInclusive; /// Terminal regex search state. #[derive(Clone, Debug)] pub struct RegexSearch { /// Locate end of match searching right. right_fdfa: DenseDFA, usize>, /// Locate start of match searching right. right_rdfa: DenseDFA, usize>, /// Locate start of match searching left. left_fdfa: DenseDFA, usize>, /// Locate end of match searching left. left_rdfa: DenseDFA, usize>, } impl RegexSearch { /// Build the forward and backward search DFAs. pub fn new(search: &str) -> Result { // Check case info for smart case let has_uppercase = search.chars().any(|c| c.is_uppercase()); // Create Regex DFAs for all search directions. let mut builder = dense::Builder::new(); let builder = builder.case_insensitive(!has_uppercase); let left_fdfa = builder.clone().reverse(true).build(search)?; let left_rdfa = builder.clone().anchored(true).longest_match(true).build(search)?; let right_fdfa = builder.clone().build(search)?; let right_rdfa = builder.anchored(true).longest_match(true).reverse(true).build(search)?; Ok(RegexSearch { right_fdfa, right_rdfa, left_fdfa, left_rdfa }) } } impl Term { /// Get next search match in the specified direction. pub fn search_next( &self, dfas: &RegexSearch, mut origin: Point, direction: Direction, side: Side, mut max_lines: Option, ) -> Option { origin = self.expand_wide(origin, direction); max_lines = max_lines.filter(|max_lines| max_lines + 1 < self.total_lines()); match direction { Direction::Right => self.next_match_right(dfas, origin, side, max_lines), Direction::Left => self.next_match_left(dfas, origin, side, max_lines), } } /// Find the next match to the right of the origin. fn next_match_right( &self, dfas: &RegexSearch, origin: Point, side: Side, max_lines: Option, ) -> Option { let start = self.line_search_left(origin); let mut end = start; // Limit maximum number of lines searched. end = match max_lines { Some(max_lines) => { let line = (start.line + max_lines).grid_clamp(self, Boundary::None); Point::new(line, self.last_column()) }, _ => end.sub(self, Boundary::None, 1), }; let mut regex_iter = RegexIter::new(start, end, Direction::Right, self, dfas).peekable(); // Check if there's any match at all. let first_match = regex_iter.peek()?.clone(); let regex_match = regex_iter .find(|regex_match| { let match_point = Self::match_side(regex_match, side); // If the match's point is beyond the origin, we're done. match_point.line < start.line || match_point.line > origin.line || (match_point.line == origin.line && match_point.column >= origin.column) }) .unwrap_or(first_match); Some(regex_match) } /// Find the next match to the left of the origin. fn next_match_left( &self, dfas: &RegexSearch, origin: Point, side: Side, max_lines: Option, ) -> Option { let start = self.line_search_right(origin); let mut end = start; // Limit maximum number of lines searched. end = match max_lines { Some(max_lines) => { let line = (start.line - max_lines).grid_clamp(self, Boundary::None); Point::new(line, Column(0)) }, _ => end.add(self, Boundary::None, 1), }; let mut regex_iter = RegexIter::new(start, end, Direction::Left, self, dfas).peekable(); // Check if there's any match at all. let first_match = regex_iter.peek()?.clone(); let regex_match = regex_iter .find(|regex_match| { let match_point = Self::match_side(regex_match, side); // If the match's point is beyond the origin, we're done. match_point.line > start.line || match_point.line < origin.line || (match_point.line == origin.line && match_point.column <= origin.column) }) .unwrap_or(first_match); Some(regex_match) } /// Get the side of a match. fn match_side(regex_match: &Match, side: Side) -> Point { match side { Side::Right => *regex_match.end(), Side::Left => *regex_match.start(), } } /// Find the next regex match to the left of the origin point. /// /// The origin is always included in the regex. pub fn regex_search_left(&self, dfas: &RegexSearch, start: Point, end: Point) -> Option { // Find start and end of match. let match_start = self.regex_search(start, end, Direction::Left, &dfas.left_fdfa)?; let match_end = self.regex_search(match_start, start, Direction::Right, &dfas.left_rdfa)?; Some(match_start..=match_end) } /// Find the next regex match to the right of the origin point. /// /// The origin is always included in the regex. pub fn regex_search_right( &self, dfas: &RegexSearch, start: Point, end: Point, ) -> Option { // Find start and end of match. let match_end = self.regex_search(start, end, Direction::Right, &dfas.right_fdfa)?; let match_start = self.regex_search(match_end, start, Direction::Left, &dfas.right_rdfa)?; Some(match_start..=match_end) } /// Find the next regex match. /// /// This will always return the side of the first match which is farthest from the start point. fn regex_search( &self, start: Point, end: Point, direction: Direction, dfa: &impl DFA, ) -> Option { let topmost_line = self.topmost_line(); let screen_lines = self.screen_lines() as i32; let last_column = self.last_column(); // Advance the iterator. let next = match direction { Direction::Right => GridIterator::next, Direction::Left => GridIterator::prev, }; let mut iter = self.grid.iter_from(start); let mut state = dfa.start_state(); let mut last_wrapped = false; let mut regex_match = None; let mut done = false; let mut cell = iter.cell(); self.skip_fullwidth(&mut iter, &mut cell, direction); let mut c = cell.c; let mut point = iter.point(); loop { // Convert char to array of bytes. let mut buf = [0; 4]; let utf8_len = c.encode_utf8(&mut buf).len(); // Pass char to DFA as individual bytes. for i in 0..utf8_len { // Inverse byte order when going left. let byte = match direction { Direction::Right => buf[i], Direction::Left => buf[utf8_len - i - 1], }; // Since we get the state from the DFA, it doesn't need to be checked. state = unsafe { dfa.next_state_unchecked(state, byte) }; } // Handle regex state changes. if dfa.is_match_or_dead_state(state) { if dfa.is_dead_state(state) { break; } else { regex_match = Some(point); } } // Stop once we've reached the target point. if point == end || done { break; } // Advance grid cell iterator. let mut cell = match next(&mut iter) { Some(Indexed { cell, .. }) => cell, None => { // Wrap around to other end of the scrollback buffer. let line = topmost_line - point.line + screen_lines - 1; let start = Point::new(line, last_column - point.column); iter = self.grid.iter_from(start); iter.cell() }, }; // Check for completion before potentially skipping over fullwidth characters. done = iter.point() == end; self.skip_fullwidth(&mut iter, &mut cell, direction); let wrapped = cell.flags.contains(Flags::WRAPLINE); c = cell.c; let last_point = mem::replace(&mut point, iter.point()); // Handle linebreaks. if (last_point.column == last_column && point.column == Column(0) && !last_wrapped) || (last_point.column == Column(0) && point.column == last_column && !wrapped) { match regex_match { Some(_) => break, None => state = dfa.start_state(), } } last_wrapped = wrapped; } regex_match } /// Advance a grid iterator over fullwidth characters. fn skip_fullwidth<'a>( &self, iter: &'a mut GridIterator<'_, Cell>, cell: &mut &'a Cell, direction: Direction, ) { match direction { // In the alternate screen buffer there might not be a wide char spacer after a wide // char, so we only advance the iterator when the wide char is not in the last column. Direction::Right if cell.flags.contains(Flags::WIDE_CHAR) && iter.point().column < self.last_column() => { iter.next(); }, Direction::Right if cell.flags.contains(Flags::LEADING_WIDE_CHAR_SPACER) => { if let Some(Indexed { cell: new_cell, .. }) = iter.next() { *cell = new_cell; } iter.next(); }, Direction::Left if cell.flags.contains(Flags::WIDE_CHAR_SPACER) => { if let Some(Indexed { cell: new_cell, .. }) = iter.prev() { *cell = new_cell; } let prev = iter.point().sub(self, Boundary::Grid, 1); if self.grid[prev].flags.contains(Flags::LEADING_WIDE_CHAR_SPACER) { iter.prev(); } }, _ => (), } } /// Find next matching bracket. pub fn bracket_search(&self, point: Point) -> Option { let start_char = self.grid[point].c; // Find the matching bracket we're looking for let (forward, end_char) = BRACKET_PAIRS.iter().find_map(|(open, close)| { if open == &start_char { Some((true, *close)) } else if close == &start_char { Some((false, *open)) } else { None } })?; let mut iter = self.grid.iter_from(point); // For every character match that equals the starting bracket, we // ignore one bracket of the opposite type. let mut skip_pairs = 0; loop { // Check the next cell let cell = if forward { iter.next() } else { iter.prev() }; // Break if there are no more cells let cell = match cell { Some(cell) => cell, None => break, }; // Check if the bracket matches if cell.c == end_char && skip_pairs == 0 { return Some(cell.point); } else if cell.c == start_char { skip_pairs += 1; } else if cell.c == end_char { skip_pairs -= 1; } } None } /// Find left end of semantic block. pub fn semantic_search_left(&self, mut point: Point) -> Point { // Limit the starting point to the last line in the history point.line = max(point.line, self.topmost_line()); let mut iter = self.grid.iter_from(point); let last_column = self.columns() - 1; let wide = Flags::WIDE_CHAR | Flags::WIDE_CHAR_SPACER | Flags::LEADING_WIDE_CHAR_SPACER; while let Some(cell) = iter.prev() { if !cell.flags.intersects(wide) && self.semantic_escape_chars.contains(cell.c) { break; } if cell.point.column == last_column && !cell.flags.contains(Flags::WRAPLINE) { break; // cut off if on new line or hit escape char } point = cell.point; } point } /// Find right end of semantic block. pub fn semantic_search_right(&self, mut point: Point) -> Point { // Limit the starting point to the last line in the history point.line = max(point.line, self.topmost_line()); let wide = Flags::WIDE_CHAR | Flags::WIDE_CHAR_SPACER | Flags::LEADING_WIDE_CHAR_SPACER; let last_column = self.columns() - 1; for cell in self.grid.iter_from(point) { if !cell.flags.intersects(wide) && self.semantic_escape_chars.contains(cell.c) { break; } point = cell.point; if point.column == last_column && !cell.flags.contains(Flags::WRAPLINE) { break; // cut off if on new line or hit escape char } } point } /// Find the beginning of the current line across linewraps. pub fn line_search_left(&self, mut point: Point) -> Point { while point.line > self.topmost_line() && self.grid[point.line - 1i32][self.last_column()].flags.contains(Flags::WRAPLINE) { point.line -= 1; } point.column = Column(0); point } /// Find the end of the current line across linewraps. pub fn line_search_right(&self, mut point: Point) -> Point { while point.line + 1 < self.screen_lines() && self.grid[point.line][self.last_column()].flags.contains(Flags::WRAPLINE) { point.line += 1; } point.column = self.last_column(); point } } /// Iterator over regex matches. pub struct RegexIter<'a, T> { point: Point, end: Point, direction: Direction, dfas: &'a RegexSearch, term: &'a Term, done: bool, } impl<'a, T> RegexIter<'a, T> { pub fn new( start: Point, end: Point, direction: Direction, term: &'a Term, dfas: &'a RegexSearch, ) -> Self { Self { point: start, done: false, end, direction, term, dfas } } /// Skip one cell, advancing the origin point to the next one. fn skip(&mut self) { self.point = self.term.expand_wide(self.point, self.direction); self.point = match self.direction { Direction::Right => self.point.add(self.term, Boundary::None, 1), Direction::Left => self.point.sub(self.term, Boundary::None, 1), }; } /// Get the next match in the specified direction. fn next_match(&self) -> Option { match self.direction { Direction::Right => self.term.regex_search_right(self.dfas, self.point, self.end), Direction::Left => self.term.regex_search_left(self.dfas, self.point, self.end), } } } impl<'a, T> Iterator for RegexIter<'a, T> { type Item = Match; fn next(&mut self) -> Option { if self.done { return None; } // Since the end itself might be a single cell match, we search one more time. if self.point == self.end { self.done = true; } let regex_match = self.next_match()?; self.point = *regex_match.end(); if self.point == self.end { // Stop when the match terminates right on the end limit. self.done = true; } else { // Move the new search origin past the match. self.skip(); } Some(regex_match) } } #[cfg(test)] mod tests { use super::*; use crate::config::Config; use crate::index::{Column, Line}; use crate::term::test::{mock_term, TermSize}; #[test] fn regex_right() { #[rustfmt::skip] let term = mock_term("\ testing66\r\n\ Alacritty\n\ 123\r\n\ Alacritty\r\n\ 123\ "); // Check regex across wrapped and unwrapped lines. let dfas = RegexSearch::new("Ala.*123").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(4), Column(2)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(2), Column(2)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn regex_left() { #[rustfmt::skip] let term = mock_term("\ testing66\r\n\ Alacritty\n\ 123\r\n\ Alacritty\r\n\ 123\ "); // Check regex across wrapped and unwrapped lines. let dfas = RegexSearch::new("Ala.*123").unwrap(); let start = Point::new(Line(4), Column(2)); let end = Point::new(Line(1), Column(0)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(2), Column(2)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn nested_regex() { #[rustfmt::skip] let term = mock_term("\ Ala -> Alacritty -> critty\r\n\ critty\ "); // Greedy stopped at linebreak. let dfas = RegexSearch::new("Ala.*critty").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(25)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); // Greedy stopped at dead state. let dfas = RegexSearch::new("Ala[^y]*critty").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(15)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); } #[test] fn no_match_right() { #[rustfmt::skip] let term = mock_term("\ first line\n\ broken second\r\n\ third\ "); let dfas = RegexSearch::new("nothing").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(2), Column(4)); assert_eq!(term.regex_search_right(&dfas, start, end), None); } #[test] fn no_match_left() { #[rustfmt::skip] let term = mock_term("\ first line\n\ broken second\r\n\ third\ "); let dfas = RegexSearch::new("nothing").unwrap(); let start = Point::new(Line(2), Column(4)); let end = Point::new(Line(0), Column(0)); assert_eq!(term.regex_search_left(&dfas, start, end), None); } #[test] fn include_linebreak_left() { #[rustfmt::skip] let term = mock_term("\ testing123\r\n\ xxx\ "); // Make sure the cell containing the linebreak is not skipped. let dfas = RegexSearch::new("te.*123").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(9)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn include_linebreak_right() { #[rustfmt::skip] let term = mock_term("\ xxx\r\n\ testing123\ "); // Make sure the cell containing the linebreak is not skipped. let dfas = RegexSearch::new("te.*123").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(9)); let match_start = Point::new(Line(1), Column(0)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=end)); } #[test] fn skip_dead_cell() { let term = mock_term("alacritty"); // Make sure dead state cell is skipped when reversing. let dfas = RegexSearch::new("alacrit").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(6)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); } #[test] fn reverse_search_dead_recovery() { let term = mock_term("zooo lense"); // Make sure the reverse DFA operates the same as a forward DFA. let dfas = RegexSearch::new("zoo").unwrap(); let start = Point::new(Line(0), Column(9)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn multibyte_unicode() { let term = mock_term("testвосибing"); let dfas = RegexSearch::new("te.*ing").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(11)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); let dfas = RegexSearch::new("te.*ing").unwrap(); let start = Point::new(Line(0), Column(11)); let end = Point::new(Line(0), Column(0)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start)); } #[test] fn fullwidth() { let term = mock_term("a🦇x🦇"); let dfas = RegexSearch::new("[^ ]*").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(5)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); let dfas = RegexSearch::new("[^ ]*").unwrap(); let start = Point::new(Line(0), Column(5)); let end = Point::new(Line(0), Column(0)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start)); } #[test] fn singlecell_fullwidth() { let term = mock_term("🦇"); let dfas = RegexSearch::new("🦇").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(1)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end)); let dfas = RegexSearch::new("🦇").unwrap(); let start = Point::new(Line(0), Column(1)); let end = Point::new(Line(0), Column(0)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start)); } #[test] fn end_on_fullwidth() { let term = mock_term("jarr🦇"); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(4)); // Ensure ending without a match doesn't loop indefinitely. let dfas = RegexSearch::new("x").unwrap(); assert_eq!(term.regex_search_right(&dfas, start, end), None); let dfas = RegexSearch::new("x").unwrap(); let match_end = Point::new(Line(0), Column(5)); assert_eq!(term.regex_search_right(&dfas, start, match_end), None); // Ensure match is captured when only partially inside range. let dfas = RegexSearch::new("jarr🦇").unwrap(); assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=match_end)); } #[test] fn wrapping() { #[rustfmt::skip] let term = mock_term("\ xxx\r\n\ xxx\ "); let dfas = RegexSearch::new("xxx").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(2)); let match_start = Point::new(Line(1), Column(0)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=end)); let dfas = RegexSearch::new("xxx").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=match_end)); } #[test] fn wrapping_into_fullwidth() { #[rustfmt::skip] let term = mock_term("\ 🦇xx\r\n\ xx🦇\ "); let dfas = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end)); let dfas = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(1), Column(2)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(1), Column(1)); let match_end = Point::new(Line(1), Column(3)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn leading_spacer() { #[rustfmt::skip] let mut term = mock_term("\ xxx \n\ 🦇xx\ "); term.grid[Line(0)][Column(3)].flags.insert(Flags::LEADING_WIDE_CHAR_SPACER); let dfas = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(3)); let match_end = Point::new(Line(1), Column(2)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end)); let dfas = RegexSearch::new("🦇x").unwrap(); let start = Point::new(Line(1), Column(3)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(3)); let match_end = Point::new(Line(1), Column(2)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); let dfas = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(1), Column(3)); let match_start = Point::new(Line(0), Column(2)); let match_end = Point::new(Line(1), Column(1)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end)); let dfas = RegexSearch::new("x🦇").unwrap(); let start = Point::new(Line(1), Column(3)); let end = Point::new(Line(0), Column(0)); let match_start = Point::new(Line(0), Column(2)); let match_end = Point::new(Line(1), Column(1)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } #[test] fn wide_without_spacer() { let size = TermSize::new(2, 2); let mut term = Term::new(&Config::default(), &size, ()); term.grid[Line(0)][Column(0)].c = 'x'; term.grid[Line(0)][Column(1)].c = '字'; term.grid[Line(0)][Column(1)].flags = Flags::WIDE_CHAR; let dfas = RegexSearch::new("test").unwrap(); let start = Point::new(Line(0), Column(0)); let end = Point::new(Line(0), Column(1)); let mut iter = RegexIter::new(start, end, Direction::Right, &term, &dfas); assert_eq!(iter.next(), None); } #[test] fn wrap_around_to_another_end() { #[rustfmt::skip] let term = mock_term("\ abc\r\n\ def\ "); // Bottom to top. let dfas = RegexSearch::new("abc").unwrap(); let start = Point::new(Line(1), Column(0)); let end = Point::new(Line(0), Column(2)); let match_start = Point::new(Line(0), Column(0)); let match_end = Point::new(Line(0), Column(2)); assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end)); // Top to bottom. let dfas = RegexSearch::new("def").unwrap(); let start = Point::new(Line(0), Column(2)); let end = Point::new(Line(1), Column(0)); let match_start = Point::new(Line(1), Column(0)); let match_end = Point::new(Line(1), Column(2)); assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end)); } }