alacritty/alacritty_terminal/src/term/search.rs

779 lines
26 KiB
Rust
Raw Normal View History

use std::cmp::min;
use std::mem;
use std::ops::RangeInclusive;
use regex_automata::{dense, DenseDFA, Error as RegexError, DFA};
use crate::grid::{BidirectionalIterator, Dimensions, GridIterator, Indexed};
use crate::index::{Boundary, Column, Direction, Point, Side};
use crate::term::cell::{Cell, Flags};
use crate::term::Term;
/// Used to match equal brackets, when performing a bracket-pair selection.
const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];
pub type Match = RangeInclusive<Point<usize>>;
/// Terminal regex search state.
#[derive(Clone, Debug)]
pub struct RegexSearch {
/// Locate end of match searching right.
right_fdfa: DenseDFA<Vec<usize>, usize>,
/// Locate start of match searching right.
right_rdfa: DenseDFA<Vec<usize>, usize>,
/// Locate start of match searching left.
left_fdfa: DenseDFA<Vec<usize>, usize>,
/// Locate end of match searching left.
left_rdfa: DenseDFA<Vec<usize>, usize>,
}
impl RegexSearch {
/// Build the forward and backward search DFAs.
pub fn new(search: &str) -> Result<RegexSearch, RegexError> {
// Check case info for smart case
let has_uppercase = search.chars().any(|c| c.is_uppercase());
// Create Regex DFAs for all search directions.
let mut builder = dense::Builder::new();
let builder = builder.case_insensitive(!has_uppercase);
let left_fdfa = builder.clone().reverse(true).build(search)?;
let left_rdfa = builder.clone().anchored(true).longest_match(true).build(search)?;
let right_fdfa = builder.clone().build(search)?;
let right_rdfa = builder.anchored(true).longest_match(true).reverse(true).build(search)?;
Ok(RegexSearch { right_fdfa, right_rdfa, left_fdfa, left_rdfa })
}
}
impl<T> Term<T> {
/// Get next search match in the specified direction.
pub fn search_next(
&self,
dfas: &RegexSearch,
mut origin: Point<usize>,
direction: Direction,
side: Side,
mut max_lines: Option<usize>,
) -> Option<Match> {
origin = self.expand_wide(origin, direction);
max_lines = max_lines.filter(|max_lines| max_lines + 1 < self.total_lines());
match direction {
Direction::Right => self.next_match_right(dfas, origin, side, max_lines),
Direction::Left => self.next_match_left(dfas, origin, side, max_lines),
}
}
/// Find the next match to the right of the origin.
fn next_match_right(
&self,
dfas: &RegexSearch,
origin: Point<usize>,
side: Side,
max_lines: Option<usize>,
) -> Option<Match> {
let start = self.line_search_left(origin);
let mut end = start;
// Limit maximum number of lines searched.
let total_lines = self.total_lines();
end = match max_lines {
Some(max_lines) => {
let line = (start.line + total_lines - max_lines) % total_lines;
Point::new(line, self.cols() - 1)
},
_ => end.sub_absolute(self, Boundary::Wrap, 1),
};
let mut regex_iter = RegexIter::new(start, end, Direction::Right, &self, dfas).peekable();
// Check if there's any match at all.
let first_match = regex_iter.peek()?.clone();
let regex_match = regex_iter
.find(|regex_match| {
let match_point = Self::match_side(&regex_match, side);
// If the match's point is beyond the origin, we're done.
match_point.line > start.line
|| match_point.line < origin.line
|| (match_point.line == origin.line && match_point.column >= origin.column)
})
.unwrap_or(first_match);
Some(regex_match)
}
/// Find the next match to the left of the origin.
fn next_match_left(
&self,
dfas: &RegexSearch,
origin: Point<usize>,
side: Side,
max_lines: Option<usize>,
) -> Option<Match> {
let start = self.line_search_right(origin);
let mut end = start;
// Limit maximum number of lines searched.
end = match max_lines {
Some(max_lines) => Point::new((start.line + max_lines) % self.total_lines(), Column(0)),
_ => end.add_absolute(self, Boundary::Wrap, 1),
};
let mut regex_iter = RegexIter::new(start, end, Direction::Left, &self, dfas).peekable();
// Check if there's any match at all.
let first_match = regex_iter.peek()?.clone();
let regex_match = regex_iter
.find(|regex_match| {
let match_point = Self::match_side(&regex_match, side);
// If the match's point is beyond the origin, we're done.
match_point.line < start.line
|| match_point.line > origin.line
|| (match_point.line == origin.line && match_point.column <= origin.column)
})
.unwrap_or(first_match);
Some(regex_match)
}
/// Get the side of a match.
fn match_side(regex_match: &Match, side: Side) -> Point<usize> {
match side {
Side::Right => *regex_match.end(),
Side::Left => *regex_match.start(),
}
}
/// Find the next regex match to the left of the origin point.
///
/// The origin is always included in the regex.
pub fn regex_search_left(
&self,
dfas: &RegexSearch,
start: Point<usize>,
end: Point<usize>,
) -> Option<Match> {
// Find start and end of match.
let match_start = self.regex_search(start, end, Direction::Left, &dfas.left_fdfa)?;
let match_end = self.regex_search(match_start, start, Direction::Right, &dfas.left_rdfa)?;
Some(match_start..=match_end)
}
/// Find the next regex match to the right of the origin point.
///
/// The origin is always included in the regex.
pub fn regex_search_right(
&self,
dfas: &RegexSearch,
start: Point<usize>,
end: Point<usize>,
) -> Option<Match> {
// Find start and end of match.
let match_end = self.regex_search(start, end, Direction::Right, &dfas.right_fdfa)?;
let match_start = self.regex_search(match_end, start, Direction::Left, &dfas.right_rdfa)?;
Some(match_start..=match_end)
}
/// Find the next regex match.
///
/// This will always return the side of the first match which is farthest from the start point.
fn regex_search(
&self,
start: Point<usize>,
end: Point<usize>,
direction: Direction,
dfa: &impl DFA,
) -> Option<Point<usize>> {
let last_line = self.total_lines() - 1;
let last_col = self.cols() - 1;
// Advance the iterator.
let next = match direction {
Direction::Right => GridIterator::next,
Direction::Left => GridIterator::prev,
};
let mut iter = self.grid.iter_from(start);
let mut state = dfa.start_state();
let mut last_wrapped = false;
let mut regex_match = None;
let mut cell = iter.cell();
self.skip_fullwidth(&mut iter, &mut cell, direction);
let mut c = cell.c;
let mut point = iter.point();
loop {
// Convert char to array of bytes.
let mut buf = [0; 4];
let utf8_len = c.encode_utf8(&mut buf).len();
// Pass char to DFA as individual bytes.
for i in 0..utf8_len {
// Inverse byte order when going left.
let byte = match direction {
Direction::Right => buf[i],
Direction::Left => buf[utf8_len - i - 1],
};
// Since we get the state from the DFA, it doesn't need to be checked.
state = unsafe { dfa.next_state_unchecked(state, byte) };
}
// Handle regex state changes.
if dfa.is_match_or_dead_state(state) {
if dfa.is_dead_state(state) {
break;
} else {
regex_match = Some(point);
}
}
// Stop once we've reached the target point.
if point == end {
break;
}
// Advance grid cell iterator.
let mut cell = match next(&mut iter) {
Some(Indexed { cell, .. }) => cell,
None => {
// Wrap around to other end of the scrollback buffer.
let start = Point::new(last_line - point.line, last_col - point.column);
iter = self.grid.iter_from(start);
iter.cell()
},
};
self.skip_fullwidth(&mut iter, &mut cell, direction);
let wrapped = cell.flags.contains(Flags::WRAPLINE);
c = cell.c;
let last_point = mem::replace(&mut point, iter.point());
// Handle linebreaks.
if (last_point.column == last_col && point.column == Column(0) && !last_wrapped)
|| (last_point.column == Column(0) && point.column == last_col && !wrapped)
{
match regex_match {
Some(_) => break,
None => state = dfa.start_state(),
}
}
last_wrapped = wrapped;
}
regex_match
}
/// Advance a grid iterator over fullwidth characters.
fn skip_fullwidth<'a>(
&self,
iter: &'a mut GridIterator<'_, Cell>,
cell: &mut &'a Cell,
direction: Direction,
) {
match direction {
Direction::Right if cell.flags.contains(Flags::WIDE_CHAR) => {
iter.next();
},
Direction::Right if cell.flags.contains(Flags::LEADING_WIDE_CHAR_SPACER) => {
if let Some(Indexed { cell: new_cell, .. }) = iter.next() {
*cell = new_cell;
}
iter.next();
},
Direction::Left if cell.flags.contains(Flags::WIDE_CHAR_SPACER) => {
if let Some(Indexed { cell: new_cell, .. }) = iter.prev() {
*cell = new_cell;
}
let prev = iter.point().sub_absolute(self, Boundary::Clamp, 1);
if self.grid[prev].flags.contains(Flags::LEADING_WIDE_CHAR_SPACER) {
iter.prev();
}
},
_ => (),
}
}
/// Find next matching bracket.
pub fn bracket_search(&self, point: Point<usize>) -> Option<Point<usize>> {
let start_char = self.grid[point.line][point.column].c;
// Find the matching bracket we're looking for
let (forward, end_char) = BRACKET_PAIRS.iter().find_map(|(open, close)| {
if open == &start_char {
Some((true, *close))
} else if close == &start_char {
Some((false, *open))
} else {
None
}
})?;
let mut iter = self.grid.iter_from(point);
// For every character match that equals the starting bracket, we
// ignore one bracket of the opposite type.
let mut skip_pairs = 0;
loop {
// Check the next cell
let cell = if forward { iter.next() } else { iter.prev() };
// Break if there are no more cells
let cell = match cell {
Some(cell) => cell,
None => break,
};
// Check if the bracket matches
if cell.c == end_char && skip_pairs == 0 {
return Some(cell.point);
} else if cell.c == start_char {
skip_pairs += 1;
} else if cell.c == end_char {
skip_pairs -= 1;
}
}
None
}
/// Find left end of semantic block.
pub fn semantic_search_left(&self, mut point: Point<usize>) -> Point<usize> {
// Limit the starting point to the last line in the history
point.line = min(point.line, self.total_lines() - 1);
let mut iter = self.grid.iter_from(point);
let last_col = self.cols() - Column(1);
let wide = Flags::WIDE_CHAR | Flags::WIDE_CHAR_SPACER | Flags::LEADING_WIDE_CHAR_SPACER;
while let Some(cell) = iter.prev() {
if !cell.flags.intersects(wide) && self.semantic_escape_chars.contains(cell.c) {
break;
}
if cell.point.column == last_col && !cell.flags.contains(Flags::WRAPLINE) {
break; // cut off if on new line or hit escape char
}
point = cell.point;
}
point
}
/// Find right end of semantic block.
pub fn semantic_search_right(&self, mut point: Point<usize>) -> Point<usize> {
// Limit the starting point to the last line in the history
point.line = min(point.line, self.total_lines() - 1);
let wide = Flags::WIDE_CHAR | Flags::WIDE_CHAR_SPACER | Flags::LEADING_WIDE_CHAR_SPACER;
let last_col = self.cols() - 1;
for cell in self.grid.iter_from(point) {
if !cell.flags.intersects(wide) && self.semantic_escape_chars.contains(cell.c) {
break;
}
point = cell.point;
if point.column == last_col && !cell.flags.contains(Flags::WRAPLINE) {
break; // cut off if on new line or hit escape char
}
}
point
}
/// Find the beginning of the current line across linewraps.
pub fn line_search_left(&self, mut point: Point<usize>) -> Point<usize> {
while point.line + 1 < self.total_lines()
&& self.grid[point.line + 1][self.cols() - 1].flags.contains(Flags::WRAPLINE)
{
point.line += 1;
}
point.column = Column(0);
point
}
/// Find the end of the current line across linewraps.
pub fn line_search_right(&self, mut point: Point<usize>) -> Point<usize> {
while point.line > 0
&& self.grid[point.line][self.cols() - 1].flags.contains(Flags::WRAPLINE)
{
point.line -= 1;
}
point.column = self.cols() - 1;
point
}
}
/// Iterator over regex matches.
pub struct RegexIter<'a, T> {
point: Point<usize>,
end: Point<usize>,
direction: Direction,
dfas: &'a RegexSearch,
term: &'a Term<T>,
done: bool,
}
impl<'a, T> RegexIter<'a, T> {
pub fn new(
start: Point<usize>,
end: Point<usize>,
direction: Direction,
term: &'a Term<T>,
dfas: &'a RegexSearch,
) -> Self {
Self { point: start, done: false, end, direction, term, dfas }
}
/// Skip one cell, advancing the origin point to the next one.
fn skip(&mut self) {
self.point = self.term.expand_wide(self.point, self.direction);
self.point = match self.direction {
Direction::Right => self.point.add_absolute(self.term, Boundary::Wrap, 1),
Direction::Left => self.point.sub_absolute(self.term, Boundary::Wrap, 1),
};
}
/// Get the next match in the specified direction.
fn next_match(&self) -> Option<Match> {
match self.direction {
Direction::Right => self.term.regex_search_right(self.dfas, self.point, self.end),
Direction::Left => self.term.regex_search_left(self.dfas, self.point, self.end),
}
}
}
impl<'a, T> Iterator for RegexIter<'a, T> {
type Item = Match;
fn next(&mut self) -> Option<Self::Item> {
if self.done {
return None;
}
// Since the end itself might be a single cell match, we search one more time.
if self.point == self.end {
self.done = true;
}
let regex_match = self.next_match()?;
self.point = *regex_match.end();
if self.point == self.end {
// Stop when the match terminates right on the end limit.
self.done = true;
} else {
// Move the new search origin past the match.
self.skip();
}
Some(regex_match)
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::index::Column;
use crate::term::test::mock_term;
#[test]
fn regex_right() {
#[rustfmt::skip]
let term = mock_term("\
testing66\r\n\
Alacritty\n\
123\r\n\
Alacritty\r\n\
123\
");
// Check regex across wrapped and unwrapped lines.
let dfas = RegexSearch::new("Ala.*123").unwrap();
let start = Point::new(3, Column(0));
let end = Point::new(0, Column(2));
let match_start = Point::new(3, Column(0));
let match_end = Point::new(2, Column(2));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end));
}
#[test]
fn regex_left() {
#[rustfmt::skip]
let term = mock_term("\
testing66\r\n\
Alacritty\n\
123\r\n\
Alacritty\r\n\
123\
");
// Check regex across wrapped and unwrapped lines.
let dfas = RegexSearch::new("Ala.*123").unwrap();
let start = Point::new(0, Column(2));
let end = Point::new(3, Column(0));
let match_start = Point::new(3, Column(0));
let match_end = Point::new(2, Column(2));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
}
#[test]
fn nested_regex() {
#[rustfmt::skip]
let term = mock_term("\
Ala -> Alacritty -> critty\r\n\
critty\
");
// Greedy stopped at linebreak.
let dfas = RegexSearch::new("Ala.*critty").unwrap();
let start = Point::new(1, Column(0));
let end = Point::new(1, Column(25));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
// Greedy stopped at dead state.
let dfas = RegexSearch::new("Ala[^y]*critty").unwrap();
let start = Point::new(1, Column(0));
let end = Point::new(1, Column(15));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
}
#[test]
fn no_match_right() {
#[rustfmt::skip]
let term = mock_term("\
first line\n\
broken second\r\n\
third\
");
let dfas = RegexSearch::new("nothing").unwrap();
let start = Point::new(2, Column(0));
let end = Point::new(0, Column(4));
assert_eq!(term.regex_search_right(&dfas, start, end), None);
}
#[test]
fn no_match_left() {
#[rustfmt::skip]
let term = mock_term("\
first line\n\
broken second\r\n\
third\
");
let dfas = RegexSearch::new("nothing").unwrap();
let start = Point::new(0, Column(4));
let end = Point::new(2, Column(0));
assert_eq!(term.regex_search_left(&dfas, start, end), None);
}
#[test]
fn include_linebreak_left() {
#[rustfmt::skip]
let term = mock_term("\
testing123\r\n\
xxx\
");
// Make sure the cell containing the linebreak is not skipped.
let dfas = RegexSearch::new("te.*123").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(1, Column(0));
let match_start = Point::new(1, Column(0));
let match_end = Point::new(1, Column(9));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
}
#[test]
fn include_linebreak_right() {
#[rustfmt::skip]
let term = mock_term("\
xxx\r\n\
testing123\
");
// Make sure the cell containing the linebreak is not skipped.
let dfas = RegexSearch::new("te.*123").unwrap();
let start = Point::new(1, Column(2));
let end = Point::new(0, Column(9));
let match_start = Point::new(0, Column(0));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=end));
}
#[test]
fn skip_dead_cell() {
let term = mock_term("alacritty");
// Make sure dead state cell is skipped when reversing.
let dfas = RegexSearch::new("alacrit").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(0, Column(6));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
}
#[test]
fn reverse_search_dead_recovery() {
let term = mock_term("zooo lense");
// Make sure the reverse DFA operates the same as a forward DFA.
let dfas = RegexSearch::new("zoo").unwrap();
let start = Point::new(0, Column(9));
let end = Point::new(0, Column(0));
let match_start = Point::new(0, Column(0));
let match_end = Point::new(0, Column(2));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
}
#[test]
fn multibyte_unicode() {
let term = mock_term("testвосибing");
let dfas = RegexSearch::new("te.*ing").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(0, Column(11));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
let dfas = RegexSearch::new("te.*ing").unwrap();
let start = Point::new(0, Column(11));
let end = Point::new(0, Column(0));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start));
}
#[test]
fn fullwidth() {
let term = mock_term("a🦇x🦇");
let dfas = RegexSearch::new("[^ ]*").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(0, Column(5));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
let dfas = RegexSearch::new("[^ ]*").unwrap();
let start = Point::new(0, Column(5));
let end = Point::new(0, Column(0));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start));
}
#[test]
fn singlecell_fullwidth() {
let term = mock_term("🦇");
let dfas = RegexSearch::new("🦇").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(0, Column(1));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(start..=end));
let dfas = RegexSearch::new("🦇").unwrap();
let start = Point::new(0, Column(1));
let end = Point::new(0, Column(0));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=start));
}
#[test]
fn wrapping() {
#[rustfmt::skip]
let term = mock_term("\
xxx\r\n\
xxx\
");
let dfas = RegexSearch::new("xxx").unwrap();
let start = Point::new(0, Column(2));
let end = Point::new(1, Column(2));
let match_start = Point::new(1, Column(0));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=end));
let dfas = RegexSearch::new("xxx").unwrap();
let start = Point::new(1, Column(0));
let end = Point::new(0, Column(0));
let match_end = Point::new(0, Column(2));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(end..=match_end));
}
#[test]
fn wrapping_into_fullwidth() {
#[rustfmt::skip]
let term = mock_term("\
🦇xx\r\n\
xx🦇\
");
let dfas = RegexSearch::new("🦇x").unwrap();
let start = Point::new(0, Column(0));
let end = Point::new(1, Column(3));
let match_start = Point::new(1, Column(0));
let match_end = Point::new(1, Column(2));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end));
let dfas = RegexSearch::new("x🦇").unwrap();
let start = Point::new(1, Column(2));
let end = Point::new(0, Column(0));
let match_start = Point::new(0, Column(1));
let match_end = Point::new(0, Column(3));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
}
#[test]
fn leading_spacer() {
#[rustfmt::skip]
let mut term = mock_term("\
xxx \n\
🦇xx\
");
term.grid[1][Column(3)].flags.insert(Flags::LEADING_WIDE_CHAR_SPACER);
let dfas = RegexSearch::new("🦇x").unwrap();
let start = Point::new(1, Column(0));
let end = Point::new(0, Column(3));
let match_start = Point::new(1, Column(3));
let match_end = Point::new(0, Column(2));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end));
let dfas = RegexSearch::new("🦇x").unwrap();
let start = Point::new(0, Column(3));
let end = Point::new(1, Column(0));
let match_start = Point::new(1, Column(3));
let match_end = Point::new(0, Column(2));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
let dfas = RegexSearch::new("x🦇").unwrap();
let start = Point::new(1, Column(0));
let end = Point::new(0, Column(3));
let match_start = Point::new(1, Column(2));
let match_end = Point::new(0, Column(1));
assert_eq!(term.regex_search_right(&dfas, start, end), Some(match_start..=match_end));
let dfas = RegexSearch::new("x🦇").unwrap();
let start = Point::new(0, Column(3));
let end = Point::new(1, Column(0));
let match_start = Point::new(1, Column(2));
let match_end = Point::new(0, Column(1));
assert_eq!(term.regex_search_left(&dfas, start, end), Some(match_start..=match_end));
}
}