Fix regex memory usage
This fixes an issue where regexes with a large number of possible states would consume excessive memory, since the entire DFA was compiled ahead of time. To solve this, the DFA is now built at runtime using `regex-automata`'s hybrid DFA. However, some checks are still performed ahead of time, so obscenely large regexes (`[0-9A-Za-z]{999999999}`) are still rejected with an error; this shouldn't cause any issues in practice. A regex which is large, but not large enough to fail the NFA construction (like `[0-9A-Za-z]{999999}`), will cause a long search of the entire grid, but will complete and show the match. Closes #7097.
This commit is contained in:
parent
77aa9f42ba
commit
e35e5ad14f
|
@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
|||
- Cut off wide characters in preedit string
|
||||
- Scrolling on touchscreens
|
||||
- Double clicking on CSD titlebar not always maximizing a window on Wayland
|
||||
- Excessive memory usage when using regexes with a large number of possible states
|
||||
|
||||
### Removed
|
||||
|
||||
|
|
|
@ -485,7 +485,7 @@ impl LazyRegex {
|
|||
/// Execute a function with the compiled regex DFAs as parameter.
|
||||
pub fn with_compiled<T, F>(&self, f: F) -> Option<T>
|
||||
where
|
||||
F: FnMut(&RegexSearch) -> T,
|
||||
F: FnMut(&mut RegexSearch) -> T,
|
||||
{
|
||||
self.0.borrow_mut().compiled().map(f)
|
||||
}
|
||||
|
@ -514,7 +514,7 @@ impl LazyRegexVariant {
|
|||
///
|
||||
/// If the regex is not already compiled, this will compile the DFAs and store them for future
|
||||
/// access.
|
||||
fn compiled(&mut self) -> Option<&RegexSearch> {
|
||||
fn compiled(&mut self) -> Option<&mut RegexSearch> {
|
||||
// Check if the regex has already been compiled.
|
||||
let regex = match self {
|
||||
Self::Compiled(regex_search) => return Some(regex_search),
|
||||
|
@ -578,8 +578,8 @@ mod tests {
|
|||
"ftp://ftp.example.org",
|
||||
] {
|
||||
let term = mock_term(regular_url);
|
||||
let regex = RegexSearch::new(URL_REGEX).unwrap();
|
||||
let matches = visible_regex_match_iter(&term, &regex).collect::<Vec<_>>();
|
||||
let mut regex = RegexSearch::new(URL_REGEX).unwrap();
|
||||
let matches = visible_regex_match_iter(&term, &mut regex).collect::<Vec<_>>();
|
||||
assert_eq!(
|
||||
matches.len(),
|
||||
1,
|
||||
|
@ -599,8 +599,8 @@ mod tests {
|
|||
"mailto:",
|
||||
] {
|
||||
let term = mock_term(url_like);
|
||||
let regex = RegexSearch::new(URL_REGEX).unwrap();
|
||||
let matches = visible_regex_match_iter(&term, &regex).collect::<Vec<_>>();
|
||||
let mut regex = RegexSearch::new(URL_REGEX).unwrap();
|
||||
let matches = visible_regex_match_iter(&term, &mut regex).collect::<Vec<_>>();
|
||||
assert!(
|
||||
matches.is_empty(),
|
||||
"Should not match url in string {url_like}, but instead got: {matches:?}"
|
||||
|
|
|
@ -41,7 +41,7 @@ impl<'a> RenderableContent<'a> {
|
|||
config: &'a UiConfig,
|
||||
display: &'a mut Display,
|
||||
term: &'a Term<T>,
|
||||
search_state: &'a SearchState,
|
||||
search_state: &'a mut SearchState,
|
||||
) -> Self {
|
||||
let search = search_state.dfas().map(|dfas| HintMatches::visible_regex_matches(term, dfas));
|
||||
let focused_match = search_state.focused_match();
|
||||
|
@ -486,7 +486,7 @@ impl<'a> HintMatches<'a> {
|
|||
}
|
||||
|
||||
/// Create from regex matches on term visible part.
|
||||
fn visible_regex_matches<T>(term: &Term<T>, dfas: &RegexSearch) -> Self {
|
||||
fn visible_regex_matches<T>(term: &Term<T>, dfas: &mut RegexSearch) -> Self {
|
||||
let matches = hint::visible_regex_match_iter(term, dfas).collect::<Vec<_>>();
|
||||
Self::new(matches)
|
||||
}
|
||||
|
|
|
@ -90,7 +90,8 @@ impl HintState {
|
|||
|
||||
// Apply post-processing and search for sub-matches if necessary.
|
||||
if hint.post_processing {
|
||||
self.matches.extend(matches.flat_map(|rm| {
|
||||
let mut matches = matches.collect::<Vec<_>>();
|
||||
self.matches.extend(matches.drain(..).flat_map(|rm| {
|
||||
HintPostProcessor::new(term, regex, rm).collect::<Vec<_>>()
|
||||
}));
|
||||
} else {
|
||||
|
@ -289,7 +290,7 @@ impl HintLabels {
|
|||
/// Iterate over all visible regex matches.
|
||||
pub fn visible_regex_match_iter<'a, T>(
|
||||
term: &'a Term<T>,
|
||||
regex: &'a RegexSearch,
|
||||
regex: &'a mut RegexSearch,
|
||||
) -> impl Iterator<Item = Match> + 'a {
|
||||
let viewport_start = Line(-(term.grid().display_offset() as i32));
|
||||
let viewport_end = viewport_start + term.bottommost_line();
|
||||
|
@ -344,7 +345,7 @@ pub fn visible_unique_hyperlinks_iter<T>(term: &Term<T>) -> impl Iterator<Item =
|
|||
fn regex_match_at<T>(
|
||||
term: &Term<T>,
|
||||
point: Point,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
post_processing: bool,
|
||||
) -> Option<Match> {
|
||||
let regex_match = visible_regex_match_iter(term, regex).find(|rm| rm.contains(&point))?;
|
||||
|
@ -450,7 +451,7 @@ fn hyperlink_at<T>(term: &Term<T>, point: Point) -> Option<(Hyperlink, Match)> {
|
|||
/// Iterator over all post-processed matches inside an existing hint match.
|
||||
struct HintPostProcessor<'a, T> {
|
||||
/// Regex search DFAs.
|
||||
regex: &'a RegexSearch,
|
||||
regex: &'a mut RegexSearch,
|
||||
|
||||
/// Terminal reference.
|
||||
term: &'a Term<T>,
|
||||
|
@ -467,7 +468,7 @@ struct HintPostProcessor<'a, T> {
|
|||
|
||||
impl<'a, T> HintPostProcessor<'a, T> {
|
||||
/// Create a new iterator for an unprocessed match.
|
||||
fn new(term: &'a Term<T>, regex: &'a RegexSearch, regex_match: Match) -> Self {
|
||||
fn new(term: &'a Term<T>, regex: &'a mut RegexSearch, regex_match: Match) -> Self {
|
||||
let mut post_processor = Self {
|
||||
next_match: None,
|
||||
start: *regex_match.start(),
|
||||
|
@ -638,11 +639,11 @@ mod tests {
|
|||
fn closed_bracket_does_not_result_in_infinite_iterator() {
|
||||
let term = mock_term(" ) ");
|
||||
|
||||
let search = RegexSearch::new("[^/ ]").unwrap();
|
||||
let mut search = RegexSearch::new("[^/ ]").unwrap();
|
||||
|
||||
let count = HintPostProcessor::new(
|
||||
&term,
|
||||
&search,
|
||||
&mut search,
|
||||
Point::new(Line(0), Column(1))..=Point::new(Line(0), Column(1)),
|
||||
)
|
||||
.take(1)
|
||||
|
@ -694,9 +695,9 @@ mod tests {
|
|||
// The Term returned from this call will have a viewport starting at 0 and ending at 4096.
|
||||
// That's good enough for this test, since it only cares about visible content.
|
||||
let term = mock_term(&content);
|
||||
let regex = RegexSearch::new("match!").unwrap();
|
||||
let mut regex = RegexSearch::new("match!").unwrap();
|
||||
|
||||
// The iterator should match everything in the viewport.
|
||||
assert_eq!(visible_regex_match_iter(&term, &regex).count(), 4096);
|
||||
assert_eq!(visible_regex_match_iter(&term, &mut regex).count(), 4096);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -759,7 +759,7 @@ impl Display {
|
|||
scheduler: &mut Scheduler,
|
||||
message_buffer: &MessageBuffer,
|
||||
config: &UiConfig,
|
||||
search_state: &SearchState,
|
||||
search_state: &mut SearchState,
|
||||
) {
|
||||
// Collect renderable content before the terminal is dropped.
|
||||
let mut content = RenderableContent::new(config, self, &terminal, search_state);
|
||||
|
|
|
@ -154,8 +154,8 @@ impl SearchState {
|
|||
}
|
||||
|
||||
/// Active search dfas.
|
||||
pub fn dfas(&self) -> Option<&RegexSearch> {
|
||||
self.dfas.as_ref()
|
||||
pub fn dfas(&mut self) -> Option<&mut RegexSearch> {
|
||||
self.dfas.as_mut()
|
||||
}
|
||||
|
||||
/// Search regex text if a search is active.
|
||||
|
@ -637,7 +637,7 @@ impl<'a, N: Notify + 'a, T: EventListener> input::ActionContext<T> for ActionCon
|
|||
fn search_next(&mut self, origin: Point, direction: Direction, side: Side) -> Option<Match> {
|
||||
self.search_state
|
||||
.dfas
|
||||
.as_ref()
|
||||
.as_mut()
|
||||
.and_then(|dfas| self.terminal.search_next(dfas, origin, direction, side, None))
|
||||
}
|
||||
|
||||
|
@ -913,7 +913,7 @@ impl<'a, N: Notify + 'a, T: EventListener> ActionContext<'a, N, T> {
|
|||
|
||||
/// Jump to the first regex match from the search origin.
|
||||
fn goto_match(&mut self, mut limit: Option<usize>) {
|
||||
let dfas = match &self.search_state.dfas {
|
||||
let dfas = match &mut self.search_state.dfas {
|
||||
Some(dfas) => dfas,
|
||||
None => return,
|
||||
};
|
||||
|
|
|
@ -398,7 +398,7 @@ impl WindowContext {
|
|||
scheduler,
|
||||
&self.message_buffer,
|
||||
&self.config,
|
||||
&self.search_state,
|
||||
&mut self.search_state,
|
||||
);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use std::cmp::max;
|
||||
use std::error::Error;
|
||||
use std::mem;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
pub use regex_automata::dfa::dense::BuildError;
|
||||
use regex_automata::dfa::dense::{Builder, Config, DFA};
|
||||
use regex_automata::dfa::Automaton;
|
||||
use log::{debug, warn};
|
||||
use regex_automata::hybrid::dfa::{Builder, Cache, Config, DFA};
|
||||
pub use regex_automata::hybrid::BuildError;
|
||||
use regex_automata::nfa::thompson::Config as ThompsonConfig;
|
||||
use regex_automata::util::syntax::Config as SyntaxConfig;
|
||||
use regex_automata::{Anchored, Input};
|
||||
|
@ -17,38 +18,59 @@ use crate::term::Term;
|
|||
/// Used to match equal brackets, when performing a bracket-pair selection.
|
||||
const BRACKET_PAIRS: [(char, char); 4] = [('(', ')'), ('[', ']'), ('{', '}'), ('<', '>')];
|
||||
|
||||
/// Maximum DFA size to prevent pathological regexes taking down the entire system.
|
||||
const MAX_DFA_SIZE: usize = 100_000_000;
|
||||
|
||||
pub type Match = RangeInclusive<Point>;
|
||||
|
||||
/// Terminal regex search state.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct RegexSearch {
|
||||
dfa: DFA<Vec<u32>>,
|
||||
rdfa: DFA<Vec<u32>>,
|
||||
fdfa: LazyDfa,
|
||||
rdfa: LazyDfa,
|
||||
}
|
||||
|
||||
impl RegexSearch {
|
||||
/// Build the forward and backward search DFAs.
|
||||
pub fn new(search: &str) -> Result<RegexSearch, Box<BuildError>> {
|
||||
// Setup configs for both DFA directions.
|
||||
//
|
||||
// Bounds are based on Regex's meta engine:
|
||||
// https://github.com/rust-lang/regex/blob/061ee815ef2c44101dba7b0b124600fcb03c1912/regex-automata/src/meta/wrappers.rs#L581-L599
|
||||
let has_uppercase = search.chars().any(|c| c.is_uppercase());
|
||||
let syntax_config = SyntaxConfig::new().case_insensitive(!has_uppercase);
|
||||
let config = Config::new().dfa_size_limit(Some(MAX_DFA_SIZE));
|
||||
let config =
|
||||
Config::new().minimum_cache_clear_count(Some(3)).minimum_bytes_per_state(Some(10));
|
||||
let max_size = config.get_cache_capacity();
|
||||
let mut thompson_config = ThompsonConfig::new().nfa_size_limit(Some(max_size));
|
||||
|
||||
// Create Regex DFA for left-to-right search.
|
||||
let dfa = Builder::new().configure(config.clone()).syntax(syntax_config).build(search)?;
|
||||
let fdfa = Builder::new()
|
||||
.configure(config.clone())
|
||||
.syntax(syntax_config)
|
||||
.thompson(thompson_config.clone())
|
||||
.build(search)?;
|
||||
|
||||
// Create Regex DFA for right-to-left search.
|
||||
let thompson_config = ThompsonConfig::new().reverse(true);
|
||||
thompson_config = thompson_config.reverse(true);
|
||||
let rdfa = Builder::new()
|
||||
.configure(config)
|
||||
.syntax(syntax_config)
|
||||
.thompson(thompson_config)
|
||||
.build(search)?;
|
||||
|
||||
Ok(RegexSearch { dfa, rdfa })
|
||||
Ok(RegexSearch { fdfa: fdfa.into(), rdfa: rdfa.into() })
|
||||
}
|
||||
}
|
||||
|
||||
/// Runtime-evaluated DFA.
|
||||
#[derive(Clone, Debug)]
|
||||
struct LazyDfa {
|
||||
dfa: DFA,
|
||||
cache: Cache,
|
||||
}
|
||||
|
||||
impl From<DFA> for LazyDfa {
|
||||
fn from(dfa: DFA) -> Self {
|
||||
let cache = dfa.create_cache();
|
||||
Self { dfa, cache }
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -56,7 +78,7 @@ impl<T> Term<T> {
|
|||
/// Get next search match in the specified direction.
|
||||
pub fn search_next(
|
||||
&self,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
mut origin: Point,
|
||||
direction: Direction,
|
||||
side: Side,
|
||||
|
@ -75,7 +97,7 @@ impl<T> Term<T> {
|
|||
/// Find the next match to the right of the origin.
|
||||
fn next_match_right(
|
||||
&self,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
origin: Point,
|
||||
side: Side,
|
||||
max_lines: Option<usize>,
|
||||
|
@ -114,7 +136,7 @@ impl<T> Term<T> {
|
|||
/// Find the next match to the left of the origin.
|
||||
fn next_match_left(
|
||||
&self,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
origin: Point,
|
||||
side: Side,
|
||||
max_lines: Option<usize>,
|
||||
|
@ -163,14 +185,14 @@ impl<T> Term<T> {
|
|||
/// The origin is always included in the regex.
|
||||
pub fn regex_search_left(
|
||||
&self,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
start: Point,
|
||||
end: Point,
|
||||
) -> Option<Match> {
|
||||
// Find start and end of match.
|
||||
let match_start = self.regex_search(start, end, Direction::Left, false, &regex.rdfa)?;
|
||||
let match_start = self.regex_search(start, end, Direction::Left, false, &mut regex.rdfa)?;
|
||||
let match_end =
|
||||
self.regex_search(match_start, start, Direction::Right, true, &regex.dfa)?;
|
||||
self.regex_search(match_start, start, Direction::Right, true, &mut regex.fdfa)?;
|
||||
|
||||
Some(match_start..=match_end)
|
||||
}
|
||||
|
@ -180,14 +202,14 @@ impl<T> Term<T> {
|
|||
/// The origin is always included in the regex.
|
||||
pub fn regex_search_right(
|
||||
&self,
|
||||
regex: &RegexSearch,
|
||||
regex: &mut RegexSearch,
|
||||
start: Point,
|
||||
end: Point,
|
||||
) -> Option<Match> {
|
||||
// Find start and end of match.
|
||||
let match_end = self.regex_search(start, end, Direction::Right, false, &regex.dfa)?;
|
||||
let match_end = self.regex_search(start, end, Direction::Right, false, &mut regex.fdfa)?;
|
||||
let match_start =
|
||||
self.regex_search(match_end, start, Direction::Left, true, &regex.rdfa)?;
|
||||
self.regex_search(match_end, start, Direction::Left, true, &mut regex.rdfa)?;
|
||||
|
||||
Some(match_start..=match_end)
|
||||
}
|
||||
|
@ -201,8 +223,29 @@ impl<T> Term<T> {
|
|||
end: Point,
|
||||
direction: Direction,
|
||||
anchored: bool,
|
||||
regex: &impl Automaton,
|
||||
regex: &mut LazyDfa,
|
||||
) -> Option<Point> {
|
||||
match self.regex_search_internal(start, end, direction, anchored, regex) {
|
||||
Ok(regex_match) => regex_match,
|
||||
Err(err) => {
|
||||
warn!("Regex exceeded complexity limit");
|
||||
debug!(" {err}");
|
||||
None
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Find the next regex match.
|
||||
///
|
||||
/// To automatically log regex complexity errors, use [`Self::regex_search`] instead.
|
||||
fn regex_search_internal(
|
||||
&self,
|
||||
start: Point,
|
||||
end: Point,
|
||||
direction: Direction,
|
||||
anchored: bool,
|
||||
regex: &mut LazyDfa,
|
||||
) -> Result<Option<Point>, Box<dyn Error>> {
|
||||
let topmost_line = self.topmost_line();
|
||||
let screen_lines = self.screen_lines() as i32;
|
||||
let last_column = self.last_column();
|
||||
|
@ -216,8 +259,7 @@ impl<T> Term<T> {
|
|||
// Get start state for the DFA.
|
||||
let regex_anchored = if anchored { Anchored::Yes } else { Anchored::No };
|
||||
let input = Input::new(&[]).anchored(regex_anchored);
|
||||
let start_state = regex.start_state_forward(&input).unwrap();
|
||||
let mut state = start_state;
|
||||
let mut state = regex.dfa.start_state_forward(&mut regex.cache, &input).unwrap();
|
||||
|
||||
let mut iter = self.grid.iter_from(start);
|
||||
let mut last_wrapped = false;
|
||||
|
@ -244,19 +286,18 @@ impl<T> Term<T> {
|
|||
Direction::Left => buf[utf8_len - i - 1],
|
||||
};
|
||||
|
||||
// Since we get the state from the DFA, it doesn't need to be checked.
|
||||
state = unsafe { regex.next_state_unchecked(state, byte) };
|
||||
state = regex.dfa.next_state(&mut regex.cache, state, byte)?;
|
||||
|
||||
// Matches require one additional BYTE of lookahead, so we check the match state for
|
||||
// the first byte of every new character to determine if the last character was a
|
||||
// match.
|
||||
if i == 0 && regex.is_match_state(state) {
|
||||
if i == 0 && state.is_match() {
|
||||
regex_match = Some(last_point);
|
||||
}
|
||||
}
|
||||
|
||||
// Abort on dead states.
|
||||
if regex.is_dead_state(state) {
|
||||
if state.is_dead() {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -264,8 +305,8 @@ impl<T> Term<T> {
|
|||
if point == end || done {
|
||||
// When reaching the end-of-input, we need to notify the parser that no look-ahead
|
||||
// is possible and check if the current state is still a match.
|
||||
state = regex.next_eoi_state(state);
|
||||
if regex.is_match_state(state) {
|
||||
state = regex.dfa.next_eoi_state(&mut regex.cache, state)?;
|
||||
if state.is_match() {
|
||||
regex_match = Some(point);
|
||||
}
|
||||
|
||||
|
@ -303,12 +344,12 @@ impl<T> Term<T> {
|
|||
None => {
|
||||
// When reaching the end-of-input, we need to notify the parser that no
|
||||
// look-ahead is possible and check if the current state is still a match.
|
||||
state = regex.next_eoi_state(state);
|
||||
if regex.is_match_state(state) {
|
||||
state = regex.dfa.next_eoi_state(&mut regex.cache, state)?;
|
||||
if state.is_match() {
|
||||
regex_match = Some(last_point);
|
||||
}
|
||||
|
||||
state = start_state;
|
||||
state = regex.dfa.start_state_forward(&mut regex.cache, &input)?;
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -316,7 +357,7 @@ impl<T> Term<T> {
|
|||
last_wrapped = wrapped;
|
||||
}
|
||||
|
||||
regex_match
|
||||
Ok(regex_match)
|
||||
}
|
||||
|
||||
/// Advance a grid iterator over fullwidth characters.
|
||||
|
@ -478,7 +519,7 @@ pub struct RegexIter<'a, T> {
|
|||
point: Point,
|
||||
end: Point,
|
||||
direction: Direction,
|
||||
regex: &'a RegexSearch,
|
||||
regex: &'a mut RegexSearch,
|
||||
term: &'a Term<T>,
|
||||
done: bool,
|
||||
}
|
||||
|
@ -489,7 +530,7 @@ impl<'a, T> RegexIter<'a, T> {
|
|||
end: Point,
|
||||
direction: Direction,
|
||||
term: &'a Term<T>,
|
||||
regex: &'a RegexSearch,
|
||||
regex: &'a mut RegexSearch,
|
||||
) -> Self {
|
||||
Self { point: start, done: false, end, direction, term, regex }
|
||||
}
|
||||
|
@ -505,7 +546,7 @@ impl<'a, T> RegexIter<'a, T> {
|
|||
}
|
||||
|
||||
/// Get the next match in the specified direction.
|
||||
fn next_match(&self) -> Option<Match> {
|
||||
fn next_match(&mut self) -> Option<Match> {
|
||||
match self.direction {
|
||||
Direction::Right => self.term.regex_search_right(self.regex, self.point, self.end),
|
||||
Direction::Left => self.term.regex_search_left(self.regex, self.point, self.end),
|
||||
|
@ -561,12 +602,12 @@ mod tests {
|
|||
");
|
||||
|
||||
// Check regex across wrapped and unwrapped lines.
|
||||
let regex = RegexSearch::new("Ala.*123").unwrap();
|
||||
let mut regex = RegexSearch::new("Ala.*123").unwrap();
|
||||
let start = Point::new(Line(1), Column(0));
|
||||
let end = Point::new(Line(4), Column(2));
|
||||
let match_start = Point::new(Line(1), Column(0));
|
||||
let match_end = Point::new(Line(2), Column(2));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -581,12 +622,12 @@ mod tests {
|
|||
");
|
||||
|
||||
// Check regex across wrapped and unwrapped lines.
|
||||
let regex = RegexSearch::new("Ala.*123").unwrap();
|
||||
let mut regex = RegexSearch::new("Ala.*123").unwrap();
|
||||
let start = Point::new(Line(4), Column(2));
|
||||
let end = Point::new(Line(1), Column(0));
|
||||
let match_start = Point::new(Line(1), Column(0));
|
||||
let match_end = Point::new(Line(2), Column(2));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -598,16 +639,16 @@ mod tests {
|
|||
");
|
||||
|
||||
// Greedy stopped at linebreak.
|
||||
let regex = RegexSearch::new("Ala.*critty").unwrap();
|
||||
let mut regex = RegexSearch::new("Ala.*critty").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(25));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
|
||||
// Greedy stopped at dead state.
|
||||
let regex = RegexSearch::new("Ala[^y]*critty").unwrap();
|
||||
let mut regex = RegexSearch::new("Ala[^y]*critty").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(15));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -619,10 +660,10 @@ mod tests {
|
|||
third\
|
||||
");
|
||||
|
||||
let regex = RegexSearch::new("nothing").unwrap();
|
||||
let mut regex = RegexSearch::new("nothing").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(2), Column(4));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), None);
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -634,10 +675,10 @@ mod tests {
|
|||
third\
|
||||
");
|
||||
|
||||
let regex = RegexSearch::new("nothing").unwrap();
|
||||
let mut regex = RegexSearch::new("nothing").unwrap();
|
||||
let start = Point::new(Line(2), Column(4));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), None);
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), None);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -649,12 +690,12 @@ mod tests {
|
|||
");
|
||||
|
||||
// Make sure the cell containing the linebreak is not skipped.
|
||||
let regex = RegexSearch::new("te.*123").unwrap();
|
||||
let mut regex = RegexSearch::new("te.*123").unwrap();
|
||||
let start = Point::new(Line(1), Column(0));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_start = Point::new(Line(0), Column(0));
|
||||
let match_end = Point::new(Line(0), Column(9));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -666,11 +707,11 @@ mod tests {
|
|||
");
|
||||
|
||||
// Make sure the cell containing the linebreak is not skipped.
|
||||
let regex = RegexSearch::new("te.*123").unwrap();
|
||||
let mut regex = RegexSearch::new("te.*123").unwrap();
|
||||
let start = Point::new(Line(0), Column(2));
|
||||
let end = Point::new(Line(1), Column(9));
|
||||
let match_start = Point::new(Line(1), Column(0));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -678,10 +719,10 @@ mod tests {
|
|||
let term = mock_term("alacritty");
|
||||
|
||||
// Make sure dead state cell is skipped when reversing.
|
||||
let regex = RegexSearch::new("alacrit").unwrap();
|
||||
let mut regex = RegexSearch::new("alacrit").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(6));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -689,68 +730,68 @@ mod tests {
|
|||
let term = mock_term("zooo lense");
|
||||
|
||||
// Make sure the reverse DFA operates the same as a forward DFA.
|
||||
let regex = RegexSearch::new("zoo").unwrap();
|
||||
let mut regex = RegexSearch::new("zoo").unwrap();
|
||||
let start = Point::new(Line(0), Column(9));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_start = Point::new(Line(0), Column(0));
|
||||
let match_end = Point::new(Line(0), Column(2));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn multibyte_unicode() {
|
||||
let term = mock_term("testвосибing");
|
||||
|
||||
let regex = RegexSearch::new("te.*ing").unwrap();
|
||||
let mut regex = RegexSearch::new("te.*ing").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(11));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
|
||||
let regex = RegexSearch::new("te.*ing").unwrap();
|
||||
let mut regex = RegexSearch::new("te.*ing").unwrap();
|
||||
let start = Point::new(Line(0), Column(11));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=start));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn end_on_multibyte_unicode() {
|
||||
let term = mock_term("testвосиб");
|
||||
|
||||
let regex = RegexSearch::new("te.*и").unwrap();
|
||||
let mut regex = RegexSearch::new("te.*и").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(8));
|
||||
let match_end = Point::new(Line(0), Column(7));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fullwidth() {
|
||||
let term = mock_term("a🦇x🦇");
|
||||
|
||||
let regex = RegexSearch::new("[^ ]*").unwrap();
|
||||
let mut regex = RegexSearch::new("[^ ]*").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(5));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
|
||||
let regex = RegexSearch::new("[^ ]*").unwrap();
|
||||
let mut regex = RegexSearch::new("[^ ]*").unwrap();
|
||||
let start = Point::new(Line(0), Column(5));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=start));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn singlecell_fullwidth() {
|
||||
let term = mock_term("🦇");
|
||||
|
||||
let regex = RegexSearch::new("🦇").unwrap();
|
||||
let mut regex = RegexSearch::new("🦇").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(1));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=end));
|
||||
|
||||
let regex = RegexSearch::new("🦇").unwrap();
|
||||
let mut regex = RegexSearch::new("🦇").unwrap();
|
||||
let start = Point::new(Line(0), Column(1));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=start));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=start));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -761,16 +802,16 @@ mod tests {
|
|||
let end = Point::new(Line(0), Column(4));
|
||||
|
||||
// Ensure ending without a match doesn't loop indefinitely.
|
||||
let regex = RegexSearch::new("x").unwrap();
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), None);
|
||||
let mut regex = RegexSearch::new("x").unwrap();
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), None);
|
||||
|
||||
let regex = RegexSearch::new("x").unwrap();
|
||||
let mut regex = RegexSearch::new("x").unwrap();
|
||||
let match_end = Point::new(Line(0), Column(5));
|
||||
assert_eq!(term.regex_search_right(&regex, start, match_end), None);
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, match_end), None);
|
||||
|
||||
// Ensure match is captured when only partially inside range.
|
||||
let regex = RegexSearch::new("jarr🦇").unwrap();
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(start..=match_end));
|
||||
let mut regex = RegexSearch::new("jarr🦇").unwrap();
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -781,17 +822,17 @@ mod tests {
|
|||
xxx\
|
||||
");
|
||||
|
||||
let regex = RegexSearch::new("xxx").unwrap();
|
||||
let mut regex = RegexSearch::new("xxx").unwrap();
|
||||
let start = Point::new(Line(0), Column(2));
|
||||
let end = Point::new(Line(1), Column(2));
|
||||
let match_start = Point::new(Line(1), Column(0));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=end));
|
||||
|
||||
let regex = RegexSearch::new("xxx").unwrap();
|
||||
let mut regex = RegexSearch::new("xxx").unwrap();
|
||||
let start = Point::new(Line(1), Column(0));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_end = Point::new(Line(0), Column(2));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(end..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(end..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -802,19 +843,19 @@ mod tests {
|
|||
xx🦇\
|
||||
");
|
||||
|
||||
let regex = RegexSearch::new("🦇x").unwrap();
|
||||
let mut regex = RegexSearch::new("🦇x").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(1), Column(3));
|
||||
let match_start = Point::new(Line(0), Column(0));
|
||||
let match_end = Point::new(Line(0), Column(2));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end));
|
||||
|
||||
let regex = RegexSearch::new("x🦇").unwrap();
|
||||
let mut regex = RegexSearch::new("x🦇").unwrap();
|
||||
let start = Point::new(Line(1), Column(2));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_start = Point::new(Line(1), Column(1));
|
||||
let match_end = Point::new(Line(1), Column(3));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -826,33 +867,33 @@ mod tests {
|
|||
");
|
||||
term.grid[Line(0)][Column(3)].flags.insert(Flags::LEADING_WIDE_CHAR_SPACER);
|
||||
|
||||
let regex = RegexSearch::new("🦇x").unwrap();
|
||||
let mut regex = RegexSearch::new("🦇x").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(1), Column(3));
|
||||
let match_start = Point::new(Line(0), Column(3));
|
||||
let match_end = Point::new(Line(1), Column(2));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end));
|
||||
|
||||
let regex = RegexSearch::new("🦇x").unwrap();
|
||||
let mut regex = RegexSearch::new("🦇x").unwrap();
|
||||
let start = Point::new(Line(1), Column(3));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_start = Point::new(Line(0), Column(3));
|
||||
let match_end = Point::new(Line(1), Column(2));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
|
||||
let regex = RegexSearch::new("x🦇").unwrap();
|
||||
let mut regex = RegexSearch::new("x🦇").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(1), Column(3));
|
||||
let match_start = Point::new(Line(0), Column(2));
|
||||
let match_end = Point::new(Line(1), Column(1));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end));
|
||||
|
||||
let regex = RegexSearch::new("x🦇").unwrap();
|
||||
let mut regex = RegexSearch::new("x🦇").unwrap();
|
||||
let start = Point::new(Line(1), Column(3));
|
||||
let end = Point::new(Line(0), Column(0));
|
||||
let match_start = Point::new(Line(0), Column(2));
|
||||
let match_end = Point::new(Line(1), Column(1));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -863,12 +904,12 @@ mod tests {
|
|||
term.grid[Line(0)][Column(1)].c = '字';
|
||||
term.grid[Line(0)][Column(1)].flags = Flags::WIDE_CHAR;
|
||||
|
||||
let regex = RegexSearch::new("test").unwrap();
|
||||
let mut regex = RegexSearch::new("test").unwrap();
|
||||
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(1));
|
||||
|
||||
let mut iter = RegexIter::new(start, end, Direction::Right, &term, &regex);
|
||||
let mut iter = RegexIter::new(start, end, Direction::Right, &term, &mut regex);
|
||||
assert_eq!(iter.next(), None);
|
||||
}
|
||||
|
||||
|
@ -881,19 +922,34 @@ mod tests {
|
|||
");
|
||||
|
||||
// Bottom to top.
|
||||
let regex = RegexSearch::new("abc").unwrap();
|
||||
let mut regex = RegexSearch::new("abc").unwrap();
|
||||
let start = Point::new(Line(1), Column(0));
|
||||
let end = Point::new(Line(0), Column(2));
|
||||
let match_start = Point::new(Line(0), Column(0));
|
||||
let match_end = Point::new(Line(0), Column(2));
|
||||
assert_eq!(term.regex_search_right(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), Some(match_start..=match_end));
|
||||
|
||||
// Top to bottom.
|
||||
let regex = RegexSearch::new("def").unwrap();
|
||||
let mut regex = RegexSearch::new("def").unwrap();
|
||||
let start = Point::new(Line(0), Column(2));
|
||||
let end = Point::new(Line(1), Column(0));
|
||||
let match_start = Point::new(Line(1), Column(0));
|
||||
let match_end = Point::new(Line(1), Column(2));
|
||||
assert_eq!(term.regex_search_left(&regex, start, end), Some(match_start..=match_end));
|
||||
assert_eq!(term.regex_search_left(&mut regex, start, end), Some(match_start..=match_end));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn nfa_compile_error() {
|
||||
assert!(RegexSearch::new("[0-9A-Za-z]{9999999}").is_err());
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn runtime_cache_error() {
|
||||
let term = mock_term(&str::repeat("i", 9999));
|
||||
|
||||
let mut regex = RegexSearch::new("[0-9A-Za-z]{9999}").unwrap();
|
||||
let start = Point::new(Line(0), Column(0));
|
||||
let end = Point::new(Line(0), Column(9999));
|
||||
assert_eq!(term.regex_search_right(&mut regex, start, end), None);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue