From 4f2d50e9b8279f209ba0d721d675e3338c3244e7 Mon Sep 17 00:00:00 2001
From: Alex Kotov
Date: Sun, 2 Apr 2023 15:02:15 +0400
Subject: [PATCH] Filter input chars

---
 src/input.rs  | 35 ++++++++++++++++++++++++++++-------
 src/lexer.rs  |  8 ++++----
 src/models.rs | 22 ++++++++++++++++++----
 3 files changed, 50 insertions(+), 15 deletions(-)

diff --git a/src/input.rs b/src/input.rs
index 2eb7946..1661ce5 100644
--- a/src/input.rs
+++ b/src/input.rs
@@ -1,10 +1,16 @@
 use crate::models::{Char, Pos};
 
-pub use std::io::{Error, Result};
-
 use std::io::BufRead;
 use std::vec::IntoIter;
 
+pub type Result = std::result::Result<Char, Error>;
+
+#[derive(Debug)]
+pub enum Error {
+    IO(std::io::Error),
+    InvalidChar(char),
+}
+
 pub struct Input<'a> {
     read: &'a mut dyn BufRead,
     fin: bool,
@@ -12,6 +18,18 @@ pub struct Input<'a> {
     pos: Pos,
 }
 
+impl From<std::io::Error> for Error {
+    fn from(err: std::io::Error) -> Self {
+        Self::IO(err)
+    }
+}
+
+impl From<char> for Error {
+    fn from(chr: char) -> Self {
+        Self::InvalidChar(chr)
+    }
+}
+
 impl<'a> Input<'a> {
     pub fn new(read: &'a mut dyn BufRead) -> Self {
         Self {
@@ -27,7 +45,7 @@ impl<'a> Input<'a> {
 }
 
 impl Iterator for Input<'_> {
-    type Item = Result<Char>;
+    type Item = Result;
 
     fn next(&mut self) -> Option<Self::Item> {
         if self.fin {
@@ -35,10 +53,13 @@ impl Iterator for Input<'_> {
         }
 
         if let Some(chr) = self.iter.next() {
-            let result = Some(Ok(Char::new(chr, self.pos)));
+            let chr = Char::new(chr, self.pos);
             self.pos.index += 1;
             self.pos.col += 1;
-            return result;
+            return match chr {
+                Err(chr) => Some(Err(chr.into())),
+                Ok(chr) => Some(Ok(chr)),
+            };
         }
 
         let mut buffer = String::new();
@@ -53,7 +74,7 @@ impl Iterator for Input<'_> {
             }
             Err(err) => {
                 self.fin = true;
-                Some(Err(err))
+                Some(Err(err.into()))
             }
         }
     }
@@ -64,7 +85,7 @@ mod tests {
     use super::*;
 
     fn chr(value: char, index: usize, line: usize, col: usize) -> Char {
-        Char::new(value, Pos::new(index, line, col))
+        Char::new(value, Pos::new(index, line, col)).unwrap()
     }
 
     #[test]
diff --git a/src/lexer.rs b/src/lexer.rs
index 5ed0774..0d3884c 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -1,12 +1,12 @@
 use crate::input::{self, Input};
-use crate::models::{Char, Pos, Token};
+use crate::models::{Pos, Token};
 use crate::tokenizer::Tokenizer;
 
 pub type Result = std::result::Result<Token, input::Error>;
 
 pub struct Lexer<'a> {
     input: Input<'a>,
-    cur: Option<input::Result<Char>>,
+    cur: Option<input::Result>,
     pos: Pos,
     tokenizer: Tokenizer,
 }
@@ -47,14 +47,14 @@ impl Iterator for Lexer<'_> {
                 Ok(chr) => {
                     let chr = *chr;
                     self.cur = self.input.next();
-                    match self.tokenizer.iter(Some(chr.value)) {
+                    match self.tokenizer.iter(Some(chr.value())) {
                         None => self.next(),
                         Some(value) => {
                             let token = Token {
                                 value,
                                 pos: self.pos,
                             };
-                            self.pos = chr.pos;
+                            self.pos = chr.pos();
                             Some(Ok(token))
                         }
                     }
diff --git a/src/models.rs b/src/models.rs
index 636992c..4aa497d 100644
--- a/src/models.rs
+++ b/src/models.rs
@@ -1,7 +1,9 @@
+const MIN_CHAR: char = ' '; // ASCII code: 32
+
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
 pub struct Char {
-    pub value: char,
-    pub pos: Pos,
+    value: char,
+    pos: Pos,
 }
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -18,8 +20,20 @@ pub struct Token {
 }
 
 impl Char {
-    pub fn new(value: char, pos: Pos) -> Self {
-        Self { value, pos }
+    pub fn new(value: char, pos: Pos) -> Result<Self, char> {
+        if value >= MIN_CHAR || value == ' ' || value == '\n' {
+            Ok(Self { value, pos })
+        } else {
+            Err(value)
+        }
+    }
+
+    pub fn value(&self) -> char {
+        self.value
+    }
+
+    pub fn pos(&self) -> Pos {
+        self.pos
     }
 }
 