//! Lexer: groups characters read from an `Input` into `Token`s.
use crate::input::{self, Input};
|
|
use crate::models::{Pos, Token};
|
|
use crate::tokenizer::Tokenizer;
|
|
|
|
/// Result of producing a single token: the token itself, or a lexing [`Error`].
pub type Result = std::result::Result<Token, Error>;
|
|
|
|
/// Streaming lexer that groups characters from an [`Input`] into [`Token`]s.
pub struct Lexer<'a> {
    /// Source of characters.
    input: Input<'a>,
    /// One-item lookahead: the next unconsumed read result, if any.
    cur: Option<input::Result>,
    /// Position assigned to the token currently being accumulated.
    pos: Pos,
    /// Accumulates characters and reports completed token values.
    tokenizer: Tokenizer,
}
|
|
|
|
#[derive(Debug)]
|
|
pub enum Error {
|
|
Input(input::Error),
|
|
}
|
|
|
|
impl<'a> Lexer<'a> {
|
|
pub fn new(mut input: Input<'a>) -> Self {
|
|
let cur = input.next();
|
|
Self {
|
|
input,
|
|
cur,
|
|
pos: Pos::default(),
|
|
tokenizer: Tokenizer::default(),
|
|
}
|
|
}
|
|
}
|
|
|
|
impl Iterator for Lexer<'_> {
|
|
type Item = Result;
|
|
|
|
fn next(&mut self) -> Option<Self::Item> {
|
|
match &self.cur {
|
|
None => match self.tokenizer.iter(None) {
|
|
None => None,
|
|
Some(value) => Some(Ok(Token {
|
|
value,
|
|
pos: self.pos,
|
|
})),
|
|
},
|
|
Some(result) => match result {
|
|
Err(_) => Some(Err(Error::Input(
|
|
self.cur.take().unwrap().unwrap_err(),
|
|
))),
|
|
Ok(chr) => {
|
|
let chr = *chr;
|
|
self.cur = self.input.next();
|
|
match self.tokenizer.iter(Some(chr.value())) {
|
|
None => self.next(),
|
|
Some(value) => {
|
|
let token = Token {
|
|
value,
|
|
pos: self.pos,
|
|
};
|
|
self.pos = chr.pos();
|
|
Some(Ok(token))
|
|
}
|
|
}
|
|
}
|
|
},
|
|
}
|
|
}
|
|
}
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the `Token` a test expects the lexer to produce.
    fn token(value: &str, index: usize, line: usize, col: usize) -> Token {
        Token {
            value: String::from(value),
            pos: Pos::new(index, line, col),
        }
    }

    #[test]
    fn empty() {
        let mut bytes = "".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn newline() {
        let mut bytes = "\n".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("\n", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn newline_long() {
        let mut bytes = "\n\n\n".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("\n\n\n", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn whitespace() {
        let mut bytes = " ".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token(" ", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn whitespace_long() {
        // Three spaces, mirroring `newline_long`: a run of whitespace must
        // come back as a single token.
        let mut bytes = "   ".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("   ", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn other() {
        let mut bytes = "x".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("x", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn other_long() {
        let mut bytes = "abc".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("abc", 0, 1, 1));
        assert!(lexer.next().is_none());
    }

    #[test]
    fn together() {
        // "abc" starts at byte index 7 and column 3, so two spaces precede
        // it: the whitespace token at index 5 spans two characters.
        let mut bytes = "\n x\n\n  abc".as_bytes();
        let mut lexer = Lexer::new(Input::new(&mut bytes));
        assert_eq!(lexer.next().unwrap().unwrap(), token("\n", 0, 1, 1));
        assert_eq!(lexer.next().unwrap().unwrap(), token(" ", 1, 2, 1));
        assert_eq!(lexer.next().unwrap().unwrap(), token("x", 2, 2, 2));
        assert_eq!(lexer.next().unwrap().unwrap(), token("\n\n", 3, 2, 3));
        assert_eq!(lexer.next().unwrap().unwrap(), token("  ", 5, 4, 1));
        assert_eq!(lexer.next().unwrap().unwrap(), token("abc", 7, 4, 3));
        assert!(lexer.next().is_none());
    }
}
|