//! Lexer for repubmark: adapts an [`Input`] character stream into an
//! iterator of position-tagged [`Token`]s.

use crate::input::{self, Input};
use crate::models::{Pos, Token};
use crate::tokenizer::Tokenizer;
/// Outcome of producing one token: the [`Token`] or a lexer [`Error`].
pub type Result = std::result::Result<Token, Error>;

/// Streaming lexer: pulls characters from an [`Input`] and groups them
/// into [`Token`]s using a [`Tokenizer`].
pub struct Lexer<'a> {
    // Underlying character source.
    input: Input<'a>,
    // One-item lookahead: the next unconsumed input result, if any.
    cur: Option<input::Result>,
    // Position stamped onto the next token emitted.
    pos: Pos,
    // Accumulates characters until a token boundary is found.
    tokenizer: Tokenizer,
}
#[derive(Debug)]
pub enum Error {
Input(input::Error),
}
impl<'a> Lexer<'a> {
pub fn new(mut input: Input<'a>) -> Self {
let cur = input.next();
Self {
input,
cur,
pos: Pos::default(),
tokenizer: Tokenizer::default(),
}
}
}
impl Iterator for Lexer<'_> {
type Item = Result;
fn next(&mut self) -> Option<Self::Item> {
match &self.cur {
None => match self.tokenizer.iter(None) {
None => None,
Some(value) => Some(Ok(Token {
value,
pos: self.pos,
})),
},
Some(result) => match result {
Err(_) => Some(Err(Error::Input(
self.cur.take().unwrap().unwrap_err(),
))),
Ok(chr) => {
let chr = *chr;
self.cur = self.input.next();
match self.tokenizer.iter(Some(chr.value())) {
None => self.next(),
Some(value) => {
let token = Token {
value,
pos: self.pos,
};
self.pos = chr.pos();
Some(Ok(token))
}
}
}
},
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the expected token for an assertion.
    fn token(value: &str, index: usize, line: usize, col: usize) -> Token {
        Token {
            value: value.to_owned(),
            pos: Pos::new(index, line, col),
        }
    }

    /// Lexes `src` to completion, unwrapping every result.
    fn lex(src: &str) -> Vec<Token> {
        let mut bytes = src.as_bytes();
        Lexer::new(Input::new(&mut bytes))
            .map(|item| item.unwrap())
            .collect()
    }

    #[test]
    fn empty() {
        assert!(lex("").is_empty());
    }

    #[test]
    fn newline() {
        assert_eq!(lex("\n"), [token("\n", 0, 1, 1)]);
    }

    #[test]
    fn newline_long() {
        assert_eq!(lex("\n\n\n"), [token("\n\n\n", 0, 1, 1)]);
    }

    #[test]
    fn whitespace() {
        assert_eq!(lex(" "), [token(" ", 0, 1, 1)]);
    }

    #[test]
    fn whitespace_long() {
        assert_eq!(lex("   "), [token("   ", 0, 1, 1)]);
    }

    #[test]
    fn other() {
        assert_eq!(lex("x"), [token("x", 0, 1, 1)]);
    }

    #[test]
    fn other_long() {
        assert_eq!(lex("abc"), [token("abc", 0, 1, 1)]);
    }

    #[test]
    fn together() {
        assert_eq!(
            lex("\n x\n\n abc"),
            [
                token("\n", 0, 1, 1),
                token(" ", 1, 2, 1),
                token("x", 2, 2, 2),
                token("\n\n", 3, 2, 3),
                token(" ", 5, 4, 1),
                token("abc", 7, 4, 3),
            ]
        );
    }
}