Move the lexer out of main.c into its own translation unit (lexer.c/lexer.h).

Review notes for this revision of the patch:

  * lexer.c: the token-length limit and the State_to_token_type() call are
    ordinary runtime checks again instead of living inside assert(), so they
    are not compiled out by NDEBUG.
  * lexer.c: every character is stored through buffer_add(), so the buffer
    is always NUL-terminated (the whitespace -> atom/number transitions used
    to write the first character without a terminator).
  * lexer.c: characters are converted to unsigned char before being passed
    to the <ctype.h> classifiers.
  * lexer.c: fatal errors print a message to stderr before abort().
  * lexer.c: the per-state switch is folded into next_state() plus a single
    transition path; accepted input and emitted tokens are unchanged.
  * lexer.h: the include guard no longer uses a reserved identifier.

This text was rebuilt from a copy in which line breaks, indentation and the
<...> header names had been lost.  Blank lines and whitespace in context and
removed lines are best guesses, and the bare "#include" lines in the main.c
hunk are placeholders: restore them from the real tree before applying.  The
"index" lines of the two new files were dropped because their blob ids no
longer match the content.

diff --git a/Makefile b/Makefile
index eb6498f..8ba893c 100644
--- a/Makefile
+++ b/Makefile
@@ -3,7 +3,7 @@ all: lisp
 CC = gcc
 CFLAGS = -Wall -Wextra
 
-OBJS = enums.c.o main.c.o object.c.o tokens.c.o
+OBJS = enums.c.o lexer.c.o main.c.o object.c.o tokens.c.o
 
 run: lisp
 	./lisp
diff --git a/lexer.c b/lexer.c
new file mode 100644
--- /dev/null
+++ b/lexer.c
@@ -0,0 +1,137 @@
+#include "lexer.h"
+
+#include "enums.h"
+#include "tokens.h"
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+/* Buffer capacity, and the longest token accepted (the limit is unchanged). */
+enum { BUFFER_SIZE = 1024, TOKEN_MAX = 1000 };
+
+static char buffer[BUFFER_SIZE];
+static size_t buffer_index = 0;
+static enum State state = STATE_INIT;
+
+static void lex_fail(const char *msg);
+static void invalid_char(char chr);
+static void buffer_add(char chr);
+static void buffer_clean(void);
+static void token_add(enum State token_state, char *val);
+static int next_state(char chr, enum State *next);
+
+/* Reports a fatal lexer error on stderr and aborts. */
+static void lex_fail(const char *msg)
+{
+    fprintf(stderr, "lexer: %s\n", msg);
+    abort();
+}
+
+/* Reports chr as invalid in the current state and aborts. */
+static void invalid_char(char chr)
+{
+    fprintf(stderr, "lexer: invalid char 0x%02x (state %d)\n",
+            (unsigned)(unsigned char)chr, (int)state);
+    abort();
+}
+
+/*
+ * Appends chr to the token buffer and keeps it NUL-terminated.  The length
+ * limit is a runtime check, not an assert(), so it survives NDEBUG builds.
+ */
+static void buffer_add(char chr)
+{
+    if (buffer_index >= TOKEN_MAX) {
+        lex_fail("token too long");
+    }
+    buffer[buffer_index++] = chr;
+    buffer[buffer_index] = '\0';
+}
+
+/* Discards the accumulated token text. */
+static void buffer_clean(void)
+{
+    buffer_index = 0;
+    buffer[0] = '\0';
+}
+
+/*
+ * Pushes val as a token of the type matching token_state.  Whitespace
+ * produces no token.
+ */
+static void token_add(enum State token_state, char *val)
+{
+    enum TokenType token_type;
+
+    if (token_state == STATE_WHITESPACE) {
+        return;
+    }
+    /* Kept outside assert() so the conversion still runs under NDEBUG. */
+    if (!State_to_token_type(token_state, &token_type)) {
+        lex_fail("invalid state");
+    }
+    tokens_push(token_type, val);
+}
+
+/*
+ * Stores in *next the state entered by a token that starts with chr.
+ * Returns 1 on success, 0 when the lexer does not accept chr.
+ */
+static int next_state(char chr, enum State *next)
+{
+    /* <ctype.h> functions need a value representable as unsigned char. */
+    const unsigned char uc = (unsigned char)chr;
+
+    if (chr == '(') {
+        *next = STATE_OPEN;
+    } else if (chr == ')') {
+        *next = STATE_CLOSE;
+    } else if (isspace(uc)) {
+        *next = STATE_WHITESPACE;
+    } else if (isalpha(uc)) {
+        *next = STATE_ATOM;
+    } else if (isdigit(uc)) {
+        *next = STATE_NUM;
+    } else {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Feeds one input character to the lexer.  Characters accumulate in the
+ * token buffer; when chr starts a new token, the finished one is passed to
+ * token_add() first.  Aborts on a character that is invalid in the current
+ * state.
+ */
+void lex(const char chr)
+{
+    enum State next = STATE_INIT;
+
+    if (!next_state(chr, &next)) {
+        invalid_char(chr);
+    }
+
+    if (state == STATE_ATOM && next == STATE_NUM) {
+        /* Digits may follow the leading letter of an atom. */
+        next = STATE_ATOM;
+    } else if (state == STATE_NUM && next == STATE_ATOM) {
+        /* A letter may not directly follow a number. */
+        invalid_char(chr);
+    }
+
+    /* Whitespace runs, atoms and numbers grow one character at a time. */
+    if (next == state && state != STATE_OPEN && state != STATE_CLOSE) {
+        buffer_add(chr);
+        return;
+    }
+
+    /* chr starts a new token: emit the finished one, if there is one. */
+    if (state != STATE_INIT) {
+        token_add(state, buffer);
+        buffer_clean();
+    }
+    state = next;
+    buffer_add(chr);
+}
diff --git a/lexer.h b/lexer.h
new file mode 100644
--- /dev/null
+++ b/lexer.h
@@ -0,0 +1,7 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+/* Feeds one character of input to the lexer. */
+void lex(char chr);
+
+#endif
diff --git a/main.c b/main.c
index 901e039..f831977 100644
--- a/main.c
+++ b/main.c
@@ -1,30 +1,16 @@
 #include "enums.h"
+#include "lexer.h"
 #include "object.h"
 #include "tokens.h"
 
 #include
-#include
 #include
 #include
 #include
 #include
 
-static char buffer[1024];
-static size_t buffer_index = 0;
-
-static enum State state = STATE_INIT;
-
 static void error(const char *msg);
 
-/*********
- * Lexer *
- *********/
-
-static void buffer_add(char chr);
-static void buffer_clean();
-static void token_add(enum State state, char *val);
-static void lex(char chr);
-
 /**********
  * Parser *
  **********/
@@ -84,189 +70,6 @@ void error(const char *msg)
     exit(EXIT_FAILURE);
 }
 
-/*********
- * Lexer *
- *********/
-
-void buffer_add(char chr)
-{
-    if (buffer_index >= 1000) error("token too long");
-    buffer[buffer_index++] = chr;
-    buffer[buffer_index] = 0;
-}
-
-void buffer_clean()
-{
-    buffer_index = 0;
-}
-
-void token_add(enum State state, char *val)
-{
-    if (state == STATE_WHITESPACE) return;
-
-    enum TokenType token_type;
-    if (!State_to_token_type(state, &token_type)) error("invalid state");
-
-    tokens_push(token_type, val);
-}
-
-void lex(char chr)
-{
-    switch (state) {
-    case STATE_INIT:
-        if (chr == '(') {
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            state = STATE_WHITESPACE;
-            buffer_add(chr);
-        } else if (isalpha(chr)) {
-            state = STATE_ATOM;
-            buffer_add(chr);
-        } else if (isdigit(chr)) {
-            state = STATE_NUM;
-            buffer_add(chr);
-        } else {
-            error("invalid char (STATE_INIT)");
-        }
-        break;
-    case STATE_WHITESPACE:
-        if (chr == '(') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            buffer_add(chr);
-        } else if (isalpha(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_ATOM;
-            buffer[buffer_index++] = chr;
-        } else if (isdigit(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_NUM;
-            buffer[buffer_index++] = chr;
-        } else {
-            error("invalid char (STATE_WHITESPACE)");
-        }
-        break;
-    case STATE_OPEN:
-        if (chr == '(') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_WHITESPACE;
-            buffer_add(chr);
-        } else if (isalpha(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_ATOM;
-            buffer_add(chr);
-        } else if (isdigit(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_NUM;
-            buffer_add(chr);
-        } else {
-            error("invalid char (STATE_OPEN)");
-        }
-        break;
-    case STATE_CLOSE:
-        if (chr == '(') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_WHITESPACE;
-            buffer_add(chr);
-        } else if (isalpha(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_ATOM;
-            buffer_add(chr);
-        } else if (isdigit(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_NUM;
-            buffer_add(chr);
-        } else {
-            error("invalid char (STATE_CLOSE)");
-        }
-        break;
-    case STATE_ATOM:
-        if (chr == '(') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_WHITESPACE;
-            buffer_add(chr);
-        } else if (isalnum(chr)) {
-            buffer_add(chr);
-        } else {
-            error("invalid char (STATE_ATOM)");
-        }
-        break;
-    case STATE_NUM:
-        if (chr == '(') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_OPEN;
-            buffer_add(chr);
-        } else if (chr == ')') {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_CLOSE;
-            buffer_add(chr);
-        } else if (isspace(chr)) {
-            token_add(state, buffer);
-            buffer_clean();
-            state = STATE_WHITESPACE;
-            buffer_add(chr);
-        } else if (isdigit(chr)) {
-            buffer_add(chr);
-        } else {
-            error("invalid char (STATE_NUM)");
-        }
-        break;
-    }
-}
-
 /**********
  * Parser *
  **********/