Rewrite lexer
This commit is contained in:
parent
1dda43f662
commit
9b01119121
3 changed files with 69 additions and 36 deletions
85
src/lexer.c
85
src/lexer.c
|
@ -6,6 +6,7 @@
|
|||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
static char buffer[1024];
|
||||
static size_t buffer_index = 0;
|
||||
|
@ -15,6 +16,22 @@ static void buffer_add(char chr);
|
|||
static void buffer_clean();
|
||||
static void token_add(Tokens tokens, enum State state, char *val);
|
||||
|
||||
Lexer Lexer_new(const Tokens tokens)
|
||||
{
|
||||
assert(tokens);
|
||||
const Lexer self = malloc(sizeof(struct Lexer));
|
||||
assert(self);
|
||||
memset(self, 0, sizeof(struct Lexer));
|
||||
self->tokens = tokens;
|
||||
return self;
|
||||
}
|
||||
|
||||
/**
 * Release the storage of a lexer.
 *
 * @param self  Lexer to free; must be non-NULL (checked with assert).
 *
 * Only the lexer object itself is freed here; the `tokens` container it
 * refers to is not touched by this function.
 */
void Lexer_delete(const Lexer self)
{
    assert(self != NULL);

    free(self);
}
|
||||
|
||||
void buffer_add(char chr)
|
||||
{
|
||||
assert(buffer_index < 1000);
|
||||
|
@ -37,7 +54,7 @@ void token_add(Tokens tokens, enum State state, char *val)
|
|||
Tokens_append(tokens, token_type, val);
|
||||
}
|
||||
|
||||
void lex(Tokens tokens, const char chr)
|
||||
void Lexer_lex(const Lexer self, const char chr)
|
||||
{
|
||||
switch (state) {
|
||||
case STATE_INIT:
|
||||
|
@ -77,49 +94,49 @@ void lex(Tokens tokens, const char chr)
|
|||
break;
|
||||
case STATE_WHITESPACE:
|
||||
if (chr == '(') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ')') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '[') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ']') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '{') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '}') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '\'') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_QUOTE;
|
||||
buffer_add(chr);
|
||||
} else if (is_space(chr)) {
|
||||
buffer_add(chr);
|
||||
} else if (is_ident_head(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_IDENT;
|
||||
buffer[buffer_index++] = chr;
|
||||
} else if (is_number(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_NUM;
|
||||
buffer[buffer_index++] = chr;
|
||||
|
@ -135,52 +152,52 @@ void lex(Tokens tokens, const char chr)
|
|||
case STATE_CURLY_CLOSE:
|
||||
case STATE_QUOTE:
|
||||
if (chr == '(') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ')') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '[') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ']') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '{') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '}') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '\'') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_QUOTE;
|
||||
buffer_add(chr);
|
||||
} else if (is_space(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_WHITESPACE;
|
||||
buffer_add(chr);
|
||||
} else if (is_ident_head(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_IDENT;
|
||||
buffer_add(chr);
|
||||
} else if (is_number(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_NUM;
|
||||
buffer_add(chr);
|
||||
|
@ -190,37 +207,37 @@ void lex(Tokens tokens, const char chr)
|
|||
break;
|
||||
case STATE_IDENT:
|
||||
if (chr == '(') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ')') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '[') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ']') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '{') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '}') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (is_space(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_WHITESPACE;
|
||||
buffer_add(chr);
|
||||
|
@ -232,37 +249,37 @@ void lex(Tokens tokens, const char chr)
|
|||
break;
|
||||
case STATE_NUM:
|
||||
if (chr == '(') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ')') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_ROUND_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '[') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == ']') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_SQUARE_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '{') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_OPEN;
|
||||
buffer_add(chr);
|
||||
} else if (chr == '}') {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_CURLY_CLOSE;
|
||||
buffer_add(chr);
|
||||
} else if (is_space(chr)) {
|
||||
token_add(tokens, state, buffer);
|
||||
token_add(self->tokens, state, buffer);
|
||||
buffer_clean();
|
||||
state = STATE_WHITESPACE;
|
||||
buffer_add(chr);
|
||||
|
|
16
src/lexer.h
16
src/lexer.h
|
@ -3,6 +3,20 @@
|
|||
|
||||
#include "tokens.h"
|
||||
|
||||
void lex(Tokens tokens, char chr);
|
||||
#include <stddef.h>
|
||||
|
||||
/*
 * Destroy a lexer with Lexer_delete() and then set the caller's handle to
 * NULL so a stale pointer cannot be reused by accident.
 *
 * The argument is expanded twice, so it must be a modifiable lvalue whose
 * evaluation has no side effects (e.g. a plain variable or struct field).
 */
#define LEXER_DELETE(lexer)      \
    do {                         \
        Lexer_delete(lexer);     \
        (lexer) = NULL;          \
    } while (0)
|
||||
|
||||
typedef struct Lexer {
|
||||
Tokens tokens;
|
||||
} *Lexer;
|
||||
|
||||
Lexer Lexer_new(Tokens tokens);
|
||||
void Lexer_delete(Lexer lexer);
|
||||
|
||||
void Lexer_lex(Lexer self, char chr);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -116,11 +116,13 @@ struct Object *eval_list(struct Object *const object)
|
|||
void run()
|
||||
{
|
||||
Tokens tokens = Tokens_new();
|
||||
Lexer lexer = Lexer_new(tokens);
|
||||
assert(tokens);
|
||||
char chr;
|
||||
while ((chr = getchar()) != EOF) {
|
||||
lex(tokens, chr);
|
||||
Lexer_lex(lexer, chr);
|
||||
}
|
||||
LEXER_DELETE(lexer);
|
||||
|
||||
printf("\n=== TOKENS =======\n");
|
||||
for (
|
||||
|
|
Loading…
Reference in a new issue