
Rewrite lexer

This commit is contained in:
Alex Kotov 2023-05-05 12:28:17 +04:00
parent 1dda43f662
commit 9b01119121
Signed by: kotovalexarian
GPG key ID: 553C0EBBEB5D5F08
3 changed files with 69 additions and 36 deletions

View file

@@ -6,6 +6,7 @@
#include <assert.h>
#include <stdlib.h>
#include <string.h>
static char buffer[1024];
static size_t buffer_index = 0;
@@ -15,6 +16,22 @@ static void buffer_add(char chr);
static void buffer_clean();
static void token_add(Tokens tokens, enum State state, char *val);
Lexer Lexer_new(const Tokens tokens)
{
assert(tokens);
const Lexer self = malloc(sizeof(struct Lexer));
assert(self);
memset(self, 0, sizeof(struct Lexer));
self->tokens = tokens;
return self;
}
void Lexer_delete(const Lexer self)
{
assert(self);
free(self);
}
void buffer_add(char chr)
{
assert(buffer_index < 1000);
@@ -37,7 +54,7 @@ void token_add(Tokens tokens, enum State state, char *val)
Tokens_append(tokens, token_type, val);
}
void lex(Tokens tokens, const char chr)
void Lexer_lex(const Lexer self, const char chr)
{
switch (state) {
case STATE_INIT:
@@ -77,49 +94,49 @@ void lex(Tokens tokens, const char chr)
break;
case STATE_WHITESPACE:
if (chr == '(') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_OPEN;
buffer_add(chr);
} else if (chr == ')') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_CLOSE;
buffer_add(chr);
} else if (chr == '[') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_OPEN;
buffer_add(chr);
} else if (chr == ']') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_CLOSE;
buffer_add(chr);
} else if (chr == '{') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_OPEN;
buffer_add(chr);
} else if (chr == '}') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_CLOSE;
buffer_add(chr);
} else if (chr == '\'') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_QUOTE;
buffer_add(chr);
} else if (is_space(chr)) {
buffer_add(chr);
} else if (is_ident_head(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_IDENT;
buffer[buffer_index++] = chr;
} else if (is_number(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_NUM;
buffer[buffer_index++] = chr;
@@ -135,52 +152,52 @@ void lex(Tokens tokens, const char chr)
case STATE_CURLY_CLOSE:
case STATE_QUOTE:
if (chr == '(') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_OPEN;
buffer_add(chr);
} else if (chr == ')') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_CLOSE;
buffer_add(chr);
} else if (chr == '[') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_OPEN;
buffer_add(chr);
} else if (chr == ']') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_CLOSE;
buffer_add(chr);
} else if (chr == '{') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_OPEN;
buffer_add(chr);
} else if (chr == '}') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_CLOSE;
buffer_add(chr);
} else if (chr == '\'') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_QUOTE;
buffer_add(chr);
} else if (is_space(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_WHITESPACE;
buffer_add(chr);
} else if (is_ident_head(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_IDENT;
buffer_add(chr);
} else if (is_number(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_NUM;
buffer_add(chr);
@@ -190,37 +207,37 @@ void lex(Tokens tokens, const char chr)
break;
case STATE_IDENT:
if (chr == '(') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_OPEN;
buffer_add(chr);
} else if (chr == ')') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_CLOSE;
buffer_add(chr);
} else if (chr == '[') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_OPEN;
buffer_add(chr);
} else if (chr == ']') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_CLOSE;
buffer_add(chr);
} else if (chr == '{') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_OPEN;
buffer_add(chr);
} else if (chr == '}') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_CLOSE;
buffer_add(chr);
} else if (is_space(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_WHITESPACE;
buffer_add(chr);
@@ -232,37 +249,37 @@ void lex(Tokens tokens, const char chr)
break;
case STATE_NUM:
if (chr == '(') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_OPEN;
buffer_add(chr);
} else if (chr == ')') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_ROUND_CLOSE;
buffer_add(chr);
} else if (chr == '[') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_OPEN;
buffer_add(chr);
} else if (chr == ']') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_SQUARE_CLOSE;
buffer_add(chr);
} else if (chr == '{') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_OPEN;
buffer_add(chr);
} else if (chr == '}') {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_CURLY_CLOSE;
buffer_add(chr);
} else if (is_space(chr)) {
token_add(tokens, state, buffer);
token_add(self->tokens, state, buffer);
buffer_clean();
state = STATE_WHITESPACE;
buffer_add(chr);
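
Every branch in the state machine above repeats the same token-boundary step: flush the buffered characters as a token of the state that just ended, reset the buffer, switch to the new state, and start buffering the current character. A minimal sketch of that step in isolation, using the identifiers from the diff (token_add, buffer_clean, buffer_add, the file-scope state and buffer); the helper itself is illustrative and not part of this commit:

/* Illustrative only: the boundary step repeated in each branch above.
 * All identifiers except begin_token come from the diff; this helper
 * is hypothetical and not introduced by the commit. */
static void begin_token(const Lexer self, enum State new_state, char chr)
{
    token_add(self->tokens, state, buffer); /* emit the token that just ended */
    buffer_clean();                         /* reset the shared buffer        */
    state = new_state;                      /* enter the new state            */
    buffer_add(chr);                        /* start buffering the new token  */
}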

View file

@@ -3,6 +3,20 @@
#include "tokens.h"
void lex(Tokens tokens, char chr);
#include <stddef.h>
#define LEXER_DELETE(lexer) do { \
Lexer_delete(lexer); \
lexer = NULL; \
} while (0)
typedef struct Lexer {
Tokens tokens;
} *Lexer;
Lexer Lexer_new(Tokens tokens);
void Lexer_delete(Lexer lexer);
void Lexer_lex(Lexer self, char chr);
#endif
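
Taken together, the declarations above replace the old free function lex(Tokens, char) with a small object-style API. A minimal usage sketch, assuming the header is named lexer.h (the file name is not shown in the diff) and reusing Tokens_new() from the interpreter below; error handling and token consumption are elided:

#include <stdio.h>
#include "lexer.h"   /* header shown above; the file name is an assumption */

static void lex_stdin(void)
{
    Tokens tokens = Tokens_new();     /* existing Tokens constructor */
    Lexer lexer = Lexer_new(tokens);  /* new API introduced by this commit */

    int chr;                          /* int, so the EOF comparison is well-defined */
    while ((chr = getchar()) != EOF)
        Lexer_lex(lexer, (char)chr);  /* feed one character at a time */

    LEXER_DELETE(lexer);              /* frees the lexer and nulls the pointer */
}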

View file

@@ -116,11 +116,13 @@ struct Object *eval_list(struct Object *const object)
void run()
{
Tokens tokens = Tokens_new();
Lexer lexer = Lexer_new(tokens);
assert(tokens);
char chr;
while ((chr = getchar()) != EOF) {
lex(tokens, chr);
Lexer_lex(lexer, chr);
}
LEXER_DELETE(lexer);
printf("\n=== TOKENS =======\n");
for (