Improve lexer

parent bb100af429
commit b87c4dfbd4

1 changed file with 158 additions and 199 deletions

src/lexer.c (357 lines changed)
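The diff replaces the repeated `self->state = ...;` / `buffer_add()` / `token_add()` sequences in `Lexer_lex()` with four small helpers: `token_start()` switches state and resets the token buffer, `token_putc()` appends a character, `token_start_putc()` combines the two, and `token_finish()` takes over `token_add()`'s job of emitting the buffered token. The sketch below is a minimal, self-contained illustration of that pattern, not the project's code: the struct layout, the `LEXER_BUFFER_SLEN` value of 255, the use of a plain `struct Lexer *` instead of the project's `Lexer` handle type, the exact body of `token_putc()`, the printing `token_finish()` stub, and `main()` are all assumptions for illustration.

/*
 * Sketch of the token_* helper pattern, under the assumptions stated above.
 */
#include <assert.h>
#include <stdio.h>

#define LEXER_BUFFER_SLEN 255 /* assumed buffer capacity */

enum Lexer_State { STATE_INIT, STATE_IDENT, STATE_WHITESPACE };

struct Lexer {
    enum Lexer_State state;
    char buffer[LEXER_BUFFER_SLEN + 1];
    unsigned buffer_index;
};

/* Begin a new token: switch state and reset the text buffer. */
static void token_start(struct Lexer *lexer, enum Lexer_State state)
{
    assert(lexer);
    lexer->state = state;
    lexer->buffer_index = 0;
    lexer->buffer[0] = '\0';
}

/* Append one character to the current token's buffer, keeping it terminated. */
static void token_putc(struct Lexer *lexer, char chr)
{
    assert(lexer);
    assert(lexer->buffer_index < LEXER_BUFFER_SLEN);
    lexer->buffer[lexer->buffer_index++] = chr;
    lexer->buffer[lexer->buffer_index] = '\0';
}

/* Begin a new token that already has its first character. */
static void token_start_putc(struct Lexer *lexer, enum Lexer_State state, char chr)
{
    token_start(lexer, state);
    token_putc(lexer, chr);
}

/* Stand-in for the real token_finish(), which emits the buffered token. */
static void token_finish(struct Lexer *lexer)
{
    assert(lexer);
    printf("token (state %d): \"%s\"\n", (int)lexer->state, lexer->buffer);
}

int main(void)
{
    struct Lexer lexer;
    token_start(&lexer, STATE_INIT);

    /* One state transition: what used to be three statements (state
     * assignment, buffer_add, token_add) is now token_finish() plus a
     * single token_start_putc() call. */
    token_start_putc(&lexer, STATE_IDENT, 'f');
    token_putc(&lexer, 'o');
    token_putc(&lexer, 'o');
    token_finish(&lexer);
    return 0;
}

Compiled on its own, the sketch prints one identifier token ("foo") when the transition out of STATE_IDENT calls token_finish().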
@@ -13,8 +13,10 @@ static bool State_to_token_type(
     enum TokenType *token_type
 );
 
-static void buffer_add(Lexer lexer, char chr);
-static void token_add(Lexer lexer);
+static void token_start(Lexer lexer, enum Lexer_State state);
+static void token_start_putc(Lexer lexer, enum Lexer_State state, char chr);
+static void token_putc(Lexer lexer, char chr);
+static void token_finish(Lexer lexer);
 
 Lexer Lexer_new(const Tokens tokens)
 {
@@ -60,7 +62,25 @@ State_to_token_type(const enum Lexer_State state, enum TokenType *const token_type
     return true;
 }
 
-void buffer_add(const Lexer lexer, char chr)
+void token_start(const Lexer lexer, const enum Lexer_State state)
+{
+    assert(lexer);
+
+    lexer->state = state;
+    lexer->buffer_index = 0;
+    lexer->buffer[0] = '\0';
+}
+
+void token_start_putc(
+    const Lexer lexer,
+    const enum Lexer_State state,
+    const char chr
+) {
+    token_start(lexer, state);
+    token_putc(lexer, chr);
+}
+
+void token_putc(const Lexer lexer, char chr)
 {
     assert(lexer);
     assert(lexer->buffer_index < LEXER_BUFFER_SLEN);
@@ -70,7 +90,7 @@ void buffer_add(const Lexer lexer, char chr)
     lexer->buffer[lexer->buffer_index] = '\0';
 }
 
-void token_add(const Lexer lexer)
+void token_finish(const Lexer lexer)
 {
     assert(lexer);
 
@@ -95,102 +115,83 @@ void Lexer_lex(const Lexer self, const char chr)
     switch (self->state) {
     case STATE_INIT:
         if (chr == ';') {
-            self->state = STATE_COMMENT_LINE;
+            token_start(self, STATE_COMMENT_LINE);
         } else if (chr == '(') {
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (chr == '\'') {
-            self->state = STATE_QUOTE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_QUOTE, chr);
         } else if (chr == '#') {
-            self->state = STATE_SHARP;
+            token_start(self, STATE_SHARP);
         } else if (is_space(chr)) {
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_WHITESPACE, chr);
         } else if (is_ident_head(chr)) {
-            self->state = STATE_IDENT;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_IDENT, chr);
         } else if (is_number(chr)) {
-            self->state = STATE_NUM;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_NUM, chr);
         } else if (chr == '"') {
-            self->state = STATE_STRING_START;
+            token_start(self, STATE_STRING_START);
         } else {
            assert(0);
         }
         break;
     case STATE_WHITESPACE:
         if (chr == ';') {
-            self->state = STATE_COMMENT_LINE;
+            token_start(self, STATE_COMMENT_LINE);
         } else if (chr == '(') {
-            token_add(self);
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            token_add(self);
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            token_add(self);
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            token_add(self);
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            token_add(self);
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            token_add(self);
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (chr == '\'') {
-            token_add(self);
-            self->state = STATE_QUOTE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_QUOTE, chr);
         } else if (chr == '#') {
-            token_add(self);
-            self->state = STATE_SHARP;
+            token_finish(self);
+            token_start(self, STATE_SHARP);
         } else if (is_space(chr)) {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         } else if (is_ident_head(chr)) {
-            token_add(self);
-            self->state = STATE_IDENT;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_IDENT, chr);
         } else if (is_number(chr)) {
-            token_add(self);
-            self->state = STATE_NUM;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_NUM, chr);
         } else if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_START;
+            token_finish(self);
+            token_start(self, STATE_STRING_START);
         } else {
             assert(0);
         }
         break;
     case STATE_COMMENT_LINE:
         if (chr == '\n') {
-            token_add(self);
-            self->state = STATE_WHITESPACE;
+            token_finish(self);
+            token_start(self, STATE_WHITESPACE);
         } else {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         }
         break;
     case STATE_ROUND_OPEN:
@@ -201,61 +202,50 @@ void Lexer_lex(const Lexer self, const char chr)
     case STATE_CURLY_CLOSE:
     case STATE_QUOTE:
         if (chr == ';') {
-            self->state = STATE_COMMENT_LINE;
+            token_start(self, STATE_COMMENT_LINE);
         } else if (chr == '(') {
-            token_add(self);
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            token_add(self);
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            token_add(self);
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            token_add(self);
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            token_add(self);
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            token_add(self);
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (chr == '\'') {
-            token_add(self);
-            self->state = STATE_QUOTE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_QUOTE, chr);
         } else if (chr == '#') {
-            token_add(self);
-            self->state = STATE_SHARP;
+            token_finish(self);
+            token_start(self, STATE_SHARP);
         } else if (is_space(chr)) {
-            token_add(self);
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_WHITESPACE, chr);
         } else if (is_ident_head(chr)) {
-            token_add(self);
-            self->state = STATE_IDENT;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_IDENT, chr);
         } else if (is_number(chr)) {
-            token_add(self);
-            self->state = STATE_NUM;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_NUM, chr);
         } else if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_START;
+            token_finish(self);
+            token_start(self, STATE_STRING_START);
         } else {
             assert(0);
         }
         break;
     case STATE_SHARP:
         if (is_tag(chr)) {
-            self->state = STATE_TAG;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_TAG, chr);
         } else {
             assert(0);
         }
@@ -264,38 +254,31 @@ void Lexer_lex(const Lexer self, const char chr)
         if (chr == ';') {
             self->state = STATE_COMMENT_LINE;
         } else if (chr == '(') {
-            token_add(self);
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            token_add(self);
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            token_add(self);
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            token_add(self);
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            token_add(self);
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            token_add(self);
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (is_space(chr)) {
-            token_add(self);
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_WHITESPACE, chr);
         } else if (is_tag(chr)) {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         } else if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_START;
+            token_finish(self);
+            token_start(self, STATE_STRING_START);
         } else {
             assert(0);
         }
@@ -304,78 +287,64 @@ void Lexer_lex(const Lexer self, const char chr)
         if (chr == ';') {
             self->state = STATE_COMMENT_LINE;
         } else if (chr == '(') {
-            token_add(self);
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            token_add(self);
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            token_add(self);
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            token_add(self);
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            token_add(self);
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            token_add(self);
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (is_space(chr)) {
-            token_add(self);
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_WHITESPACE, chr);
         } else if (is_ident_tail(chr)) {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         } else if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_START;
+            token_finish(self);
+            token_start(self, STATE_STRING_START);
         } else {
             assert(0);
         }
         break;
     case STATE_NUM:
         if (chr == ';') {
-            self->state = STATE_COMMENT_LINE;
+            token_start(self, STATE_COMMENT_LINE);
         } else if (chr == '(') {
-            token_add(self);
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            token_add(self);
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            token_add(self);
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            token_add(self);
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
         } else if (chr == '{') {
-            token_add(self);
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
         } else if (chr == '}') {
-            token_add(self);
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
         } else if (is_space(chr)) {
-            token_add(self);
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_finish(self);
+            token_start_putc(self, STATE_WHITESPACE, chr);
         } else if (is_number(chr)) {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         } else if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_START;
+            token_finish(self);
+            token_start(self, STATE_STRING_START);
         } else {
             assert(0);
         }
@@ -383,49 +352,39 @@ void Lexer_lex(const Lexer self, const char chr)
     case STATE_STRING_START:
     case STATE_STRING_INSIDE:
         if (chr == '"') {
-            token_add(self);
-            self->state = STATE_STRING_END;
+            token_finish(self);
+            token_start(self, STATE_STRING_END);
         } else {
-            buffer_add(self, chr);
+            token_putc(self, chr);
         }
         break;
     case STATE_STRING_END:
         if (chr == ';') {
-            self->state = STATE_COMMENT_LINE;
+            token_start(self, STATE_COMMENT_LINE);
         } else if (chr == '(') {
-            self->state = STATE_ROUND_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_ROUND_OPEN, chr);
         } else if (chr == ')') {
-            self->state = STATE_ROUND_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_ROUND_CLOSE, chr);
         } else if (chr == '[') {
-            self->state = STATE_SQUARE_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_SQUARE_OPEN, chr);
         } else if (chr == ']') {
-            self->state = STATE_SQUARE_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_SQUARE_CLOSE, chr);
        } else if (chr == '{') {
-            self->state = STATE_CURLY_OPEN;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_CURLY_OPEN, chr);
        } else if (chr == '}') {
-            self->state = STATE_CURLY_CLOSE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_CURLY_CLOSE, chr);
        } else if (chr == '\'') {
-            self->state = STATE_QUOTE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_QUOTE, chr);
        } else if (chr == '#') {
-            self->state = STATE_SHARP;
+            token_start(self, STATE_SHARP);
        } else if (is_space(chr)) {
-            self->state = STATE_WHITESPACE;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_WHITESPACE, chr);
        } else if (is_ident_head(chr)) {
-            self->state = STATE_IDENT;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_IDENT, chr);
        } else if (is_number(chr)) {
-            self->state = STATE_NUM;
-            buffer_add(self, chr);
+            token_start_putc(self, STATE_NUM, chr);
        } else if (chr == '"') {
-            self->state = STATE_STRING_START;
+            token_start(self, STATE_STRING_START);
        } else {
            assert(0);
        }