improve lexer

2023-05-04 01:41:04 +04:00 · 2023-05-04 01:41:04 +04:00 · 97e2470e6b
commit 97e2470e6b
parent e4004807fc
3 changed files with 73 additions and 20 deletions
--- a/enums.c
+++ b/enums.c
@ -1,8 +1,9 @@
 #include "enums.h"

+#include <stdbool.h>
 #include <stddef.h>

-const char *State_to_str(enum State state)
+const char *State_to_str(const enum State state)
 {
    switch (state) {
    case STATE_INIT:       return "STATE_INIT";
@ -14,3 +15,39 @@ const char *State_to_str(enum State state)
    }
    return NULL;
 }
+/* Returns token_type's enumerator name as a string literal, or NULL when no case matches. */
+const char *TokenType_to_str(const enum TokenType token_type)
+{
+    switch (token_type) { /* NOTE(review): no default case, presumably so the compiler can flag unhandled enumerators */
+    case TOKEN_OPEN:  return "TOKEN_OPEN";
+    case TOKEN_CLOSE: return "TOKEN_CLOSE";
+    case TOKEN_ID:    return "TOKEN_ID";
+    case TOKEN_NUM:   return "TOKEN_NUM";
+    }
+    return NULL; /* reached only when token_type is none of the cases above */
+}
+/* Returns type's enumerator name as a string literal, or NULL when no case matches. */
+const char *Type_to_str(const enum Type type)
+{
+    switch (type) { /* NOTE(review): no default case, presumably so the compiler can flag unhandled enumerators */
+    case TYPE_PAIR:   return "TYPE_PAIR";
+    case TYPE_ATOM:   return "TYPE_ATOM";
+    case TYPE_STRING: return "TYPE_STRING";
+    case TYPE_NUMBER: return "TYPE_NUMBER";
+    }
+    return NULL; /* reached only when type is none of the cases above */
+}
+/* Maps a token-producing lexer state to its TokenType; returns false and leaves *token_type untouched otherwise. */
+bool
+State_to_token_type(const enum State state, enum TokenType *const token_type)
+{
+    switch (state) {
+    case STATE_OPEN:  *token_type = TOKEN_OPEN;  break; /* store TOKEN_*, not STATE_*: the two enums are numbered differently */
+    case STATE_CLOSE: *token_type = TOKEN_CLOSE; break;
+    case STATE_ID:    *token_type = TOKEN_ID;    break;
+    case STATE_NUM:   *token_type = TOKEN_NUM;   break;
+    default: return false; /* any other state (e.g. STATE_INIT, STATE_WHITESPACE) has no token type */
+    }
+
+    return true;
+}
--- a/enums.h
+++ b/enums.h
@ -1,6 +1,8 @@
 #ifndef __ENUMS_H__
 #define __ENUMS_H__

+#include <stdbool.h>
+
 enum State {
    STATE_INIT,
    STATE_WHITESPACE,
@ -10,6 +12,13 @@ enum State {
    STATE_NUM,
 };

+enum TokenType { /* token categories kept in struct Token; separate from the lexer's enum State */
+    TOKEN_OPEN,  /* expr() hands this to parens(); presumably '(' -- confirm against the lexer */
+    TOKEN_CLOSE, /* parens_part() stops reading elements when it sees this; presumably ')' */
+    TOKEN_ID,    /* expr() turns this into a TYPE_ATOM object */
+    TOKEN_NUM,   /* expr() turns this into a TYPE_NUMBER object via atoll() */
+};
+
 enum Type {
  TYPE_PAIR,
  TYPE_ATOM,
@ -18,5 +27,9 @@ enum Type {
 };

 const char *State_to_str(enum State state);
+const char *TokenType_to_str(enum TokenType token_type); /* enumerator name, or NULL if no case matches */
+const char *Type_to_str(enum Type type); /* enumerator name, or NULL if no case matches */
+/* Writes the token type for `state` through token_type and returns true; returns false if `state` has none. */
+bool State_to_token_type(enum State state, enum TokenType *token_type);

 #endif
--- a/main.c
+++ b/main.c
@ -10,7 +10,7 @@

 struct Token {
    struct Token *next;
-    enum State type;
+    enum TokenType type; /* token category; token_add() derives it from the lexer state */
    char *val;
 };

@ -40,7 +40,7 @@ static void error(const char *msg);

 static void buffer_add(char chr);
 static void buffer_clean();
-static void token_add(enum State type, char *val);
+static void token_add(enum State state, char *val);
 static void lex(char chr);

 /**********
@ -49,7 +49,7 @@ static void lex(char chr);

 static void shift_token();
 static struct Object *prepare_object(enum Type type);
-static void expect(enum State state);
+static void expect(enum TokenType token_type);

 static struct Object *parse();
 static struct Object *expr();
@ -78,7 +78,7 @@ int main()
    printf("Tokens:\n");

    for (const struct Token *token = tokens; token; token = token->next) {
-        printf("%s:%s;\n", State_to_str(token->type), token->val);
+        printf("%s:%s;\n", TokenType_to_str(token->type), token->val);
    }

    struct Object *const program = parse();
@ -109,14 +109,17 @@ void buffer_clean()
    buffer_index = 0;
 }

-void token_add(enum State type, char *val)
+void token_add(enum State state, char *val)
 {
-    if (type == STATE_WHITESPACE) return;
+    if (state == STATE_WHITESPACE) return;
+
+    enum TokenType token_type;
+    if (!State_to_token_type(state, &token_type)) error("invalid state");

    struct Token *token = malloc(sizeof(struct Token));
    assert(token);
    token->next = NULL;
-    token->type = type;
+    token->type = token_type;
    token->val = malloc(strlen(val) + 1);
    assert(token->val);
    strcpy(token->val, val);
@ -310,10 +313,10 @@ struct Object *prepare_object(const enum Type type)
    return object;
 }

-void expect(const enum State state)
+void expect(const enum TokenType token_type) /* checks the head of `tokens` against token_type, then calls shift_token() */
 {
    if (!tokens) error("no tokens in expect");
-    if (tokens->type != state) error("no expected");
+    if (tokens->type != token_type) error("no expected"); /* head token is of a different type than requested */
    shift_token();
 }

@ -327,15 +330,15 @@ struct Object *expr()
    if (!tokens) error("no tokens");

    switch (tokens->type) {
-    case STATE_OPEN: return parens();
-    case STATE_ID:
+    case TOKEN_OPEN: return parens();
+    case TOKEN_ID:
    {
        struct Object *const object = prepare_object(TYPE_ATOM);
        object->s = tokens->val;
        shift_token();
        return object;
    }
-    case STATE_NUM:
+    case TOKEN_NUM:
    {
        struct Object *const object = prepare_object(TYPE_NUMBER);
        object->i = atoll(tokens->val);
@ -360,11 +363,11 @@ struct Object *parens_part()
 {
    struct Object *const object = prepare_object(TYPE_PAIR);

-    if (tokens && tokens->type != STATE_CLOSE) {
+    if (tokens && tokens->type != TOKEN_CLOSE) {
        object->a = expr();
    }

-    if (tokens && tokens->type != STATE_CLOSE) {
+    if (tokens && tokens->type != TOKEN_CLOSE) {
        object->b = parens_part();
    } else {
        object->b = NULL;