commit 77b95df534de20c182a14c667fa08512c310a913
Author: Alex Kotov
Date:   Wed May 3 22:42:26 2023 +0400

    lexer

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b54616a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+/*.o
+
+/lisp
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..ee3e1f3
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,12 @@
+all: lisp
+
+CC = gcc
+CFLAGS = -Wall -Wextra
+
+OBJS = main.c.o
+
+lisp: $(OBJS)
+	$(CC) -o $@ $^ $(CFLAGS)
+
+%.c.o: %.c
+	$(CC) -c $< -o $@ $(CFLAGS)
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..28ada25
--- /dev/null
+++ b/main.c
@@ -0,0 +1,186 @@
+#include <assert.h>
+#include <ctype.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/* Longest token text, in characters, that the lexer accepts. */
+enum { TOKEN_MAX_LEN = 1000 };
+
+/*
+ * Lexer states. Every state except STATE_INIT is also used as the type
+ * of the token that is accumulated while the lexer is in that state.
+ */
+enum State {
+    STATE_INIT,
+    STATE_WHITESPACE,
+    STATE_OPEN,
+    STATE_CLOSE,
+    STATE_ID,
+};
+
+/* Node of the singly linked list of recognized tokens. */
+struct Token {
+    struct Token *next;
+    enum State type;
+    char *val; /* heap-allocated, NUL-terminated copy of the token text */
+};
+
+/* Text of the token being accumulated; kept NUL-terminated. */
+static char buffer[TOKEN_MAX_LEN + 1];
+static size_t buffer_index = 0;
+
+static enum State state = STATE_INIT;
+static struct Token *tokens = NULL;
+/* Last node of tokens, so that appending does not walk the whole list. */
+static struct Token *tokens_tail = NULL;
+
+static void error(const char *msg);
+static const char *state_str(enum State state);
+static void buffer_add(char chr);
+static void buffer_clean(void);
+static void token_add(enum State type, const char *val);
+static void lex(char chr);
+
+/*
+ * Reads standard input up to EOF, splits it into tokens and prints one
+ * "TYPE:text;" line per token.
+ */
+int main(void)
+{
+    /* getchar() returns an int: a char cannot tell EOF from a data byte. */
+    int chr;
+    while ((chr = getchar()) != EOF) {
+        lex((char)chr);
+    }
+
+    /* The last token is still pending in the buffer: emit it as well. */
+    if (state != STATE_INIT) {
+        token_add(state, buffer);
+        buffer_clean();
+        state = STATE_INIT;
+    }
+
+    printf("Tokens:\n");
+
+    for (const struct Token *token = tokens; token; token = token->next) {
+        printf("%s:%s;\n", state_str(token->type), token->val);
+    }
+
+    return EXIT_SUCCESS;
+}
+
+/* Prints msg prefixed with "ERROR: " and terminates with a failure status. */
+static void error(const char *msg)
+{
+    printf("ERROR: %s\n", msg);
+    exit(EXIT_FAILURE);
+}
+
+/* Returns the name of a state, or NULL for a value outside enum State. */
+static const char *state_str(enum State state)
+{
+    switch (state) {
+    case STATE_INIT:       return "STATE_INIT";
+    case STATE_WHITESPACE: return "STATE_WHITESPACE";
+    case STATE_OPEN:       return "STATE_OPEN";
+    case STATE_CLOSE:      return "STATE_CLOSE";
+    case STATE_ID:         return "STATE_ID";
+    }
+    return NULL;
+}
+
+/*
+ * Appends chr to the pending token text and re-terminates it; aborts with
+ * an error once the text would exceed TOKEN_MAX_LEN characters.
+ */
+static void buffer_add(char chr)
+{
+    if (buffer_index >= TOKEN_MAX_LEN) {
+        error("token too long");
+    }
+
+    buffer[buffer_index++] = chr;
+    buffer[buffer_index] = 0;
+}
+
+/* Resets the pending token text to the empty string. */
+static void buffer_clean(void)
+{
+    buffer_index = 0;
+    buffer[0] = 0;
+}
+
+/*
+ * Appends a token of the given type to the global token list. The text is
+ * copied, so the caller is free to reuse val afterwards.
+ */
+static void token_add(enum State type, const char *val)
+{
+    struct Token *token = malloc(sizeof *token);
+    char *text = malloc(strlen(val) + 1);
+
+    /* Checked explicitly: assert() is compiled out when NDEBUG is defined. */
+    if (!token || !text) {
+        error("out of memory");
+    }
+
+    strcpy(text, val);
+    token->next = NULL;
+    token->type = type;
+    token->val = text;
+
+    if (tokens_tail) {
+        tokens_tail->next = token;
+    } else {
+        tokens = token;
+    }
+    tokens_tail = token;
+}
+
+/*
+ * Feeds one input character to the lexer.
+ *
+ * A run of whitespace or of identifier characters extends the pending
+ * token. Any other valid character finishes the pending token (if any),
+ * appends it to the token list and starts a new token; each parenthesis
+ * is a token of its own. An identifier starts with a letter and continues
+ * with letters or digits. Any other character aborts with an error that
+ * names the current state.
+ */
+static void lex(char chr)
+{
+    /* <ctype.h> functions are undefined for negative values except EOF. */
+    const unsigned char uchr = (unsigned char)chr;
+    enum State next;
+
+    if (chr == '(') {
+        next = STATE_OPEN;
+    } else if (chr == ')') {
+        next = STATE_CLOSE;
+    } else if (isspace(uchr)) {
+        next = STATE_WHITESPACE;
+    } else if (state == STATE_ID ? isalnum(uchr) : isalpha(uchr)) {
+        next = STATE_ID;
+    } else {
+        char msg[64];
+        snprintf(msg, sizeof msg, "invalid char (%s)", state_str(state));
+        error(msg);
+        return; /* not reached: error() exits */
+    }
+
+    /* Only whitespace and identifiers span more than one character. */
+    const int extends_pending =
+        next == state && (state == STATE_WHITESPACE || state == STATE_ID);
+
+    if (!extends_pending) {
+        if (state != STATE_INIT) {
+            token_add(state, buffer);
+            buffer_clean();
+        }
+        state = next;
+    }
+
+    buffer_add(chr);
+}