// lesson-lisp/main.c

#include "enums.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* One lexed token. Tokens form a singly linked list headed by `tokens`. */
struct Token {
    /* Following token, or NULL for the last one in the list. */
    struct Token *next;
    /* Lexer state that was active while this token's text was collected. */
    enum State type;
    /* Heap-allocated, NUL-terminated copy of the token text. */
    char *val;
};

/*
 * A value handled by the parser and evaluator. `type` selects which member
 * of the anonymous union is meaningful.
 */
struct Object {
  enum Type type;
  union {
    // For PAIR: head (a) and tail (b) of a list cell. They live in their own
    // anonymous struct so that both can be stored at once; as direct union
    // members they would share storage and overwrite each other.
    struct {
      struct Object *a, *b;
    };
    // For ATOM, STRING
    char *s;
    // For NUMBER
    int64_t i;
  };
};

/* Text of the token the lexer is currently collecting. */
static char buffer[1024];
/* Number of characters currently stored in `buffer`. */
static size_t buffer_index = 0;

/* Current lexer state; lex() switches on it to decide how to treat a character. */
static enum State state = STATE_INIT;
/* Head of the token list: token_add() appends to it, shift_token() pops from it. */
static struct Token *tokens = NULL;

/* Prints `msg` prefixed with "ERROR: " and exits with EXIT_FAILURE. */
static void error(const char *msg);

/*********
 * Lexer *
 *********/

/* Appends one character to `buffer` and NUL-terminates it. */
static void buffer_add(char chr);
/* Resets `buffer_index` to 0 so the next token starts at the front of `buffer`. */
static void buffer_clean();
/* Appends a copy of `val` to the token list; STATE_WHITESPACE tokens are dropped. */
static void token_add(enum State type, char *val);
/* Feeds one input character to the lexer state machine. */
static void lex(char chr);

/**********
 * Parser *
 **********/

/* Removes the first token from `tokens` and frees its node. */
static void shift_token();
/* Allocates a zero-filled Object and sets its type. */
static struct Object *prepare_object(enum Type type);
/* Consumes the first token; reports an error if it is missing or not of type `state`. */
static void expect(enum State state);

/* Parser entry point; parses one parenthesized list from `tokens`. */
static struct Object *parse();
/* Parses a nested list, an identifier (TYPE_ATOM) or a number (TYPE_NUMBER). */
static struct Object *expr();
/* Parses an opening paren, the list contents, and a closing paren. */
static struct Object *parens();
/* Parses list contents up to, but not including, the closing paren. */
static struct Object *parens_part();

/********
 * Eval *
 ********/

static struct Object *eval(struct Object *program);

static struct Object *func_sum(struct Object *numbers);

/*******************
 * Implementations *
 *******************/

/*
 * Reads a program from stdin, lexes it, prints the resulting token list,
 * then parses and evaluates it.
 */
int main(void)
{
    /*
     * getchar() returns an int so that EOF can be told apart from every valid
     * byte. Storing it in a char would either treat byte 0xFF as end of input
     * (signed char) or never match EOF at all (unsigned char).
     */
    int chr;
    while ((chr = getchar()) != EOF) {
        lex((char)chr);
    }

    /*
     * The lexer only emits a token once it sees the character that follows
     * it, so the last token of an input that does not end in whitespace is
     * still pending here. A synthetic newline flushes it; whitespace itself
     * never becomes a token.
     */
    lex('\n');

    printf("Tokens:\n");

    for (const struct Token *token = tokens; token; token = token->next) {
        printf("%s:%s;\n", State_to_str(token->type), token->val);
    }

    struct Object *const program = parse();
    struct Object *const result = eval(program);
    (void)result; /* evaluated for its error checking; not printed */

    return EXIT_SUCCESS;
}

/*
 * Reports a fatal error and terminates the program.
 *
 * Writes "ERROR: <msg>" followed by a newline to standard output and exits
 * with EXIT_FAILURE. This function never returns to its caller.
 */
void error(const char *msg)
{
    fprintf(stdout, "ERROR: %s\n", msg);
    exit(EXIT_FAILURE);
}

/*********
 * Lexer *
 *********/

void buffer_add(char chr)
{
    if (buffer_index >= 1000) error("token too long");
    buffer[buffer_index++] = chr;
    buffer[buffer_index] = 0;
}

/*
 * Rewinds the token buffer so that the next buffer_add() writes at its start.
 *
 * Only the write position is reset; the bytes already in the buffer are left
 * as they are until they are overwritten.
 */
void buffer_clean()
{
    buffer_index = 0;
}

void token_add(enum State type, char *val)
{
    if (type == STATE_WHITESPACE) return;

    struct Token *token = malloc(sizeof(struct Token));
    assert(token);
    token->next = NULL;
    token->type = type;
    token->val = malloc(strlen(val) + 1);
    assert(token->val);
    strcpy(token->val, val);

    if (!tokens) {
        tokens = token;
    } else {
        for (struct Token *curr = tokens; curr; curr = curr->next) {
            if (!curr->next) {
                curr->next = token;
                break;
            }
        }
    }
}

void lex(char chr)
{
    switch (state) {
    case STATE_INIT:
        if (chr == '(') {
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            state = STATE_ID;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_INIT)");
        }
        break;
    case STATE_WHITESPACE:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ID;
            buffer[buffer_index++] = chr;
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer[buffer_index++] = chr;
        } else {
            error("invalid char (STATE_WHITESPACE)");
        }
        break;
    case STATE_OPEN:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ID;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_OPEN)");
        }
        break;
    case STATE_CLOSE:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ID;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_CLOSE)");
        }
        break;
    case STATE_ID:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalnum(chr)) {
            buffer_add(chr);
        } else {
            error("invalid char (STATE_ID)");
        }
        break;
    case STATE_NUM:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            buffer_add(chr);
        } else {
            error("invalid char (STATE_ID)");
        }
        break;
    }
}

/**********
 * Parser *
 **********/

/*
 * Pops the first token off the global token list and frees its node.
 *
 * The token's text (`val`) is not freed here; expr() keeps that pointer in
 * the ATOM objects it builds. The list must not be empty when this is called.
 */
void shift_token()
{
    struct Token *const rest = tokens->next;
    free(tokens);
    tokens = rest;
}

/*
 * Allocates a new Object whose bytes are all zero except for `type`.
 *
 * Returns the new object; an allocation failure trips an assert.
 */
struct Object *prepare_object(const enum Type type)
{
    /* calloc hands back zero-filled storage in a single call. */
    struct Object *const fresh = calloc(1, sizeof(struct Object));
    assert(fresh);
    fresh->type = type;
    return fresh;
}

void expect(const enum State state)
{
    if (!tokens) error("no tokens in expect");
    if (tokens->type != state) error("no expected");
    shift_token();
}

/*
 * Parser entry point: parses one parenthesized list from the front of the
 * token list and returns it. Tokens after that list are left untouched.
 */
struct Object *parse()
{
    struct Object *const program = parens();
    return program;
}

/*
 * Parses one expression starting at the first token:
 *   - STATE_OPEN starts a nested list, handled by parens();
 *   - STATE_ID becomes a TYPE_ATOM object;
 *   - STATE_NUM becomes a TYPE_NUMBER object.
 *
 * Returns the new object. An empty token list, any other token type, or a
 * number that does not fit into 64 bits is a fatal error.
 */
struct Object *expr()
{
    if (!tokens) error("no tokens");

    switch (tokens->type) {
    case STATE_OPEN: return parens();
    case STATE_ID:
    {
        struct Object *const object = prepare_object(TYPE_ATOM);
        /* The atom takes over the token's text; shift_token() frees only the node. */
        object->s = tokens->val;
        shift_token();
        return object;
    }
    case STATE_NUM:
    {
        struct Object *const object = prepare_object(TYPE_NUMBER);
        /*
         * strtoll reports out-of-range input through errno, whereas atoll
         * has undefined behavior for it. The lexer only lets digits into a
         * number token, so no other syntax check is needed here.
         */
        errno = 0;
        const long long value = strtoll(tokens->val, NULL, 10);
        if (errno == ERANGE || value > INT64_MAX) {
            error("number out of range");
            return NULL;
        }
        object->i = value;
        shift_token();
        return object;
    }
    default:
        error("invalid expr");
        return NULL;
    }
}

/*
 * Parses a parenthesized list: an opening paren, the list contents, and a
 * closing paren. Returns the object built for the contents; a missing paren
 * is reported by expect().
 */
struct Object *parens()
{
    expect(STATE_OPEN);

    struct Object *const contents = parens_part();

    expect(STATE_CLOSE);
    return contents;
}

/*
 * Parses the contents of a list up to, but not including, the closing paren.
 *
 * Builds one TYPE_PAIR cell: `a` receives the next expression when there is
 * one, and `b` receives the cell for the remaining contents, or NULL when
 * the closing paren or the end of the token list follows.
 */
struct Object *parens_part()
{
    struct Object *const cell = prepare_object(TYPE_PAIR);

    const int has_head = tokens != NULL && tokens->type != STATE_CLOSE;
    if (has_head) {
        cell->a = expr();
    }

    /* Re-check the token list: expr() has consumed tokens in the meantime. */
    const int has_tail = tokens != NULL && tokens->type != STATE_CLOSE;
    cell->b = has_tail ? parens_part() : NULL;

    return cell;
}

/********
 * Eval *
 ********/

struct Object *eval(struct Object *const program)
{
    if (program->type != TYPE_PAIR) {
        error("eval expects pair");
        return NULL;
    }

    if (!program->a || program->a->type != TYPE_ATOM) {
        error("eval expects atom");
        return NULL;
    }

    if (strcmp(program->a->s, "sum") != 0) {
        error("unknown func");
        return NULL;
    }

    return func_sum(program->b);
}

struct Object *func_sum(struct Object *const numbers)
{
    struct Object *const object = prepare_object(TYPE_NUMBER);
    object->i = 0;

    if (numbers) {
        if (numbers->type == TYPE_NUMBER) {
            object->i = numbers->i;
        } else if (numbers->type == TYPE_PAIR) {
            if (numbers->a->type != TYPE_NUMBER) {
                error("type error");
                return NULL;
            }

            object->i = numbers->a->i;

            struct Object *b = eval(numbers->b);

            if (numbers->b->type != TYPE_NUMBER) {
                error("type error");
                return NULL;
            }

            object->i += b->i;
        } else {
            error("type error");
            return NULL;
        }
    }

    return object;
}