/* lesson-lisp/main.c */

#include "enums.h"
#include "object.h"
#include "tokens.h"

#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Text of the token the lexer is currently accumulating; buffer_add()
 * NUL-terminates it after every append. */
static char buffer[1024];
/* Number of characters currently stored in buffer. */
static size_t buffer_index = 0;

/* Current lexer state; lex() advances it one input character at a time. */
static enum State state = STATE_INIT;

static void error(const char *msg);

/*********
 * Lexer *
 *********/

static void buffer_add(char chr);
static void buffer_clean();
static void token_add(enum State state, char *val);
static void lex(char chr);

/**********
 * Parser *
 **********/

/* Checks the next token against token_type via tokens_expect() (which
 * presumably also consumes it) and exits with an error on failure. */
static void expect(enum TokenType token_type);

/* Parses the token stream; currently a single expression. */
static struct Object *parse();
/* Parses one expression: a parenthesised list, an atom or a number. */
static struct Object *expr();
/* Parses an opening paren, the list contents and the closing paren. */
static struct Object *parens();
/* Parses list contents up to (not including) the closing paren;
 * returns NULL for an empty list. */
static struct Object *parens_part();

/********
 * Eval *
 ********/

/* Evaluates program: objects that are neither pairs nor atoms evaluate to
 * themselves; a pair is a call whose head must be the atom "sum". */
static struct Object *eval(struct Object *program);

/* Built-in "sum": adds up a list of number objects and returns the total
 * as a number object. */
static struct Object *func_sum(struct Object *numbers);

/*******************
 * Implementations *
 *******************/

int main()
{
    char chr;
    while ((chr = getchar()) != EOF) {
        lex(chr);
    }

    printf("Tokens:\n");

    for (
        const struct Tokens *token = tokens_top();
        token;
        token = token->next
    ) {
        printf("%s:%s;\n", TokenType_to_str(token->type), token->val);
    }

    struct Object *const program = parse();

    printf("\nProgram:\n");
    Object_print(program, 0);

    struct Object *const result = eval(program);

    printf("\nResult:\n");
    Object_print(result, 0);

    exit(EXIT_SUCCESS);
}

/*
 * Reports a fatal error on standard output as "ERROR: <msg>" followed by a
 * newline, then ends the process with a failure status. Never returns.
 */
void error(const char *msg)
{
    fputs("ERROR: ", stdout);
    puts(msg); /* puts() supplies the trailing newline */
    exit(EXIT_FAILURE);
}

/*********
 * Lexer *
 *********/

void buffer_add(char chr)
{
    if (buffer_index >= 1000) error("token too long");
    buffer[buffer_index++] = chr;
    buffer[buffer_index] = 0;
}

void buffer_clean()
{
    buffer_index = 0;
}

void token_add(enum State state, char *val)
{
    if (state == STATE_WHITESPACE) return;

    enum TokenType token_type;
    if (!State_to_token_type(state, &token_type)) error("invalid state");

    tokens_push(token_type, val);
}

void lex(char chr)
{
    switch (state) {
    case STATE_INIT:
        if (chr == '(') {
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            state = STATE_ATOM;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_INIT)");
        }
        break;
    case STATE_WHITESPACE:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ATOM;
            buffer[buffer_index++] = chr;
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer[buffer_index++] = chr;
        } else {
            error("invalid char (STATE_WHITESPACE)");
        }
        break;
    case STATE_OPEN:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ATOM;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_OPEN)");
        }
        break;
    case STATE_CLOSE:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalpha(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_ATOM;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_NUM;
            buffer_add(chr);
        } else {
            error("invalid char (STATE_CLOSE)");
        }
        break;
    case STATE_ATOM:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isalnum(chr)) {
            buffer_add(chr);
        } else {
            error("invalid char (STATE_ATOM)");
        }
        break;
    case STATE_NUM:
        if (chr == '(') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_OPEN;
            buffer_add(chr);
        } else if (chr == ')') {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_CLOSE;
            buffer_add(chr);
        } else if (isspace(chr)) {
            token_add(state, buffer);
            buffer_clean();
            state = STATE_WHITESPACE;
            buffer_add(chr);
        } else if (isdigit(chr)) {
            buffer_add(chr);
        } else {
            error("invalid char (STATE_NUM)");
        }
        break;
    }
}

/**********
 * Parser *
 **********/

void expect(const enum TokenType token_type)
{
    if (!tokens_expect(token_type)) error("expect");
}

/*
 * Parser entry point: a program is a single expression, so this simply
 * parses one and returns the resulting object.
 */
struct Object *parse()
{
    struct Object *const root = expr();
    return root;
}

struct Object *expr()
{
    assert(tokens_top());

    switch (tokens_top()->type) {
    case TOKEN_OPEN: return parens();
    case TOKEN_ATOM:
    {
        struct Object *const object = Object_new_atom(tokens_top()->val);
        tokens_pop();
        return object;
    }
    case TOKEN_NUM:
    {
        struct Object *const object =
            Object_new_number(atoll(tokens_top()->val));
        tokens_pop();
        return object;
    }
    default:
        error("invalid expr");
        return NULL;
    }
}

struct Object *parens()
{
    expect(STATE_OPEN);
    struct Object *const object = parens_part();
    expect(STATE_CLOSE);
    return object;
}

struct Object *parens_part()
{
    assert(tokens_top());
    if (tokens_top()->type == TOKEN_CLOSE) return NULL;

    struct Object *const a = expr();
    assert(tokens_top());

    struct Object *b = NULL;
    if (tokens_top()->type != TOKEN_CLOSE) {
        b = parens_part();
    }

    struct Object *object = Object_new_pair(a, b);
    return object;
}

/********
 * Eval *
 ********/

/*
 * Evaluates a parsed program and returns the resulting object.
 *
 * - NULL (the empty list) cannot be evaluated.
 * - Atoms would be variable references, which are not supported.
 * - Every other non-pair object evaluates to itself.
 * - A pair is a function call: its first member must be an atom naming
 *   the function, and "sum" is the only function known.
 *
 * All failures are reported through error().
 */
struct Object *eval(struct Object *const program)
{
    if (program == NULL) {
        error("can't eval null");
        return NULL;
    }

    // No variable lookup exists, so a bare atom has no value
    if (program->type == TYPE_ATOM) {
        error("can't eval atoms");
        return NULL;
    }

    // Anything that is not a call is already a value
    if (program->type != TYPE_PAIR) {
        return program;
    }

    // From here on the program is a call: (name args...)
    struct Object *const head = program->pair.a;
    if (head == NULL || head->type != TYPE_ATOM) {
        error("eval expects atom");
        return NULL;
    }

    if (strcmp(head->s, "sum") != 0) {
        error("unknown func");
        return NULL;
    }

    return func_sum(program->pair.b);
}

struct Object *func_sum(struct Object *const numbers)
{
    struct Object *const object = Object_new_number(0);

    if (!numbers) return object;

    if (numbers->type != TYPE_PAIR) {
        error("sum expects pair");
        return NULL;
    }

    if (!numbers->pair.a) {
        error("type error");
        return NULL;
    }

    if (numbers->pair.a->type != TYPE_NUMBER) {
        error("type error");
        return NULL;
    }

    object->i = numbers->pair.a->i;

    if (!numbers->pair.b) return object;

    struct Object *const b_sum = func_sum(numbers->pair.b);

    object->i += b_sum->i;

    return object;
}