218 lines
5.3 KiB
C
218 lines
5.3 KiB
C
|
#include <assert.h>
|
||
|
#include <ctype.h>
|
||
|
#include <stddef.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <string.h>
|
||
|
|
||
|
/*
 * Lexer states. Every state other than STATE_INIT is also used as the type
 * tag of the token that was being read while the lexer was in that state.
 */
enum State {
    STATE_INIT = 0,   /* start state: no character consumed yet */
    STATE_WHITESPACE, /* reading a run of whitespace characters */
    STATE_OPEN,       /* the pending token is a single '(' */
    STATE_CLOSE,      /* the pending token is a single ')' */
    STATE_ID          /* reading an identifier */
};
|
||
|
|
||
|
struct Token {
|
||
|
struct Token *next;
|
||
|
enum State type;
|
||
|
char *val;
|
||
|
};
|
||
|
|
||
|
static char buffer[1024];
|
||
|
static size_t buffer_index = 0;
|
||
|
|
||
|
static enum State state = STATE_INIT;
|
||
|
static struct Token *tokens = NULL;
|
||
|
|
||
|
/* Forward declarations of the file-local helpers defined after main(). */

/* Prints a fatal error message and terminates the program. */
static void error(const char *msg);

/* Returns the printable name of a lexer state. */
static const char *state_str(enum State state);

/* Appends one character to the pending-token buffer. */
static void buffer_add(char chr);

/* Discards the pending-token buffer contents. */
static void buffer_clean(void); /* (void): an empty list would not be a prototype */

/* Appends a copy of `val`, tagged with `type`, to the token list. */
static void token_add(enum State type, char *val);

/* Advances the lexer state machine by one input character. */
static void lex(char chr);
|
||
|
|
||
|
int main()
|
||
|
{
|
||
|
char chr;
|
||
|
while ((chr = getchar()) != EOF) {
|
||
|
lex(chr);
|
||
|
}
|
||
|
|
||
|
printf("Tokens:\n");
|
||
|
|
||
|
for (const struct Token *token = tokens; token; token = token->next) {
|
||
|
printf("%s:%s;\n", state_str(token->type), token->val);
|
||
|
}
|
||
|
|
||
|
exit(EXIT_SUCCESS);
|
||
|
}
|
||
|
|
||
|
/*
 * Reports a fatal error and terminates the program.
 *
 * msg: description of the problem, printed after an "ERROR: " prefix.
 *
 * Does not return: the process exits with EXIT_FAILURE.
 */
void error(const char *msg)
{
    /* Diagnostics belong on stderr so they cannot mix with regular output. */
    fprintf(stderr, "ERROR: %s\n", msg);
    exit(EXIT_FAILURE);
}
|
||
|
|
||
|
const char *state_str(enum State state)
|
||
|
{
|
||
|
switch (state) {
|
||
|
case STATE_INIT: return "STATE_INIT";
|
||
|
case STATE_WHITESPACE: return "STATE_WHITESPACE";
|
||
|
case STATE_OPEN: return "STATE_OPEN";
|
||
|
case STATE_CLOSE: return "STATE_CLOSE";
|
||
|
case STATE_ID: return "STATE_ID";
|
||
|
}
|
||
|
return NULL;
|
||
|
}
|
||
|
|
||
|
void buffer_add(char chr)
|
||
|
{
|
||
|
if (buffer_index >= 1000) error("token too long");
|
||
|
buffer[buffer_index++] = chr;
|
||
|
buffer[buffer_index] = 0;
|
||
|
}
|
||
|
|
||
|
void buffer_clean()
|
||
|
{
|
||
|
buffer_index = 0;
|
||
|
}
|
||
|
|
||
|
void token_add(enum State type, char *val)
|
||
|
{
|
||
|
struct Token *token = malloc(sizeof(struct Token));
|
||
|
assert(token);
|
||
|
token->next = NULL;
|
||
|
token->type = type;
|
||
|
token->val = malloc(strlen(val) + 1);
|
||
|
assert(token->val);
|
||
|
strcpy(token->val, val);
|
||
|
|
||
|
if (!tokens) {
|
||
|
tokens = token;
|
||
|
} else {
|
||
|
for (struct Token *curr = tokens; curr; curr = curr->next) {
|
||
|
if (!curr->next) {
|
||
|
curr->next = token;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
void lex(char chr)
|
||
|
{
|
||
|
switch (state) {
|
||
|
case STATE_INIT:
|
||
|
if (chr == '(') {
|
||
|
state = STATE_OPEN;
|
||
|
buffer_add(chr);
|
||
|
} else if (chr == ')') {
|
||
|
state = STATE_CLOSE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isspace(chr)) {
|
||
|
state = STATE_WHITESPACE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isalpha(chr)) {
|
||
|
state = STATE_ID;
|
||
|
buffer_add(chr);
|
||
|
} else {
|
||
|
error("invalid char (STATE_INIT)");
|
||
|
}
|
||
|
break;
|
||
|
case STATE_WHITESPACE:
|
||
|
if (chr == '(') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_OPEN;
|
||
|
buffer_add(chr);
|
||
|
} else if (chr == ')') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_CLOSE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isspace(chr)) {
|
||
|
buffer_add(chr);
|
||
|
} else if (isalpha(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_ID;
|
||
|
buffer[buffer_index++] = chr;
|
||
|
} else {
|
||
|
error("invalid char (STATE_WHITESPACE)");
|
||
|
}
|
||
|
break;
|
||
|
case STATE_OPEN:
|
||
|
if (chr == '(') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_OPEN;
|
||
|
buffer_add(chr);
|
||
|
} else if (chr == ')') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_CLOSE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isspace(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_WHITESPACE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isalpha(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_ID;
|
||
|
buffer_add(chr);
|
||
|
} else {
|
||
|
error("invalid char (STATE_OPEN)");
|
||
|
}
|
||
|
break;
|
||
|
case STATE_CLOSE:
|
||
|
if (chr == '(') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_OPEN;
|
||
|
buffer_add(chr);
|
||
|
} else if (chr == ')') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_CLOSE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isspace(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_WHITESPACE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isalpha(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_ID;
|
||
|
buffer_add(chr);
|
||
|
} else {
|
||
|
error("invalid char (STATE_CLOSE)");
|
||
|
}
|
||
|
break;
|
||
|
case STATE_ID:
|
||
|
if (chr == '(') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_OPEN;
|
||
|
buffer_add(chr);
|
||
|
} else if (chr == ')') {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_CLOSE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isspace(chr)) {
|
||
|
token_add(state, buffer);
|
||
|
buffer_clean();
|
||
|
state = STATE_WHITESPACE;
|
||
|
buffer_add(chr);
|
||
|
} else if (isalnum(chr)) {
|
||
|
buffer_add(chr);
|
||
|
} else {
|
||
|
error("invalid char (STATE_ID)");
|
||
|
}
|
||
|
}
|
||
|
}
|