small C-like programming language targeting the Uxn virtual computer
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

225 lines
6.4 KiB

#include "umem.h"
#include "ulex.h"
/* One reserved-word table entry: a token type paired with the word's spelling. */
typedef struct {
    UTokenType type;  /* token type returned when the word matches */
    const char *word; /* spelling of the reserved word */
    int len;          /* number of characters in word */
} UReservedWord;
/* Builds one table entry; the length is taken from the literal itself (minus its NUL). */
#define RESERVED_WORD(tokenType, literal) {tokenType, literal, (int)(sizeof(literal) - 1)}

/* Reserved words of the language; identifierType() scans this table to classify identifiers. */
UReservedWord reservedWords[] = {
    RESERVED_WORD(TOKEN_CHAR, "char"),
    RESERVED_WORD(TOKEN_INT, "int"),
    RESERVED_WORD(TOKEN_VOID, "void"),
    RESERVED_WORD(TOKEN_BOOL, "bool"),
    RESERVED_WORD(TOKEN_WHILE, "while"),
    RESERVED_WORD(TOKEN_FOR, "for"),
    /* NOTE(review): spelled "prntint" (no 'i') -- confirm this is the intended keyword */
    RESERVED_WORD(TOKEN_PRINTINT, "prntint"),
    RESERVED_WORD(TOKEN_IF, "if"),
    RESERVED_WORD(TOKEN_ELSE, "else"),
};

#undef RESERVED_WORD
/*
 * Prepare a lexer state for scanning the NUL-terminated string src.
 *
 * state: lexer state to (re)initialise
 * src:   source text; the state keeps a pointer into it, it is not copied
 *
 * The cursor and the token start both point at the first character, the line
 * counter starts at 1 and the last-token type is TOKEN_ERR.
 */
void UL_initLexState(ULexState *state, const char *src) {
    state->current = (char*)src;
    /* makeToken() reads state->start, so it must never be left indeterminate */
    state->start = state->current;
    state->line = 1;
    state->last = TOKEN_ERR;
}
/*
 * Build a token of the given type covering the text from state->start up to
 * (not including) state->current, tagged with the current line number.
 * Also records the type in state->last.
 */
UToken makeToken(ULexState *state, UTokenType type) {
    UToken result;

    /* remember what kind of token was produced most recently */
    state->last = type;

    result.type = type;
    result.line = state->line;
    result.str = state->start;
    result.len = state->current - state->start;
    return result;
}
/*
 * Build a TOKEN_ERR token whose text is the message itself (not source text),
 * tagged with the current line number. Unlike makeToken(), state->last is
 * left untouched.
 */
UToken makeError(ULexState *state, const char *msg) {
    UToken err;

    err.type = TOKEN_ERR;
    err.line = state->line;
    err.len = strlen(msg);
    err.str = (char*)msg;
    return err;
}
/* ==================================[[ char helper functions ]]================================== */
/* returns non-zero when the cursor is sitting on the terminating NUL */
int isEnd(ULexState *state) {
    return state->current[0] == '\0';
}
/* consume one character: returns the character under the cursor and advances past it */
char next(ULexState *state) {
    char consumed = *state->current;

    state->current++;
    return consumed;
}
/* returns the character under the cursor without consuming it */
char peek(ULexState *state) {
    return state->current[0];
}
/*
 * returns the character one past the cursor without consuming anything,
 * or '\0' when the cursor is already on the terminator
 */
char peekNext(ULexState *state) {
    return isEnd(state) ? '\0' : state->current[1];
}
/*
 * conditionally consume a character: when the cursor is not at the end and
 * the character under it equals expected, advance and return 1; otherwise
 * leave the cursor alone and return 0
 */
int charMatch(ULexState *state, char expected) {
    if (!isEnd(state) && *state->current == expected) {
        state->current++;
        return 1;
    }

    return 0;
}
/* returns 1 for ASCII letters and '_' (identifiers may contain '_'), 0 otherwise */
int isAlpha(char c) {
    if (c == '_')
        return 1;

    if (c >= 'A' && c <= 'Z')
        return 1;

    return c >= 'a' && c <= 'z';
}
/* returns 1 for the ASCII decimal digits '0'..'9', 0 otherwise */
int isNumeric(char c) {
    if (c < '0')
        return 0;

    return c <= '9';
}
/* returns 1 for hexadecimal digits (0-9, a-f, A-F), 0 otherwise */
int isHex(char c) {
    if (isNumeric(c))
        return 1;

    if (c >= 'a' && c <= 'f')
        return 1;

    return c >= 'A' && c <= 'F';
}
/* returns 1 for space, newline, carriage return and tab, 0 otherwise */
int isWhitespace(char c) {
    switch (c) {
        case ' ':
        case '\n':
        case '\r':
        case '\t':
            return 1;
        default:
            return 0;
    }
}
/* ==================================[[ parse long tokens ]]================================== */
/*
 * advance the cursor past any run of whitespace, bumping the line counter
 * once for every newline that is skipped
 */
void skipWhitespace(ULexState *state) {
    char c;

    for (c = peek(state); isWhitespace(c); c = peek(state)) {
        /* newlines are whitespace too, but they also move us to the next line */
        if (c == '\n')
            state->line++;

        next(state);
    }
}
/*
 * classify the text between state->start and state->current: returns the
 * token type of the reserved word it spells exactly, or TOKEN_IDENT when it
 * matches none of them
 */
UTokenType identifierType(ULexState *state) {
    const UReservedWord *entry = reservedWords;
    const UReservedWord *end = reservedWords + sizeof(reservedWords)/sizeof(reservedWords[0]);
    int len = state->current - state->start;

    for (; entry != end; entry++) {
        /* cheap length check first; only same-length words get compared */
        if (entry->len != len)
            continue;

        if (memcmp(state->start, entry->word, len) == 0)
            return entry->type;
    }

    /* no reserved word matched, so it's a plain identifier */
    return TOKEN_IDENT;
}
/*
 * Scan the rest of a numeric literal. The first digit has already been
 * consumed by the caller (it sits at state->start).
 *
 * Returns TOKEN_HEX for "0x" followed by one or more hexadecimal digits,
 * TOKEN_NUMBER for a run of decimal digits, or an error token when "0x" is
 * not followed by any hexadecimal digit.
 */
UToken readNumber(ULexState *state) {
    /*
     * only a lone leading '0' may introduce the 'x' prefix; otherwise input
     * such as "9xFF" would be swallowed whole as a hexadecimal literal
     */
    if (state->current - state->start == 1 && state->start[0] == '0' && peek(state) == 'x') {
        next(state); /* consume the 'x' */

        /* "0x" on its own carries no value, so reject it */
        if (!isHex(peek(state)))
            return makeError(state, "Expected hexadecimal digits after '0x'!");

        /* consume hexadecimal */
        while (isHex(peek(state)))
            next(state);

        return makeToken(state, TOKEN_HEX);
    }

    /* TODO: add binary encoding, eg. "0b01000101" */

    /* its a normal number, consume the remaining decimal digits */
    while (isNumeric(peek(state)))
        next(state);

    return makeToken(state, TOKEN_NUMBER);
}
/*
 * Scan the rest of an identifier or keyword: consumes letters, '_' and digits
 * until something else (or the end of input) is reached, then emits either
 * the matching reserved-word token or TOKEN_IDENT.
 */
UToken readIdentifier(ULexState *state) {
    UTokenType type;

    for (;;) {
        char c;

        if (isEnd(state))
            break;

        c = peek(state);
        if (!isAlpha(c) && !isNumeric(c))
            break;

        next(state);
    }

    /* is it a reserved word? */
    type = identifierType(state);
    return makeToken(state, type);
}
/*
 * Consume one (possibly escaped) character and return its value.
 *
 * The caller must make sure the cursor is not at the end of input before
 * calling. Recognised escapes are \\, \n, \t and \r.
 *
 * Returns the character value as a non-negative int, or -1 when a backslash
 * is followed by an unknown escape or by the end of input.
 */
int consumeCharacter(ULexState *state) {
    char c = next(state);

    /*
     * plain character: go through unsigned char so a byte like 0xFF on a
     * signed-char platform can't be mistaken for the -1 error result
     */
    if (c != '\\')
        return (unsigned char)c;

    /* a dangling backslash: don't step over the terminator */
    if (isEnd(state))
        return -1; /* error result */

    switch (next(state)) {
        case '\\': return '\\';
        case 'n': return '\n';
        case 't': return '\t';
        case 'r': return '\r';
        default:
            return -1; /* error result */
    }
}
/*
 * Scan a character literal; the opening quote has already been consumed by
 * the caller.
 *
 * Returns TOKEN_CHAR_LIT on success, or an error token when the literal is
 * cut off by the end of input, uses an unknown escape, or is not closed by
 * a single quote.
 */
UToken readCharacter(ULexState *state) {
    if (isEnd(state))
        return makeError(state, "Expected end to character literal!");

    /* consume character */
    if (consumeCharacter(state) == -1)
        return makeError(state, "Unknown special character!");

    /*
     * charMatch() only advances on a match, so an unterminated literal at the
     * end of input can't push the cursor past the terminating NUL
     */
    if (!charMatch(state, '\''))
        return makeError(state, "Expected end to character literal!");

    return makeToken(state, TOKEN_CHAR_LIT);
}
/*
 * Scan and return the next token from the source.
 *
 * Leading whitespace is skipped (counting newlines). At the end of input a
 * zero-length TOKEN_EOF is returned and the cursor stays on the terminating
 * NUL, so calling this again keeps returning TOKEN_EOF. Characters that
 * start no known token produce TOKEN_UNREC.
 */
UToken UL_scanNext(ULexState *state) {
    char c;

    /*
     * skip whitespace and mark the token start BEFORE testing for the end:
     * this way trailing whitespace can't lead to the NUL being consumed, and
     * the EOF token never reuses a stale start pointer
     */
    skipWhitespace(state);
    state->start = state->current;

    /* check if it's the end of the string */
    if (isEnd(state))
        return makeToken(state, TOKEN_EOF);

    c = next(state);
    switch (c) {
        /* single character tokens */
        case '(': return makeToken(state, TOKEN_LEFT_PAREN);
        case ')': return makeToken(state, TOKEN_RIGHT_PAREN);
        case '{': return makeToken(state, TOKEN_LEFT_BRACE);
        case '}': return makeToken(state, TOKEN_RIGHT_BRACE);
        case '[': return makeToken(state, TOKEN_LEFT_BRACKET);
        case ']': return makeToken(state, TOKEN_RIGHT_BRACKET);
        /* NOTE(review): ';' is emitted as TOKEN_COLON -- confirm the name against ulex.h */
        case ';': return makeToken(state, TOKEN_COLON);
        case '+': return makeToken(state, TOKEN_PLUS);
        case '-': return makeToken(state, TOKEN_MINUS);
        case '/': return makeToken(state, TOKEN_SLASH);
        case '*': return makeToken(state, TOKEN_STAR);

        /* one or two character tokens */
        case '>': return charMatch(state, '=') ? makeToken(state, TOKEN_GREATER_EQUAL) : makeToken(state, TOKEN_GREATER);
        case '<': return charMatch(state, '=') ? makeToken(state, TOKEN_LESS_EQUAL) : makeToken(state, TOKEN_LESS);
        case '=': return charMatch(state, '=') ? makeToken(state, TOKEN_EQUAL_EQUAL) : makeToken(state, TOKEN_EQUAL);
        case '!': return charMatch(state, '=') ? makeToken(state, TOKEN_BANG_EQUAL) : makeToken(state, TOKEN_BANG);

        /* literals */
        case '\'': return readCharacter(state);
        default:
            if (isNumeric(c))
                return readNumber(state);

            /* its not a number, its probably a keyword or identifier */
            if (isAlpha(c))
                return readIdentifier(state);

            break;
    }

    /* it's none of those, so it's an unrecognized token */
    return makeToken(state, TOKEN_UNREC);
}