Browse Source

Added basic AST parser, printTree() is used to show the generated tree

remotes/origin/HEAD
Seth Stubbs 6 months ago
parent
commit
4fb5aab146
  1. 4
      Makefile
  2. BIN
      bin/uxncle
  3. BIN
      bin/uxnscr
  4. 13
      notes
  5. 7
      src/main.c
  6. BIN
      src/main.o
  7. 2
      src/ulex.c
  8. BIN
      src/ulex.o
  9. 121
      src/uparse.c
  10. 5
      src/uparse.h
  11. BIN
      src/uparse.o

4
Makefile

@ -1,13 +1,15 @@
CC=clang
CFLAGS=-fPIE -Wall -O3 -Isrc -std=c99
LDFLAGS=-lm #-fsanitize=address
OUT=bin/uxnscr
OUT=bin/uxncle
CHDR=\
src/ulex.h\
src/uparse.h\
CSRC=\
src/ulex.c\
src/uparse.c\
src/main.c
COBJ=$(CSRC:.c=.o)

BIN
bin/uxncle

Binary file not shown.

BIN
bin/uxnscr

Binary file not shown.

13
notes

@ -0,0 +1,13 @@
5 + 3 * 2
left = 5
ADD: left = 5, right = ?
MUL: left = 3, right = 2
ADD: left = 5, right = MUL
ADD
5 MUL
3 2

7
src/main.c

@ -0,0 +1,7 @@
#include "uparse.h"
/* Driver entry point: parses a fixed sample expression via UP_parseSource()
 * and exits with status 0. The value returned by the parser is not used here. */
int main(void) {
    UP_parseSource("5 + 3 * 2");
    return 0;
}

BIN
src/main.o

Binary file not shown.

2
src/ulex.c

@ -99,7 +99,7 @@ UToken readNumber(ULexState *state) {
}
UToken readIdentifier(ULexState *state) {
while (!isEnd(state) && (isAlpha(peek(state)) || isNumerical(peek(state))))
while (!isEnd(state) && (isAlpha(peek(state)) || isNumeric(peek(state))))
next(state);
return makeToken(state, identifierType(state)); /* is it a reserved word? */

BIN
src/ulex.o

Binary file not shown.

121
src/uparse.c

@ -6,10 +6,11 @@ typedef enum {
PREC_ASSIGNMENT, // =
PREC_TERM, // + -
PREC_FACTOR, // * /
PREC_LITERAL, // literal values
PREC_PRIMARY // everything else
} Precedence;
typedef UASTNode* (*ParseFunc)(UParseState* pstate);
typedef UASTNode* (*ParseFunc)(UParseState *state, UASTNode *left, Precedence currPrec);
typedef struct {
ParseFunc prefix;
@ -17,10 +18,15 @@ typedef struct {
Precedence level;
} ParseRule;
/* Declared ahead of their definitions so code earlier in the file can refer to
 * them (binOperator calls expression(), which is defined further down). */
UASTNode* parsePrecedence(UParseState *state, UASTNode *left, Precedence prec);
UASTNode* expression(UParseState *state);
/* Parse-rule table; the initializer appears later in the file and getRule()
 * indexes it by token type. */
ParseRule ruleTable[];
/* ==================================[[ generic helper functions ]]================================== */
UASTNode *newNode(UParseState *state, UASTNodeType type, UASTNode *left, UASTNode *right) {
UASTNode *node = UM_realloc(NULL, sizeof(UASTNode));
node->type = type;
node->left = left;
node->right = right;
@ -28,7 +34,7 @@ UASTNode *newNode(UParseState *state, UASTNodeType type, UASTNode *left, UASTNod
}
/* Reports a syntax error and terminates the process.
 *
 * token: token whose text (token->str, token->len) is quoted in the message
 * line:  source line number to report
 * fmt:   printf-style format for the detail message
 * args:  arguments matching fmt
 *
 * Does not return: exits with a failure status so callers of the program can
 * tell that parsing failed. */
void errorAt(UToken *token, int line, const char *fmt, va_list args) {
    /* "%.*s" (precision) prints at most token->len characters. "%*s" would only
     * set a minimum field width and keep printing up to the next NUL.
     * NOTE(review): token->str presumably points into the source buffer and is
     * not NUL-terminated per token -- confirm against the lexer. */
    printf("Syntax error at '%.*s' on line %d: ", (int)token->len, token->str, line);
    vprintf(fmt, args);
    printf("\n");
    exit(EXIT_FAILURE);
}
@ -40,13 +46,64 @@ void error(UParseState *state, const char *fmt, ...) {
va_end(args);
}
void next(UParseState *state) {
/* Moves the parser one token forward: the token that was `current` becomes
 * `previous`, and a freshly scanned token from the lexer becomes `current`. */
void advance(UParseState *state) {
    /* Remember the token being consumed before it is overwritten. */
    state->previous = state->current;

    /* Pull the next token out of the embedded lexer state. */
    state->current = UL_scanNext(&state->lstate);
}
UASTNode *binExpression(UParseState *state) {
/* Returns non-zero when the current (not yet consumed) token has the given
 * type, zero otherwise. Does not consume anything. */
int check(UParseState *state, UTokenType type) {
    return type == state->current.type;
}
/* Consumes the current token if it has the given type.
 * Returns 1 when the token matched (and was consumed), 0 when it did not
 * (in which case the parser state is left untouched). */
int match(UParseState *state, UTokenType type) {
    if (check(state, type)) {
        /* matched: step past the token and report success */
        advance(state);
        return 1;
    }

    return 0;
}
/* Looks up the parse rule for a token type: returns a pointer to the
 * ruleTable entry at index `type`. No bounds checking is performed. */
ParseRule* getRule(UTokenType type) {
    return ruleTable + type;
}
/* Maps an AST node type to a short printable name ("ADD", "SUB", "MUL",
 * "DIV", "NUM"). Any other value yields "err". The returned string is a
 * literal and must not be modified or freed. */
const char* getNodeType(UASTNodeType type) {
    const char *name = "err"; /* fallback for unrecognised node types */

    switch (type) {
        case NODE_ADD: name = "ADD"; break;
        case NODE_SUB: name = "SUB"; break;
        case NODE_MUL: name = "MUL"; break;
        case NODE_DIV: name = "DIV"; break;
        case NODE_NUM: name = "NUM"; break;
        default: break;
    }

    return name;
}
/* ==================================[[ parse functions ]]================================== */
/* Prefix parse function for numeric literals.
 *
 * state:    parser state; the literal has already been consumed, so it is in
 *           state->previous (parsePrecedence advances before calling the
 *           prefix function)
 * left:     unused for prefix rules
 * currPrec: unused
 *
 * Returns a new NODE_NUM leaf node with no children. */
UASTNode* number(UParseState *state, UASTNode *left, Precedence currPrec) {
    /* Read the literal from the token that was just consumed, not from
     * state->current, which already holds the token after the number. */
    int num = atoi(state->previous.str);

    /* TODO: the parsed value is not stored yet -- newNode() takes no value
     * argument; attach `num` to the node once the node type can hold it. */
    (void)num;
    (void)left;
    (void)currPrec;

    return newNode(state, NODE_NUM, NULL, NULL);
}
/* Infix parse function for the binary operators + - * /.
 *
 * state:    parser state; the operator token has already been consumed and is
 *           in state->previous
 * left:     already-parsed left operand
 * currPrec: precedence the enclosing parsePrecedence() call was started with
 *           (not used here; the operator's own rule level is used instead)
 *
 * Returns a new node of the matching type (NODE_ADD/SUB/MUL/DIV) whose children
 * are `left` and the parsed right operand. On an unrecognised operator an
 * error is reported and NULL is returned. */
UASTNode* binOperator(UParseState *state, UASTNode *left, Precedence currPrec) {
    UTokenType opType = state->previous.type;
    UASTNodeType type;
    UASTNode *right;

    (void)currPrec;

    /* grab the node type */
    switch (opType) {
        case TOKEN_PLUS:  type = NODE_ADD; break;
        case TOKEN_MINUS: type = NODE_SUB; break;
        case TOKEN_STAR:  type = NODE_MUL; break;
        case TOKEN_SLASH: type = NODE_DIV; break;
        default:
            /* report the operator token itself (previous), clipped to its length */
            error(state, "Unknown binary operator '%.*s'!", (int)state->previous.len, state->previous.str);
            return NULL;
    }

    /* Parse the right operand one level tighter than this operator. This makes
     * the operators left-associative and stops a lower-precedence operator from
     * being swallowed into the right operand ("5 * 3 + 2" must not become
     * 5 * (3 + 2)). */
    right = parsePrecedence(state, NULL, (Precedence)(getRule(opType)->level + 1));

    return newNode(state, type, left, right);
}
ParseRule ruleTable[] = {
@ -57,7 +114,7 @@ ParseRule ruleTable[] = {
/* literals */
{NULL, NULL, PREC_NONE}, /* TOKEN_IDENT */
{NULL, NULL, PREC_NONE}, /* TOKEN_NUMBER */
{number, NULL, PREC_LITERAL}, /* TOKEN_NUMBER */
{NULL, NULL, PREC_NONE}, /* TOKEN_LEFT_BRACE */
{NULL, NULL, PREC_NONE}, /* TOKEN_RIGHT_BRACE */
@ -67,18 +124,62 @@ ParseRule ruleTable[] = {
{NULL, NULL, PREC_NONE}, /* TOKEN_RIGHT_BRACKET */
{NULL, NULL, PREC_NONE}, /* TOKEN_COLON */
{NULL, NULL, PREC_NONE}, /* TOKEN_POUND */
{NULL, NULL, PREC_NONE}, /* TOKEN_PLUS */
{NULL, NULL, PREC_NONE}, /* TOKEN_MINUS */
{NULL, NULL, PREC_NONE}, /* TOKEN_SLASH */
{NULL, NULL, PREC_NONE}, /* TOKEN_STAR */
{NULL, binOperator, PREC_TERM}, /* TOKEN_PLUS */
{NULL, binOperator, PREC_TERM}, /* TOKEN_MINUS */
{NULL, binOperator, PREC_FACTOR}, /* TOKEN_SLASH */
{NULL, binOperator, PREC_FACTOR}, /* TOKEN_STAR */
{NULL, NULL, PREC_NONE}, /* TOKEN_EOF */
{NULL, NULL, PREC_NONE}, /* TOKEN_ERR */
};
/* Core of the precedence-driven expression parser.
 *
 * state: parser state; state->current is the first token of the expression
 * left:  node handed to the prefix function (callers in this file pass NULL)
 * prec:  minimum precedence level; infix operators whose rule level is below
 *        this are left for the caller to handle
 *
 * Consumes one token and runs its prefix rule, then keeps folding infix rules
 * into `left` for as long as the upcoming token's rule level is at least
 * `prec`. Returns the resulting node; reports an error and returns NULL when a
 * token has no usable prefix or infix rule. */
UASTNode* parsePrecedence(UParseState *state, UASTNode *left, Precedence prec) {
    ParseFunc func;

    /* grab the prefix function */
    advance(state);
    func = getRule(state->previous.type)->prefix;
    if (func == NULL) {
        error(state, "Illegal syntax!");
        return NULL;
    }

    left = func(state, left, prec);

    while (prec <= getRule(state->current.type)->level) {
        func = getRule(state->current.type)->infix;
        advance(state);

        /* A rule can carry a precedence level without an infix handler (the
         * number literal rule does); calling it would jump through NULL. */
        if (func == NULL) {
            error(state, "Illegal syntax!");
            return NULL;
        }

        left = func(state, left, prec);
    }

    return left;
}
/* Parses one full expression: starts parsePrecedence() at PREC_ASSIGNMENT
 * with no left operand and returns whatever node it produces. */
UASTNode* expression(UParseState *state) {
    UASTNode *root = parsePrecedence(state, NULL, PREC_ASSIGNMENT);

    return root;
}
/* Placeholder for statement parsing: not implemented yet, always yields NULL. */
UASTNode* statement(UParseState *state) {
    /* TODO */
    (void)state; /* unused until statements are implemented */

    return NULL;
}
/* Debug helper: prints the tree rooted at `node` to stdout, one node name per
 * line, node first and then its left and right subtrees.
 *
 * node:   root of the (sub)tree to print; NULL prints nothing
 * indent: number of spaces printed before the node name. Left children are
 *         printed 5 columns further left and right children 5 columns further
 *         right; the indent is clamped at 0. */
void printTree(UASTNode *node, int indent) {
    if (node == NULL)
        return;

    /* A negative width would be read by "%*s" as "left-justify with |indent|
     * columns", pushing deep left children back to the right. */
    if (indent < 0)
        indent = 0;

    printf("%*s%s\n", indent, "", getNodeType(node->type));

    if (node->left)
        printTree(node->left, indent-5);

    if (node->right)
        printTree(node->right, indent+5);
}
/* Parses `src` as a single expression, prints the resulting tree with
 * printTree() for debugging, and returns the tree.
 *
 * src: source text handed to the lexer via UL_initLexState()
 *
 * Returns the root node produced by expression(), or NULL if parsing produced
 * no tree. Nothing in this function frees the returned nodes. */
UASTNode *UP_parseSource(const char *src) {
    /* Zero the state so the first advance() does not copy an uninitialised
     * `current` token into `previous`. */
    UParseState state = {0};
    UASTNode *tree;

    UL_initLexState(&state.lstate, src);

    /* prime the parser: load the first token into state.current */
    advance(&state);

    tree = expression(&state);
    if (tree != NULL)
        printTree(tree, 8);

    /* hand the tree back instead of dropping it */
    return tree;
}

5
src/uparse.h

@ -5,7 +5,10 @@
/* Kinds of AST node produced by the parser. */
typedef enum {
    NODE_ADD, /* binary '+' */
    NODE_SUB, /* binary '-' */
    NODE_MUL, /* binary '*' */
    NODE_DIV, /* binary '/' */
    NODE_NUM  /* numeric literal (leaf) */
} UASTNodeType;
typedef struct s_UASTNode {

BIN
src/uparse.o

Binary file not shown.
Loading…
Cancel
Save