Browse Source

Changed the way errors are reported, added TOKEN_CHAR_LIT

- basic character parsing support added to ulex.c
- UASTNode now includes its corresponding UToken so errors can be better reported in uasm.c
remotes/origin/HEAD
Seth Stubbs 5 months ago
parent
commit
457a28c037
  1. 15
      src/uasm.c
  2. 59
      src/ulex.c
  3. 3
      src/ulex.h
  4. 47
      src/uparse.c
  5. 1
      src/uparse.h

15
src/uasm.c

@ -93,6 +93,15 @@ void cError(UCompState *state, const char *fmt, ...) {
exit(EXIT_FAILURE);
}
/*
 * Reports a fatal compiler error tied to a specific AST node and terminates.
 *
 * Prints a header naming the node's token text and the line recorded in that
 * token, then the caller's printf-style message, and finally exits the process
 * with EXIT_FAILURE. This function never returns.
 *
 *   state - compiler state; not read by this function
 *   node  - AST node whose token (node->tkn) locates the error
 *   fmt   - printf-style format string, followed by its arguments
 */
void cErrorNode(UCompState *state, UASTNode *node, const char *fmt, ...) {
    va_list ap;

    /* location header: offending token text + its line number */
    printf("Compiler error at '%.*s' on line %d\n\t", node->tkn.len, node->tkn.str, node->tkn.line);

    /* caller supplied message */
    va_start(ap, fmt);
    vprintf(fmt, ap);
    va_end(ap);

    exit(EXIT_FAILURE);
}
void writeIntLit(UCompState *state, uint16_t lit) {
fprintf(state->out, "#%.4x ", lit);
state->pushed += SIZE_INT;
@ -252,7 +261,7 @@ UVarType compileAssignment(UCompState *state, UASTNode *node) {
/* make sure we can assign the value of this expression to this variable */
if (!compareVarTypes(state, expType, rawVar->type))
cError(state, "Cannot assign type '%s' to '%.*s' of type '%s'", getTypeName(expType), rawVar->len, rawVar->name, getTypeName(rawVar->type));
cErrorNode(state, node, "Cannot assign type '%s' to '%.*s' of type '%s'", getTypeName(expType), rawVar->len, rawVar->name, getTypeName(rawVar->type));
/* duplicate the value on the stack */
dupValue(state, expType);
@ -277,7 +286,7 @@ UVarType compileExpression(UCompState *state, UASTNode *node) {
rType = compileExpression(state, node->right);
if (lType != TYPE_NONE && rType != TYPE_NONE && !compareVarTypes(state, lType, rType))
cError(state, "lType '%s' doesn't match rType '%s'!", getTypeName(lType), getTypeName(rType));
cErrorNode(state, node, "lType '%s' doesn't match rType '%s'!", getTypeName(lType), getTypeName(rType));
switch(node->type) {
case NODE_ADD: doArith(state, "ADD", lType); break;
@ -308,7 +317,7 @@ void compileDeclaration(UCompState *state, UASTNode *node) {
if (node->left) {
type = compileExpression(state, node->left);
if (compareVarTypes(state, type, rawVar->type))
cError(state, "Cannot assign type '%s' to %.*s of type '%s'", getTypeName(type), rawVar->len, rawVar->name, getTypeName(rawVar->type));
cErrorNode(state, node, "Cannot assign type '%s' to %.*s of type '%s'", getTypeName(type), rawVar->len, rawVar->name, getTypeName(rawVar->type));
setIntVar(state, var->scope, var->var);
}
}

59
src/ulex.c

@ -21,14 +21,25 @@ void UL_initLexState(ULexState *state, const char *src) {
}
UToken makeToken(ULexState *state, UTokenType type) {
UToken token;
token.str = state->start;
token.len = state->current - state->start;
token.type = type;
UToken tkn;
tkn.str = state->start;
tkn.len = state->current - state->start;
tkn.line = state->line;
tkn.type = type;
/* update the state's last token type */
state->last = type;
return token;
return tkn;
}
/*
 * Builds a TOKEN_ERR token that carries an error message instead of source text.
 *
 * The token's str/len describe `msg` itself (not a slice of the lexed source),
 * and its line is the lexer's current line. The message is not copied, so the
 * token only stays valid while `msg` does; the callers visible here pass
 * string literals.
 * Unlike makeToken, this does not update state->last.
 */
UToken makeError(ULexState *state, const char *msg) {
    UToken err = {
        .type = TOKEN_ERR,
        .str = (char*)msg, /* UToken.str is non-const; the message is only read */
        .len = strlen(msg),
        .line = state->line
    };

    return err;
}
/* ==================================[[ char helper functions ]]================================== */
@ -73,7 +84,8 @@ int isWhitespace(char c) {
void skipWhitespace(ULexState *state) {
/* consume all whitespace */
while (isWhitespace(peek(state))) {
if (peek(state) == '\n') /* if it's a new line, make sure we count it */
/* if it's a new line, make sure we count it */
if (peek(state) == '\n')
state->line++;
next(state);
}
@ -107,6 +119,36 @@ UToken readIdentifier(ULexState *state) {
return makeToken(state, identifierType(state)); /* is it a reserved word? */
}
/*
 * Consumes one logical character of a character literal from the lexer.
 *
 * A plain character is consumed as-is. A backslash introduces an escape
 * sequence and the following character is consumed as well; the recognised
 * escapes are \\, \', \", \n, \t, \r and \0.
 *
 * Returns the character's value as a non-negative int, or -1 if the escape
 * sequence is not recognised.
 */
int consumeCharacter(ULexState *state) {
    char c = next(state);

    /*
     * Plain (unescaped) character. Go through unsigned char so that bytes
     * >= 0x80 stay non-negative: with a signed `char`, byte 0xFF would
     * otherwise be returned as -1 and be mistaken for the error result.
     */
    if (c != '\\')
        return (unsigned char)c;

    /* escape sequence: the character after the backslash selects the value */
    switch (next(state)) {
        case '\\': return '\\';
        case '\'': return '\''; /* lets a quote be written in escaped form */
        case '"': return '"';
        case 'n': return '\n';
        case 't': return '\t';
        case 'r': return '\r';
        case '0': return '\0';
        default:
            return -1; /* error result */
    }
}
/*
 * Scans the remainder of a character literal; UL_scanNext calls this after
 * matching the opening quote.
 *
 * Produces a TOKEN_CHAR_LIT token on success. Produces an error token (see
 * makeError) when the input ends right after the opening quote, when the
 * character is an unknown escape sequence, or when the closing quote is missing.
 */
UToken readCharacter(ULexState *state) {
    const char *unterminated = "Expected end to character literal!";

    /* nothing left after the opening quote */
    if (isEnd(state))
        return makeError(state, unterminated);

    /* the literal's single (possibly escaped) character */
    if (consumeCharacter(state) == -1)
        return makeError(state, "Unknown special character!");

    /* the very next character has to be the closing quote */
    if (next(state) == '\'')
        return makeToken(state, TOKEN_CHAR_LIT);

    return makeError(state, unterminated);
}
UToken UL_scanNext(ULexState *state) {
char c;
@ -133,6 +175,7 @@ UToken UL_scanNext(ULexState *state) {
case '/': return makeToken(state, TOKEN_SLASH);
case '*': return makeToken(state, TOKEN_STAR);
case ';': return makeToken(state, TOKEN_COLON);
case '\'': return readCharacter(state);
case '\0': return makeToken(state, TOKEN_EOF);
default:
if (isNumeric(c))
@ -143,6 +186,6 @@ UToken UL_scanNext(ULexState *state) {
return readIdentifier(state);
}
/* it's none of those, so it's an unrecognized token. return an error result for now */
return makeToken(state, TOKEN_ERR);
/* it's none of those, so it's an unrecognized token */
return makeToken(state, TOKEN_UNREC);
}

3
src/ulex.h

@ -11,6 +11,7 @@ typedef enum {
/* literals */
TOKEN_IDENT,
TOKEN_NUMBER,
TOKEN_CHAR_LIT,
TOKEN_LEFT_BRACE,
TOKEN_RIGHT_BRACE,
@ -27,6 +28,7 @@ typedef enum {
TOKEN_STAR,
TOKEN_EOF, /* end of file */
TOKEN_UNREC, /* unrecognized symbol */
TOKEN_ERR /* error type */
} UTokenType;
@ -34,6 +36,7 @@ typedef struct {
UTokenType type;
char *str;
int len;
int line;
} UToken;
typedef struct {

47
src/uparse.c

@ -50,27 +50,28 @@ void error(UParseState *state, const char *fmt, ...) {
va_end(args);
}
UASTNode *newBaseNode(UParseState *state, size_t size, UASTNodeType type, UASTNode *left, UASTNode *right) {
UASTNode *newBaseNode(UParseState *state, UToken tkn, size_t size, UASTNodeType type, UASTNode *left, UASTNode *right) {
UASTNode *node = UM_realloc(NULL, size);
node->type = type;
node->left = left;
node->right = right;
node->tkn = tkn;
return node;
}
UASTNode *newNode(UParseState *state, UASTNodeType type, UASTNode *left, UASTNode *right) {
return newBaseNode(state, sizeof(UASTNode), type, left, right);
UASTNode *newNode(UParseState *state, UToken tkn, UASTNodeType type, UASTNode *left, UASTNode *right) {
return newBaseNode(state, tkn, sizeof(UASTNode), type, left, right);
}
UASTNode *newNumNode(UParseState *state, UASTNode *left, UASTNode *right, int num) {
UASTIntNode *node = (UASTIntNode*)newBaseNode(state, sizeof(UASTIntNode), NODE_INTLIT, left, right);
UASTNode *newNumNode(UParseState *state, UToken tkn, UASTNode *left, UASTNode *right, int num) {
UASTIntNode *node = (UASTIntNode*)newBaseNode(state, tkn, sizeof(UASTIntNode), NODE_INTLIT, left, right);
node->num = num;
return (UASTNode*)node;
}
UASTNode *newScopeNode(UParseState *state, UASTNode *left, UASTNode *right, UScope *scope) {
UASTScopeNode *node = (UASTScopeNode*)newBaseNode(state, sizeof(UASTScopeNode), NODE_STATE_SCOPE, left, right);
UASTNode *newScopeNode(UParseState *state, UToken tkn, UASTNode *left, UASTNode *right, UScope *scope) {
UASTScopeNode *node = (UASTScopeNode*)newBaseNode(state, tkn, sizeof(UASTScopeNode), NODE_STATE_SCOPE, left, right);
node->scope = *scope;
return (UASTNode*)node;
}
@ -135,8 +136,11 @@ void advance(UParseState *state) {
printf("consumed '%.*s', with type %d\n", state->current.len, state->current.str, state->current.type);
if (state->current.type == TOKEN_ERR)
error(state, "unrecognized symbol '%.*s'!", state->current.len, state->current.str);
switch(state->current.type) {
case TOKEN_UNREC: error(state, "Unrecognized symbol '%.*s'!", state->current.len, state->current.str); break;
case TOKEN_ERR: error(state, "%.*s", state->current.len, state->current.str); break;
default: break;
}
}
int check(UParseState *state, UTokenType type) {
@ -165,19 +169,21 @@ ParseRule* getRule(UTokenType type) {
UASTNode* number(UParseState *state, UASTNode *left, Precedence currPrec) {
int num = str2int(state->previous.str, state->previous.len);
printf("got number %d! from token '%.*s' [%d]\n", num, state->previous.len, state->previous.str, state->previous.type);
return newNumNode(state, NULL, NULL, num);
return newNumNode(state, state->previous, NULL, NULL, num);
}
UASTNode* assignment(UParseState *state, UASTNode *left, Precedence currPrec) {
UToken tkn = state->previous;
if (left->type != NODE_VAR)
error(state, "Expected identifier before '='!");
UASTNode *right = expression(state);
return newNode(state, NODE_ASSIGN, left, right);
return newNode(state, tkn, NODE_ASSIGN, left, right);
}
UASTNode* binOperator(UParseState *state, UASTNode *left, Precedence currPrec) {
UASTNodeType type;
UToken tkn = state->previous;
UASTNode *right;
/* grab the node type */
@ -193,7 +199,7 @@ UASTNode* binOperator(UParseState *state, UASTNode *left, Precedence currPrec) {
/* grab the right node */
right = parsePrecedence(state, NULL, currPrec);
return newNode(state, type, left, right);
return newNode(state, tkn, type, left, right);
}
UASTNode* identifer(UParseState *state, UASTNode *left, Precedence currPrec) {
@ -204,7 +210,7 @@ UASTNode* identifer(UParseState *state, UASTNode *left, Precedence currPrec) {
error(state, "Identifer '%.*s' not found!", state->previous.len, state->previous.str);
/* finally, create the Var node */
nVar = (UASTVarNode*)newBaseNode(state, sizeof(UASTVarNode), NODE_VAR, NULL, NULL);
nVar = (UASTVarNode*)newBaseNode(state, state->previous, sizeof(UASTVarNode), NODE_VAR, NULL, NULL);
nVar->var = var->var;
nVar->scope = var->scope;
return (UASTNode*)nVar;
@ -220,6 +226,7 @@ ParseRule ruleTable[] = {
/* literals */
{identifer, NULL, PREC_LITERAL}, /* TOKEN_IDENT */
{number, NULL, PREC_LITERAL}, /* TOKEN_NUMBER */
{NULL, NULL, PREC_NONE}, /* TOKEN_CHAR_LIT */
{NULL, NULL, PREC_NONE}, /* TOKEN_LEFT_BRACE */
{NULL, NULL, PREC_NONE}, /* TOKEN_RIGHT_BRACE */
@ -236,6 +243,7 @@ ParseRule ruleTable[] = {
{NULL, binOperator, PREC_FACTOR}, /* TOKEN_STAR */
{NULL, NULL, PREC_NONE}, /* TOKEN_EOF */
{NULL, NULL, PREC_NONE}, /* TOKEN_UNREC */
{NULL, NULL, PREC_NONE}, /* TOKEN_ERR */
};
@ -286,8 +294,9 @@ UASTNode* parseScope(UParseState *state, int expectBrace) {
}
UASTNode* printStatement(UParseState *state) {
UToken tkn = state->previous;
/* make our statement node & return */
return newNode(state, NODE_STATE_PRNT, expression(state), NULL);
return newNode(state, tkn, NODE_STATE_PRNT, expression(state), NULL);
}
UASTNode* intStatement(UParseState *state) {
@ -302,7 +311,7 @@ UASTNode* intStatement(UParseState *state) {
var = newVar(state, TYPE_INT, state->previous.str, state->previous.len);
/* if it's assigned a value, evaluate the expression & set the left node, if not set it to NULL */
node = (UASTVarNode*)newBaseNode(state, sizeof(UASTVarNode), NODE_STATE_DECLARE_VAR, (match(state, TOKEN_EQUAL)) ? expression(state) : NULL, NULL);
node = (UASTVarNode*)newBaseNode(state, state->previous, sizeof(UASTVarNode), NODE_STATE_DECLARE_VAR, (match(state, TOKEN_EQUAL)) ? expression(state) : NULL, NULL);
node->var = var;
node->scope = state->sCount-1;
return (UASTNode*)node;
@ -310,10 +319,11 @@ UASTNode* intStatement(UParseState *state) {
UASTNode* scopeStatement(UParseState *state) {
UASTScopeNode *node;
UToken tkn = state->previous;
UScope *scope = newScope(state);
/* create scope node and copy the finished scope struct */
node = (UASTScopeNode*)newBaseNode(state, sizeof(UASTScopeNode), NODE_STATE_SCOPE, parseScope(state, 1), NULL);
node = (UASTScopeNode*)newBaseNode(state, tkn, sizeof(UASTScopeNode), NODE_STATE_SCOPE, parseScope(state, 1), NULL);
node->scope = *scope;
endScope(state);
@ -341,9 +351,10 @@ UASTNode* statement(UParseState *state) {
} else if (match(state, TOKEN_LEFT_BRACE)) {
node = scopeStatement(state);
} else {
UToken tkn = state->previous;
/* no statement match was found, just parse the expression */
node = expression(state);
node = newNode(state, NODE_STATE_EXPR, node, NULL);
node = newNode(state, tkn, NODE_STATE_EXPR, node, NULL);
}
if (!match(state, TOKEN_COLON))
@ -401,7 +412,7 @@ UASTNode *UP_parseSource(const char *src) {
scope = newScope(&state);
/* create scope node and copy the finished scope struct */
root = (UASTScopeNode*)newBaseNode(&state, sizeof(UASTScopeNode), NODE_STATE_SCOPE, parseScope(&state, 0), NULL);
root = (UASTScopeNode*)newBaseNode(&state, state.previous, sizeof(UASTScopeNode), NODE_STATE_SCOPE, parseScope(&state, 0), NULL);
root->scope = *scope;
endScope(&state);

1
src/uparse.h

@ -50,6 +50,7 @@ typedef struct {
typedef struct s_UASTNode {
UASTNodeType type;
UToken tkn;
struct s_UASTNode *left;
struct s_UASTNode *right;
} UASTNode;

Loading…
Cancel
Save