From 2e1b745624498da64022e2981d1ec6b8ba0fcc81 Mon Sep 17 00:00:00 2001 From: CPunch Date: Wed, 28 Oct 2020 00:16:30 -0500 Subject: [PATCH] Initial commit --- .gitignore | 2 + .vscode/settings.json | 12 + LICENSE.md | 21 + Makefile | 48 ++ README.md | 7 + fortest.lua | 13 + src/cbaselib.c | 18 + src/cbaselib.h | 9 + src/cchunk.c | 70 +++ src/cchunk.h | 39 ++ src/cdebug.c | 158 +++++++ src/cdebug.h | 9 + src/clex.c | 239 ++++++++++ src/clex.h | 88 ++++ src/cmem.c | 217 +++++++++ src/cmem.h | 47 ++ src/cobj.c | 209 +++++++++ src/cobj.h | 98 ++++ src/coperators.c | 1 + src/coperators.h | 59 +++ src/cosmo.h | 35 ++ src/cparse.c | 1017 +++++++++++++++++++++++++++++++++++++++++ src/cparse.h | 39 ++ src/cstate.c | 74 +++ src/cstate.h | 63 +++ src/ctable.c | 193 ++++++++ src/ctable.h | 29 ++ src/cvalue.c | 72 +++ src/cvalue.h | 62 +++ src/cvm.c | 417 +++++++++++++++++ src/cvm.h | 16 + src/main.c | 131 ++++++ test.lua | 21 + test.py | 7 + 34 files changed, 3540 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/settings.json create mode 100644 LICENSE.md create mode 100644 Makefile create mode 100644 README.md create mode 100644 fortest.lua create mode 100644 src/cbaselib.c create mode 100644 src/cbaselib.h create mode 100644 src/cchunk.c create mode 100644 src/cchunk.h create mode 100644 src/cdebug.c create mode 100644 src/cdebug.h create mode 100644 src/clex.c create mode 100644 src/clex.h create mode 100644 src/cmem.c create mode 100644 src/cmem.h create mode 100644 src/cobj.c create mode 100644 src/cobj.h create mode 100644 src/coperators.c create mode 100644 src/coperators.h create mode 100644 src/cosmo.h create mode 100644 src/cparse.c create mode 100644 src/cparse.h create mode 100644 src/cstate.c create mode 100644 src/cstate.h create mode 100644 src/ctable.c create mode 100644 src/ctable.h create mode 100644 src/cvalue.c create mode 100644 src/cvalue.h create mode 100644 src/cvm.c create mode 100644 src/cvm.h create mode 100644 src/main.c create 
mode 100644 test.lua create mode 100644 test.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7726904 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.o +bin \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..8ff69fd --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,12 @@ +{ + "files.associations": { + "array": "cpp", + "functional": "cpp", + "istream": "cpp", + "ostream": "cpp", + "tuple": "cpp", + "type_traits": "cpp", + "utility": "cpp", + "vector": "cpp" + } +} \ No newline at end of file diff --git a/LICENSE.md b/LICENSE.md new file mode 100644 index 0000000..b0d62e8 --- /dev/null +++ b/LICENSE.md @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Seth Stubbs + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
\ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3dceea8 --- /dev/null +++ b/Makefile @@ -0,0 +1,48 @@ +# make clean && make && ./bin/cosmo + +CC=clang +CFLAGS=-fPIE -O3 #-g3 +LDFLAGS=#-fsanitize=address +OUT=bin/cosmo + +CHDR=\ + src/cchunk.h\ + src/cdebug.h\ + src/clex.h\ + src/cmem.h\ + src/coperators.h\ + src/cosmo.h\ + src/cparse.h\ + src/cstate.h\ + src/cvalue.h\ + src/ctable.h\ + src/cvm.h\ + src/cobj.h\ + src/cbaselib.h\ + +CSRC=\ + src/cchunk.c\ + src/cdebug.c\ + src/clex.c\ + src/cmem.c\ + src/coperators.c\ + src/cparse.c\ + src/cstate.c\ + src/cvalue.c\ + src/ctable.c\ + src/cvm.c\ + src/cobj.c\ + src/cbaselib.c\ + src/main.c\ + +COBJ=$(CSRC:.c=.o) + +.c.o: + $(CC) -c $(CFLAGS) $< -o $@ + +$(OUT): $(COBJ) $(CHDR) + mkdir -p bin + $(CC) $(COBJ) $(LDFLAGS) -o $(OUT) + +clean: + rm -rf $(COBJ) $(OUT) \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..3861c90 --- /dev/null +++ b/README.md @@ -0,0 +1,7 @@ +# Cosmo +Cosmo is a portable scripting language loosely based off of Lua. Designed for embeddability, Cosmo will have a built-in C++ wrapper for ease of use for embedding in C++ applications. + +# Why Cosmo? +While C++ wrappers for Lua exist (see: SolLua), they're all maintained by outside entities while Cosmo writes its own first-party wrapper. Additionally, Cosmo is very easily modifiable having been written in clean C99 with well-documented code; this makes it a great candidate for early language hackers and researchers alike. + +However, Cosmo is not just a friendly developer tool; Cosmo's easy syntax and readability make it a great scripting language for everyone to use. 
\ No newline at end of file diff --git a/fortest.lua b/fortest.lua new file mode 100644 index 0000000..5fdb5df --- /dev/null +++ b/fortest.lua @@ -0,0 +1,13 @@ +local function fact(num) + var total = 1 + for (var i = num; i > 0; i = i - 1) do + total = total * i + end + return total +end + +for (var x = 0; x < 1000; x=x+1) do + for (var z = 0; z < 100; z=z+1) do + print("The factorial of " .. z .. " is " .. fact(z)) + end +end \ No newline at end of file diff --git a/src/cbaselib.c b/src/cbaselib.c new file mode 100644 index 0000000..b265914 --- /dev/null +++ b/src/cbaselib.c @@ -0,0 +1,18 @@ +#include "cbaselib.h" +#include "cvalue.h" +#include "cobj.h" + +void cosmoB_loadlibrary(CState *state) { + cosmoV_register(state, "print", cosmoV_newObj(cosmoO_newCFunction(state, cosmoB_print))); +} + +int cosmoB_print(CState *state, int nargs, CValue *args) { + for (int i = 0; i < nargs; i++) { + CObjString *str = cosmoV_toString(state, args[i]); + printf("%s", cosmoO_readCString(str)); + } + + printf("\n"); + + return 0; // print doesn't return any args +} \ No newline at end of file diff --git a/src/cbaselib.h b/src/cbaselib.h new file mode 100644 index 0000000..34f3136 --- /dev/null +++ b/src/cbaselib.h @@ -0,0 +1,9 @@ +#ifndef COSMO_BASELIB +#define COSMO_BASELIB + +#include "cstate.h" + +COSMO_API void cosmoB_loadlibrary(CState *state); +COSMO_API int cosmoB_print(CState *state, int nargs, CValue *args); + +#endif \ No newline at end of file diff --git a/src/cchunk.c b/src/cchunk.c new file mode 100644 index 0000000..99ac4df --- /dev/null +++ b/src/cchunk.c @@ -0,0 +1,70 @@ +#include "cmem.h" +#include "cchunk.h" +#include "cvalue.h" +#include "cvm.h" + +CChunk *newChunk(CState* state, size_t startCapacity) { + CChunk *chunk = cosmoM_xmalloc(state, sizeof(CChunk)); + initChunk(state, chunk, startCapacity); + return chunk; +} + +void initChunk(CState* state, CChunk *chunk, size_t startCapacity) { + chunk->capacity = startCapacity; + chunk->lineCapacity = startCapacity; 
+ chunk->count = 0; + chunk->buf = NULL; // when writeByteChunk is called, it'll allocate the array for us + chunk->lineInfo = NULL; + + // constants + initValArray(state, &chunk->constants, ARRAY_START); +} + +void cleanChunk(CState* state, CChunk *chunk) { + // first, free the chunk buffer + cosmoM_freearray(state, INSTRUCTION, chunk->buf, chunk->capacity); + // then the line info + cosmoM_freearray(state, int, chunk->lineInfo, chunk->capacity); + // free the constants + cleanValArray(state, &chunk->constants); +} + +void freeChunk(CState* state, CChunk *chunk) { + cleanChunk(state, chunk); + // now, free the wrapper struct + cosmoM_free(state, CChunk, chunk); +} + +int addConstant(CState* state, CChunk *chunk, CValue value) { + // before adding the constant, check if we already have it + for (int i = 0; i < chunk->constants.count; i++) { + if (cosmoV_equal(value, chunk->constants.values[i])) + return i; // we already have a matching constant! + } + + cosmoM_freezeGC(state); // so our GC doesn't free it + appendValArray(state, &chunk->constants, value); + cosmoM_unfreezeGC(state); + return chunk->constants.count - 1; // return the index of the new constants +} + +// ================================================================ [WRITE TO CHUNK] ================================================================ + +void writeu8Chunk(CState* state, CChunk *chunk, INSTRUCTION i, int line) { + // does the buffer need to be reallocated? 
+ cosmoM_growarray(state, INSTRUCTION, chunk->buf, chunk->count, chunk->capacity); + cosmoM_growarray(state, int, chunk->lineInfo, chunk->count, chunk->lineCapacity); + + // write data to the chunk :) + chunk->lineInfo[chunk->count] = line; + chunk->buf[chunk->count++] = i; +} + +void writeu16Chunk(CState* state, CChunk *chunk, uint16_t i, int line) { + INSTRUCTION *buffer = (INSTRUCTION*)(&i); + int sz = sizeof(uint16_t) / sizeof(INSTRUCTION); + + for (int i = 0; i < sz; i++) { + writeu8Chunk(state, chunk, buffer[i], line); + } +} diff --git a/src/cchunk.h b/src/cchunk.h new file mode 100644 index 0000000..20507d5 --- /dev/null +++ b/src/cchunk.h @@ -0,0 +1,39 @@ +#ifndef CCHUNK_H +#define CCHUNK_H + +#include "cosmo.h" + +#include "coperators.h" +#include "cvalue.h" + +typedef struct CValueArray CValueArray; + +typedef struct CChunk { + size_t capacity; // the ammount of space we've allocated for + size_t count; // the space we're currently using + INSTRUCTION *buf; // whole chunk + CValueArray constants; // holds constants + size_t lineCapacity; + int *lineInfo; +} CChunk; + +CChunk *newChunk(CState* state, size_t startCapacity); +void initChunk(CState* state, CChunk *chunk, size_t startCapacity); +void cleanChunk(CState* state, CChunk *chunk); // free's everything but the struct +void freeChunk(CState* state, CChunk *chunk); // free's everything including the struct +int addConstant(CState* state, CChunk *chunk, CValue value); + +// write to chunk +void writeu8Chunk(CState* state, CChunk *chunk, INSTRUCTION i, int line); +void writeu16Chunk(CState* state, CChunk *chunk, uint16_t i, int line); + +// read from chunk +static inline INSTRUCTION readu8Chunk(CChunk *chunk, int offset) { + return chunk->buf[offset]; +} + +static inline uint16_t readu16Chunk(CChunk *chunk, int offset) { + return *((uint16_t*)(&chunk->buf[offset])); +} + +#endif \ No newline at end of file diff --git a/src/cdebug.c b/src/cdebug.c new file mode 100644 index 0000000..4d511c6 --- /dev/null 
+++ b/src/cdebug.c @@ -0,0 +1,158 @@ +#include "cdebug.h" +#include "cvalue.h" +#include "cobj.h" + +void printIndent(int indent) { + for (int i = 0; i < indent; i++) + printf("\t"); +} + +int simpleInstruction(const char *name, int offset) { + printf("%s", name); + return offset + 1; // consume opcode +} + +int shortOperandInstruction(const char *name, CChunk *chunk, int offset) { + printf("%-16s [%03d]", name, readu8Chunk(chunk, offset + 1)); + return offset + 2; +} + +int longOperandInstruction(const char *name, CChunk *chunk, int offset) { + printf("%-16s [%05d]", name, readu16Chunk(chunk, offset + 1)); + return offset + 1 + (sizeof(uint16_t) / sizeof(INSTRUCTION)); +} + +int constInstruction(const char *name, CChunk *chunk, int offset, int indent) { + int index = readu16Chunk(chunk, offset + 1); + printf("%-16s [%05d] - ", name, index); + CValue val = chunk->constants.values[index]; + + printValue(val); + + return offset + 1 + (sizeof(uint16_t) / sizeof(INSTRUCTION)); // consume opcode + uint +} + +int ABOperandInstruction(const char *name, CChunk *chunk, int offset) { + int args = readu8Chunk(chunk, offset + 1); + int nresults = readu8Chunk(chunk, offset + 2); + + printf("%-16s [%03d] [%03d]", name, args, nresults); + return offset + 3; +} + +// public methods in the cdebug.h header + +void disasmChunk(CChunk *chunk, const char *name, int indent) { + printIndent(indent); + printf("===[[ %s ]]===\n", name); + + for (int offset = 0; offset < chunk->count;) { + offset = disasmInstr(chunk, offset, indent); + printf("\n"); + } +} + +int disasmInstr(CChunk *chunk, int offset, int indent) { + printIndent(indent); + printf("%04d ", offset); + + INSTRUCTION i = chunk->buf[offset]; + int line = chunk->lineInfo[offset]; + + if (offset > 0 && line == chunk->lineInfo[offset - 1]) { + printf(" | "); + } else { + printf("%4d ", line); + } + + switch (i) { + case OP_LOADCONST: + return constInstruction("OP_LOADCONST", chunk, offset, indent); + case OP_SETGLOBAL: + return 
constInstruction("OP_SETGLOBAL", chunk, offset, indent); + case OP_GETGLOBAL: + return constInstruction("OP_GETGLOBAL", chunk, offset, indent); + case OP_SETLOCAL: + return shortOperandInstruction("OP_SETLOCAL", chunk, offset); + case OP_GETLOCAL: + return shortOperandInstruction("OP_GETLOCAL", chunk, offset); + case OP_SETUPVAL: + return shortOperandInstruction("OP_SETUPVAL", chunk, offset); + case OP_GETUPVAL: + return shortOperandInstruction("OP_GETUPVAL", chunk, offset); + case OP_PEJMP: + return longOperandInstruction("OP_PEJMP", chunk, offset); + case OP_EJMP: + return longOperandInstruction("OP_EJMP", chunk, offset); + case OP_JMP: + return longOperandInstruction("OP_JMP", chunk, offset); + case OP_JMPBACK: + return longOperandInstruction("OP_JMPBACK", chunk, offset); + case OP_POP: + return shortOperandInstruction("OP_POP", chunk, offset); + case OP_CALL: + return ABOperandInstruction("OP_CALL", chunk, offset); + case OP_CLOSURE: { + int index = readu16Chunk(chunk, offset + 1); + printf("%-16s [%05d] - ", "OP_CLOSURE", index); + CValue val = chunk->constants.values[index]; + CObjFunction *cobjFunc = (CObjFunction*)val.val.obj; + offset += 3; // we consumed the opcode + u16 + + printValue(val); + printf("\n"); + + // list the upvalues/locals that are captured + for (int i = 0; i < cobjFunc->upvals; i++) { + uint8_t encoding = readu8Chunk(chunk, offset++); + uint8_t index = readu8Chunk(chunk, offset++); + printIndent(indent + 1); + printf("references %s [%d]\n", encoding == OP_GETLOCAL ? "local" : "upvalue", index); + } + + // print the chunk + disasmChunk(&cobjFunc->chunk, cobjFunc->name == NULL ? 
UNNAMEDCHUNK : cobjFunc->name->str, indent+1); + return offset; + } + case OP_CLOSE: + return simpleInstruction("OP_CLOSE", offset); + case OP_ADD: + return simpleInstruction("OP_ADD", offset); + case OP_SUB: + return simpleInstruction("OP_SUB", offset); + case OP_MULT: + return simpleInstruction("OP_MULT", offset); + case OP_DIV: + return simpleInstruction("OP_DIV", offset); + case OP_TRUE: + return simpleInstruction("OP_TRUE", offset); + case OP_FALSE: + return simpleInstruction("OP_FALSE", offset); + case OP_NIL: + return simpleInstruction("OP_NIL", offset); + case OP_NOT: + return simpleInstruction("OP_NOT", offset); + case OP_EQUAL: + return simpleInstruction("OP_EQUAL", offset); + case OP_GREATER: + return simpleInstruction("OP_GREATER", offset); + case OP_GREATER_EQUAL: + return simpleInstruction("OP_GREATER_EQUAL", offset); + case OP_LESS: + return simpleInstruction("OP_LESS", offset); + case OP_LESS_EQUAL: + return simpleInstruction("OP_LESS_EQUAL", offset); + case OP_NEGATE: + return simpleInstruction("OP_NEGATE", offset); + case OP_CONCAT: + return shortOperandInstruction("OP_CONCAT", chunk, offset); + case OP_RETURN: + return shortOperandInstruction("OP_RETURN", chunk, offset); + default: + printf("Unknown opcode! 
[%d]\n", i); + exit(0); + } + + + return 1; +} \ No newline at end of file diff --git a/src/cdebug.h b/src/cdebug.h new file mode 100644 index 0000000..829fc03 --- /dev/null +++ b/src/cdebug.h @@ -0,0 +1,9 @@ +#ifndef CDEBUG_H +#define CDEBUG_H + +#include "cchunk.h" + +COSMO_API void disasmChunk(CChunk *chunk, const char *name, int indent); +COSMO_API int disasmInstr(CChunk *chunk, int offset, int indent); + +#endif \ No newline at end of file diff --git a/src/clex.c b/src/clex.c new file mode 100644 index 0000000..e388be2 --- /dev/null +++ b/src/clex.c @@ -0,0 +1,239 @@ +#include "clex.h" +#include "cmem.h" + +#include + +CReservedWord reservedWords[] = { + {TOKEN_AND, "and", 3}, + {TOKEN_DO, "do", 2}, + {TOKEN_ELSE, "else", 4}, + {TOKEN_ELSEIF, "elseif", 6}, + {TOKEN_END, "end", 3}, + {TOKEN_FALSE, "false", 5}, + {TOKEN_FOR, "for", 3}, + {TOKEN_FUNCTION, "function", 8}, + {TOKEN_IF, "if", 2}, + {TOKEN_LOCAL, "local", 5}, + {TOKEN_NIL, "nil", 3}, + {TOKEN_NOT, "not", 3}, + {TOKEN_OR, "or", 2}, + {TOKEN_RETURN, "return", 6}, + {TOKEN_THEN, "then", 4}, + {TOKEN_TRUE, "true", 4}, + {TOKEN_VAR, "var", 3}, + {TOKEN_WHILE, "while", 5} +}; + + +static CToken makeToken(CLexState *state, CTokenType type) { + CToken token; + token.type = type; + token.start = state->startChar; + token.length = state->currentChar - state->startChar; // delta between start & current + token.line = state->line; + + state->lastType = type; + + return token; +} + +static CToken makeError(CLexState *state, const char *msg) { + CToken token; + token.type = TOKEN_ERROR; + token.start = (char*)msg; + token.length = strlen(msg); + token.line = state->line; + + return token; +} + +static inline bool isEnd(CLexState *state) { + return state->isEnd; +} + +static inline bool isNumerical(char c) { + return c >= '0' && c <= '9'; +} + +static bool isAlpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'; // identifiers can have '_' +} + +static bool match(CLexState *state, 
char expected) { + if (isEnd(state) || *state->currentChar != expected) + return false; + + // it matched, so increment the currentChar and return true + state->currentChar++; + return true; +} + +char peek(CLexState *state) { + return *state->currentChar; +} + +static char peekNext(CLexState *state) { + if (isEnd(state)) + return '\0'; + + return state->currentChar[1]; +} + +char next(CLexState *state) { + state->currentChar++; + return state->currentChar[-1]; +} + +CTokenType identifierType(CLexState *state) { + int length = state->currentChar - state->startChar; + + // check against reserved word list + for (int i = 0; i < sizeof(reservedWords) / sizeof(CReservedWord); i++) { + // it matches the reserved word + if (reservedWords[i].len == length && memcmp(state->startChar, reservedWords[i].word, length) == 0) + return reservedWords[i].type; + } + + // else, it's an identifier + return TOKEN_IDENTIFIER; +} + +void skipWhitespace(CLexState *state) { + while (true) { + char c = peek(state); + switch (c) { + case ' ': + case '\r': + case '\t': + next(state); // consume the whitespace + break; + case '\n': // mark new line, make the main loop consume it + state->line++; + return; + case '-': // consume comments + if (peekNext(state) == '-') { + + // skip to next line (also let \n be consumed on the next iteration to properly handle that) + while (!isEnd(state) && peek(state) != '\n' && peek(state) != '\0') // if it's not a newline or null terminator + next(state); + + break; + } + return; // it's a TOKEN_SLASH, let the main body handle that + default: // it's no longer whitespace, return! 
+ return; + } + } +} + +CToken parseString(CLexState *state) { + while (peek(state) != '"' && !isEnd(state)) { + if (peek(state) == '\n') // strings can't stretch across lines + return makeError(state, "Unterminated string!"); + next(state); // consume + } + + if (isEnd(state)) + return makeError(state, "Unterminated string!"); + + next(state); // consume closing quote + return makeToken(state, TOKEN_STRING); +} + +CToken parseNumber(CLexState *state) { + // consume number + while (isNumerical(peek(state))) + next(state); + + if (peek(state) == '.' && isNumerical(peekNext(state))) { + next(state); // consume '.' + + // consume number + while (isNumerical(peek(state))) + next(state); + } + + return makeToken(state, TOKEN_NUMBER); +} + +CToken parseIdentifier(CLexState *state) { + // read literal + while ((isAlpha(peek(state)) || isNumerical(peek(state))) && !isEnd(state)) + next(state); + + return makeToken(state, identifierType(state)); // is it a reserved word? +} + +CLexState *cosmoL_newLexState(CState *cstate, const char *source) { + CLexState *state = cosmoM_xmalloc(cstate, sizeof(CLexState)); + state->startChar = (char*)source; + state->currentChar = (char*)source; + state->line = 1; + state->lastLine = 0; + state->openedBraces = 0; + state->isEnd = false; + state->lastType = TOKEN_ERROR; + + return state; +} + +void cosmoL_freeLexState(CState *state, CLexState *lstate) { + cosmoM_free(state, CLexState, lstate); +} + +CToken cosmoL_scanToken(CLexState *state) { +_scanTokenEnter: + skipWhitespace(state); + + state->startChar = state->currentChar; + + if (isEnd(state)) + return makeToken(state, TOKEN_EOF); + + char c = next(state); + + switch (c) { + // single character tokens + case '(': state->openedBraces++; return makeToken(state, TOKEN_LEFT_PAREN); + case ')': state->openedBraces--; return makeToken(state, TOKEN_RIGHT_PAREN); + case '{': state->openedBraces++; return makeToken(state, TOKEN_LEFT_BRACE); + case '}': state->openedBraces--; return 
makeToken(state, TOKEN_RIGHT_BRACE); + case '\0': + state->isEnd = true; + if (state->lastType == TOKEN_EOS) + return makeToken(state, TOKEN_EOF); + // fall through + case ';': return makeToken(state, TOKEN_EOS); + case ',': return makeToken(state, TOKEN_COMMA); + case '+': return makeToken(state, TOKEN_PLUS); + case '-': return makeToken(state, TOKEN_MINUS); + case '*': return makeToken(state, TOKEN_STAR); + case '/': return makeToken(state, TOKEN_SLASH); + case '\n': { // might be treated like a TOKEN_EOS + if (state->openedBraces == 0 && state->lastType != TOKEN_EOS) + return makeToken(state, TOKEN_EOS); + else // go back to the start + goto _scanTokenEnter; + } + // two character tokens + case '.': + return match(state, '.') ? makeToken(state, TOKEN_DOT_DOT) : makeToken(state, TOKEN_DOT); + case '!': + return match(state, '=') ? makeToken(state, TOKEN_BANG_EQUAL) : makeToken(state, TOKEN_BANG); + case '=': + return match(state, '=') ? makeToken(state, TOKEN_EQUAL_EQUAL) : makeToken(state, TOKEN_EQUAL); + case '>': + return match(state, '=') ? makeToken(state, TOKEN_GREATER_EQUAL) : makeToken(state, TOKEN_GREATER); + case '<': + return match(state, '=') ? 
makeToken(state, TOKEN_LESS_EQUAL) : makeToken(state, TOKEN_LESS); + // literals + case '"': return parseString(state); + default: + if (isNumerical(c)) + return parseNumber(state); + if (isAlpha(c)) + return parseIdentifier(state); + } + + return makeError(state, "Unknown symbol!"); +} \ No newline at end of file diff --git a/src/clex.h b/src/clex.h new file mode 100644 index 0000000..141d7ef --- /dev/null +++ b/src/clex.h @@ -0,0 +1,88 @@ +#ifndef CLEX_H +#define CLEX_H + +#include "cosmo.h" + +typedef enum { + // single character tokens + TOKEN_LEFT_PAREN, + TOKEN_RIGHT_PAREN, + TOKEN_LEFT_BRACE, + TOKEN_RIGHT_BRACE, + TOKEN_COMMA, + TOKEN_DOT, + TOKEN_DOT_DOT, + TOKEN_MINUS, + TOKEN_PLUS, + TOKEN_SLASH, + TOKEN_STAR, + TOKEN_EOS, // end of statement + + // equality operators + TOKEN_BANG, + TOKEN_BANG_EQUAL, + TOKEN_EQUAL, + TOKEN_EQUAL_EQUAL, + TOKEN_GREATER, + TOKEN_GREATER_EQUAL, + TOKEN_LESS, + TOKEN_LESS_EQUAL, + + // literals + TOKEN_IDENTIFIER, + TOKEN_STRING, + TOKEN_NUMBER, + TOKEN_NIL, + TOKEN_TRUE, + TOKEN_FALSE, + + // keywords & reserved words + TOKEN_AND, + TOKEN_DO, + TOKEN_ELSE, + TOKEN_ELSEIF, + TOKEN_END, + TOKEN_FOR, + TOKEN_FUNCTION, + TOKEN_IF, + TOKEN_LOCAL, + TOKEN_NOT, + TOKEN_OR, + TOKEN_RETURN, + TOKEN_THEN, + TOKEN_VAR, + TOKEN_WHILE, + + TOKEN_ERROR, + TOKEN_EOF +} CTokenType; + +typedef struct { + CTokenType type; + const char *word; + int len; +} CReservedWord; + +typedef struct { + CTokenType type; + char *start; + int length; + int line; +} CToken; + +typedef struct { + char *currentChar; + char *startChar; + int line; // current line + int lastLine; // line of the previous consumed token + int openedBraces; // tracks open [], {}, or () + bool isEnd; + CTokenType lastType; +} CLexState; + +CLexState *cosmoL_newLexState(CState *state, const char *source); +void cosmoL_freeLexState(CState *state, CLexState *lstate); + +CToken cosmoL_scanToken(CLexState *state); + +#endif \ No newline at end of file diff --git a/src/cmem.c 
b/src/cmem.c new file mode 100644 index 0000000..066a88c --- /dev/null +++ b/src/cmem.c @@ -0,0 +1,217 @@ +#include "cmem.h" +#include "cstate.h" +#include "cvalue.h" +#include "ctable.h" +#include "cparse.h" +#include "cobj.h" + +/* + copy buffer to new larger buffer, and free the old buffer +*/ +void *cosmoM_reallocate(CState* state, void *buf, size_t oldSize, size_t newSize) { + state->allocatedBytes += newSize - oldSize; + + if (newSize == 0) { // it needs to be free'd + free(buf); + return NULL; + } + +#ifdef GC_STRESS + if (!(cosmoM_isFrozen(state)) && newSize > oldSize) { + cosmoM_collectGarbage(state); + } +#else + // if the state isn't frozen && we've reached the GC event + if (!(cosmoM_isFrozen(state)) && state->allocatedBytes > state->nextGC) { + cosmoM_collectGarbage(state); // cya lol + } +#endif + + // otherwise just use realloc to do all the heavy lifting + void *newBuf = realloc(buf, newSize); + + if (newBuf == NULL) { + CERROR("failed to allocate memory!"); + exit(1); + } + + return newBuf; +} + +void markObject(CState *state, CObj *obj); +void markValue(CState *state, CValue val); + +void markTable(CState *state, CTable *tbl) { + if (tbl->table == NULL) // table is still being initialized + return; + + for (int i = 0; i < tbl->capacity; i++) { + CTableEntry *entry = &tbl->table[i]; + markValue(state, entry->key); + markValue(state, entry->val); + } +} + +// free's white members from the table +void tableRemoveWhite(CState *state, CTable *tbl) { + if (tbl->table == NULL) // table is still being initialized + return; + + for (int i = 0; i < tbl->capacity; i++) { + CTableEntry *entry = &tbl->table[i]; + if (IS_OBJ(entry->key) && !(entry->key.val.obj)->isMarked) { // if the key is a object and it's white (unmarked), remove it from the table + cosmoT_remove(tbl, entry->key); + } + } +} + +void markArray(CState *state, CValueArray *array) { + for (int i = 0; i < array->count; i++) { + markValue(state, array->values[i]); + } +} + +// mark all references 
associated with the object +void blackenObject(CState *state, CObj *obj) { + switch (obj->type) { + case COBJ_STRING: + case COBJ_CFUNCTION: + // stubbed + break; + case COBJ_UPVALUE: { + markValue(state, ((CObjUpval*)obj)->closed); + + break; + } + case COBJ_FUNCTION: { + CObjFunction *func = (CObjFunction*)obj; + markObject(state, (CObj*)func->name); + markArray(state, &func->chunk.constants); + + break; + } + case COBJ_CLOSURE: { + CObjClosure *closure = (CObjClosure*)obj; + markObject(state, (CObj*)closure->function); + + // mark all upvalues + for (int i = 0; i < closure->upvalueCount; i++) { + markObject(state, (CObj*)closure->upvalues[i]); + } + + break; + } + default: + printf("Unknown type in blackenObject with %p, type %d\n", obj, obj->type); + break; + } +} + +void markObject(CState *state, CObj *obj) { + if (obj == NULL || obj->isMarked) // skip if NULL or already marked + return; + + obj->isMarked = true; + +#ifdef GC_DEBUG + printf("marking %p, [", obj); + printObject(obj); + printf("]\n"); +#endif + + // they don't need to be added to the gray stack, they don't reference any other CObjs + if (obj->type == COBJ_CFUNCTION || obj->type == COBJ_STRING) + return; + + // we don't use cosmoM_growarray because we don't want to trigger another GC event while in the GC! 
+ if (state->grayCount >= state->grayCapacity || state->grayStack == NULL) { + int old = state->grayCapacity; + state->grayCapacity = old * GROW_FACTOR; + state->grayStack = (CObj**)realloc(state->grayStack, sizeof(CObj*) * state->grayCapacity); + + if (state->grayStack == NULL) { + CERROR("failed to allocate memory for grayStack!"); + exit(1); + } + } + + state->grayStack[state->grayCount++] = obj; +} + +void markValue(CState *state, CValue val) { + if (IS_OBJ(val)) + markObject(state, cosmoV_readObj(val)); +} + +// trace our gray references +void traceGrays(CState *state) { + while (state->grayCount > 0) { + CObj* obj = state->grayStack[--state->grayCount]; + blackenObject(state, obj); + } +} + +void sweep(CState *state) { + CObj *prev = NULL; + CObj *object = state->objects; + while (object != NULL) { + if (object->isMarked) { // skip over it + object->isMarked = false; // rest to white + prev = object; + object = object->next; + } else { // free it! + CObj *oldObj = object; + + object = object->next; + if (prev == NULL) { + state->objects = object; + } else { + prev->next = object; + } + + cosmoO_freeObject(state, oldObj); + } + } +} + +void markRoots(CState *state) { + // mark all values on the stack + for (StkPtr value = state->stack; value < state->top; value++) { + markValue(state, *value); + } + + // mark all active callframe closures + for (int i = 0; i < state->frameCount; i++) { + markObject(state, (CObj*)state->callFrame[i].closure); + } + + // mark all open upvalues + for (CObjUpval *upvalue = state->openUpvalues; upvalue != NULL; upvalue = upvalue->next) { + markObject(state, (CObj*)upvalue); + } + + markTable(state, &state->globals); + + traceGrays(state); +} + +COSMO_API void cosmoM_collectGarbage(CState *state) { +#ifdef GC_DEBUG + printf("-- GC start\n"); + size_t start = state->allocatedBytes; +#endif + + markRoots(state); + + tableRemoveWhite(state, &state->strings); // make sure we aren't referencing any strings that are about to be free'd + 
// now finally, free all the unmarked objects + sweep(state); + + // set our next GC event + state->nextGC = state->allocatedBytes * HEAP_GROW_FACTOR; + +#ifdef GC_DEBUG + printf("-- GC end, reclaimed %ld bytes (started at %ld, ended at %ld), next garbage collection scheduled at %ld bytes\n", + start - state->allocatedBytes, start, state->allocatedBytes, state->nextGC); +#endif +} \ No newline at end of file diff --git a/src/cmem.h b/src/cmem.h new file mode 100644 index 0000000..bb16ffc --- /dev/null +++ b/src/cmem.h @@ -0,0 +1,47 @@ +#ifndef CMEME_C +#define CMEME_C // meme lol + +#include "cosmo.h" + +#include "cstate.h" + +//#define GC_STRESS +//#define GC_DEBUG +// arrays will grow by a factor of 2 +#define GROW_FACTOR 2 +#define HEAP_GROW_FACTOR 2 +#define ARRAY_START 8 + +#define cosmoM_freearray(state, type, buf, capacity) \ + cosmoM_reallocate(state, buf, sizeof(type) *capacity, 0) + +#define cosmoM_growarray(state, type, buf, count, capacity) \ + if (count >= capacity || buf == NULL) { \ + int old = capacity; \ + capacity = old *GROW_FACTOR; \ + buf = (type*)cosmoM_reallocate(state, buf, sizeof(type) *old, sizeof(type) *capacity); \ + } + +#define cosmoM_free(state, type, x) \ + cosmoM_reallocate(state, x, sizeof(type), 0) + +#define cosmoM_isFrozen(state) \ + state->freezeGC > 0 + +#define cosmoM_freezeGC(state) \ + state->freezeGC++ + +#define cosmoM_unfreezeGC(state) \ + state->freezeGC-- + +COSMO_API void *cosmoM_reallocate(CState* state, void *buf, size_t oldSize, size_t newSize); +COSMO_API void cosmoM_collectGarbage(CState* state); + +/* + wrapper for cosmoM_reallocate so we can track our memory usage (it's also safer :P) +*/ +static inline void *cosmoM_xmalloc(CState *state, size_t sz) { + return cosmoM_reallocate(state, NULL, 0, sz); +} + +#endif \ No newline at end of file diff --git a/src/cobj.c b/src/cobj.c new file mode 100644 index 0000000..4c99c9b --- /dev/null +++ b/src/cobj.c @@ -0,0 +1,209 @@ +#include "cstate.h" +#include "ctable.h" 
+#include "cobj.h"
+#include "cmem.h"
+
+// header name was lost in this copy of the patch; memcpy/strlen below need <string.h>
+#include <string.h>
+
+// we don't actually hash the whole string :eyes:
+// Samples at most ~32 characters, walking backwards from the end of the string
+// in steps of (sz / 32) + 1. The index is a size_t so it cannot be truncated
+// or compared signed-vs-unsigned against `step`.
+uint32_t hashString(const char *str, size_t sz) {
+    uint32_t hash = sz;
+    size_t step = (sz>>5)+1;
+
+    // i >= step guarantees i - step never wraps below zero
+    for (size_t i = sz; i >= step; i-=step)
+        hash = ((hash << 5) + (hash>>2)) + str[i-1];
+
+    return hash;
+}
+
+// Allocates sz bytes for an object of the given type, clears its mark and
+// links it at the head of state->objects so the sweeper can find it.
+CObj *cosmoO_allocateObject(CState *state, size_t sz, CObjType type) {
+    CObj* obj = (CObj*)cosmoM_xmalloc(state, sz);
+    obj->type = type;
+    obj->isMarked = false;
+
+    obj->next = state->objects;
+    state->objects = obj;
+    return obj;
+}
+
+// Releases obj together with any buffers it owns. The caller is responsible
+// for having unlinked it from state->objects first (see sweep()).
+void cosmoO_freeObject(CState *state, CObj* obj) {
+#ifdef GC_DEBUG
+    printf("freeing %p [", obj);
+    printObject(obj);
+    printf("]\n");
+#endif
+    switch(obj->type) {
+        case COBJ_STRING: {
+            CObjString *objStr = (CObjString*)obj;
+            // cosmoO_copyString allocates length + 1 bytes (null terminator), so
+            // release the same amount to keep the allocated-bytes accounting balanced
+            cosmoM_freearray(state, char, objStr->str, objStr->length + 1);
+            cosmoM_free(state, CObjString, objStr);
+            break;
+        }
+        case COBJ_UPVALUE: {
+            cosmoM_free(state, CObjUpval, obj);
+            break;
+        }
+        case COBJ_FUNCTION: {
+            CObjFunction *objFunc = (CObjFunction*)obj;
+            cleanChunk(state, &objFunc->chunk);
+            cosmoM_free(state, CObjFunction, objFunc);
+            break;
+        }
+        case COBJ_CFUNCTION: {
+            cosmoM_free(state, CObjCFunction, obj);
+            break;
+        }
+        case COBJ_CLOSURE: {
+            CObjClosure* closure = (CObjClosure*)obj;
+            // the upvalue pointer array belongs to the closure; the upvalues themselves are separate objects
+            cosmoM_freearray(state, CObjUpval*, closure->upvalues, closure->upvalueCount);
+            cosmoM_free(state, CObjClosure, closure);
+            break;
+        }
+    }
+}
+
+// Returns true only for two strings that are the same interned object.
+// NOTE(review): every other object type compares unequal, even to itself -- confirm that is intended.
+bool cosmoO_equalObject(CObj* obj1, CObj* obj2) {
+    if (obj1->type != obj2->type)
+        return false;
+
+    switch (obj1->type) {
+        case COBJ_STRING:
+            return obj1 == obj2; // compare pointers because we already intern all strings :)
+        default:
+            return false; // they're some unknown type, probably malformed :(
+    }
+}
+
+// Creates an empty, unnamed function object with a freshly initialized chunk.
+CObjFunction *cosmoO_newFunction(CState *state) {
+    CObjFunction *func = (CObjFunction*)cosmoO_allocateObject(state, sizeof(CObjFunction), COBJ_FUNCTION);
+    func->args = 0;
+    func->upvals = 0;
+    func->name = NULL;
+
+    initChunk(state,
&func->chunk, ARRAY_START);
+    return func;
+}
+
+// Wraps a native C callback in a heap object.
+CObjCFunction *cosmoO_newCFunction(CState *state, CosmoCFunction func) {
+    CObjCFunction *cfunc = (CObjCFunction*)cosmoO_allocateObject(state, sizeof(CObjCFunction), COBJ_CFUNCTION);
+    cfunc->cfunc = func;
+    return cfunc;
+}
+
+// Creates a closure over func with func->upvals upvalue slots, all initially NULL.
+CObjClosure *cosmoO_newClosure(CState *state, CObjFunction *func) {
+    // intialize array of pointers
+    CObjUpval **upvalues = cosmoM_xmalloc(state, sizeof(CObjUpval*) * func->upvals);
+
+    for (int i = 0; i < func->upvals; i++) {
+        upvalues[i] = NULL;
+    }
+
+    CObjClosure *closure = (CObjClosure*)cosmoO_allocateObject(state, sizeof(CObjClosure), COBJ_CLOSURE);
+    closure->function = func;
+    closure->upvalues = upvalues;
+    closure->upvalueCount = func->upvals;
+
+    return closure;
+}
+
+// Creates an upvalue pointing at val; `closed` starts as nil and `next` as NULL.
+CObjUpval *cosmoO_newUpvalue(CState *state, CValue *val) {
+    CObjUpval *upval = (CObjUpval*)cosmoO_allocateObject(state, sizeof(CObjUpval), COBJ_UPVALUE);
+    upval->val = val;
+    upval->closed = cosmoV_newNil();
+    upval->next = NULL;
+
+    return upval;
+}
+
+// Returns the interned string equal to str[0..sz), creating it from a private,
+// null-terminated copy of the bytes when it is not in state->strings yet.
+CObjString *cosmoO_copyString(CState *state, const char *str, size_t sz) {
+    uint32_t hash = hashString(str, sz);
+    CObjString *lookup = cosmoT_lookupString(&state->strings, str, sz, hash);
+
+    // have we already interned this string?
+    if (lookup != NULL)
+        return lookup;
+
+    char *buf = cosmoM_xmalloc(state, sizeof(char) * (sz + 1)); // +1 for null terminator
+    memcpy(buf, str, sz); // copy string to heap
+    buf[sz] = '\0'; // don't forget our null terminator
+
+    return cosmoO_allocateString(state, buf, sz, hash);
+}
+
+// Like cosmoO_copyString, but takes ownership of the caller's heap buffer: it is
+// either stored in the new string object or freed when an equal string already exists.
+// NOTE(review): the buffer is released as sz bytes here, while cosmoO_copyString
+// allocates sz + 1 -- confirm what size callers allocate.
+CObjString *cosmoO_takeString(CState *state, char *str, size_t sz) {
+    uint32_t hash = hashString(str, sz);
+
+    CObjString *lookup = cosmoT_lookupString(&state->strings, str, sz, hash);
+
+    // have we already interned this string?
+    if (lookup != NULL) {
+        cosmoM_freearray(state, char, str, sz); // free our passed character array, it's unneeded!
+ return lookup; + } + + return cosmoO_allocateString(state, str, sz, hash); +} + +CObjString *cosmoO_allocateString(CState *state, const char *str, size_t sz, uint32_t hash) { + CObjString *strObj = (CObjString*)cosmoO_allocateObject(state, sizeof(CObjString), COBJ_STRING); + strObj->str = (char*)str; + strObj->length = sz; + strObj->hash = hash; + + // we push & pop the string so our GC can find it (we don't use freezeGC/unfreezeGC because we *want* a GC event to happen) + cosmoV_pushValue(state, cosmoV_newObj(strObj)); + cosmoT_insert(state, &state->strings, cosmoV_newObj((CObj*)strObj)); + cosmoV_pop(state); + + return strObj; +} + +CObjString *cosmoO_toString(CState *state, CObj *val) { + switch (val->type) { + case COBJ_STRING: { + return (CObjString*)val; + } + case COBJ_FUNCTION: { + CObjFunction *func = (CObjFunction*)val; + return func->name != NULL ? func->name : cosmoO_copyString(state, UNNAMEDCHUNK, strlen(UNNAMEDCHUNK)); + } + default: + return cosmoO_copyString(state, "", 6); + } +} + +void printObject(CObj *o) { + switch (o->type) { + case COBJ_STRING: { + CObjString *objStr = (CObjString*)o; + printf("\"%.*s\"", objStr->length, objStr->str); + break; + } + case COBJ_UPVALUE: { + CObjUpval *upval = (CObjUpval*)o; + printf(" -> ", upval->val); + printValue(*upval->val); + break; + } + case COBJ_FUNCTION: { + CObjFunction *objFunc = (CObjFunction*)o; + if (objFunc->name != NULL) + printf(" %.*s", objFunc->name->length, objFunc->name->str); + else + printf(" _main"); + break; + } + case COBJ_CFUNCTION: { + CObjCFunction *objCFunc = (CObjCFunction*)o; + printf(" %p", objCFunc->cfunc); + break; + } + case COBJ_CLOSURE: { + CObjClosure *closure = (CObjClosure*)o; + printObject((CObj*)closure->function); // just print the function + break; + } + default: + printf(""); + } +} \ No newline at end of file diff --git a/src/cobj.h b/src/cobj.h new file mode 100644 index 0000000..daba240 --- /dev/null +++ b/src/cobj.h @@ -0,0 +1,98 @@ +#ifndef COBJ_H +#define 
COBJ_H
+
+#include "cosmo.h"
+#include "cchunk.h"
+#include "cvalue.h"
+
+typedef struct CState CState;
+
+// discriminator stored in every CObj header
+typedef enum {
+    COBJ_STRING,
+    COBJ_UPVALUE,
+    COBJ_FUNCTION,
+    COBJ_CFUNCTION,
+    COBJ_CLOSURE
+} CObjType;
+
+// placed first in every object struct so a pointer to it can be cast to CObj*
+#define CommonHeader CObj obj;
+
+// signature of a native function callable from the VM
+typedef int (*CosmoCFunction)(CState *state, int argCount, CValue *args);
+
+// header shared by all heap objects; `next` chains every object in state->objects
+typedef struct CObj {
+    CObjType type;
+    bool isMarked; // for the GC
+    struct CObj *next;
+} CObj;
+
+typedef struct CObjString {
+    CommonHeader; // "is a" CObj
+    int length; // number of characters, not counting the null terminator
+    char *str;
+    uint32_t hash; // for hashtable lookup
+} CObjString;
+
+typedef struct CObjUpval {
+    CommonHeader; // "is a" CObj
+    CValue *val;
+    CValue closed;
+    struct CObjUpval *next;
+} CObjUpval;
+
+typedef struct CObjFunction {
+    CommonHeader; // "is a" CObj
+    CChunk chunk;
+    int args;
+    int upvals;
+    CObjString *name; // NULL for an unnamed function
+} CObjFunction;
+
+typedef struct CObjCFunction {
+    CommonHeader; // "is a" CObj
+    CosmoCFunction cfunc;
+} CObjCFunction;
+
+typedef struct CObjClosure {
+    CommonHeader;
+    CObjFunction *function;
+    CObjUpval **upvalues; // array of upvalueCount pointers
+    int upvalueCount;
+} CObjClosure;
+
+// type tests on a CValue (false for non-object values)
+#define IS_STRING(x) isObjType(x, COBJ_STRING)
+#define IS_FUNCTION(x) isObjType(x, COBJ_FUNCTION)
+#define IS_CFUNCTION(x) isObjType(x, COBJ_CFUNCTION)
+#define IS_CLOSURE(x) isObjType(x, COBJ_CLOSURE)
+
+// unchecked casts from a CValue to the concrete object type
+#define cosmoV_readString(x) ((CObjString*)cosmoV_readObj(x))
+#define cosmoV_readFunction(x) ((CObjFunction*)cosmoV_readObj(x))
+#define cosmoV_readCFunction(x) (((CObjCFunction*)cosmoV_readObj(x))->cfunc)
+#define cosmoV_readClosure(x) ((CObjClosure*)cosmoV_readObj(x))
+
+static inline bool isObjType(CValue val, CObjType type) {
+    return IS_OBJ(val) && cosmoV_readObj(val)->type == type;
+}
+
+CObj *cosmoO_allocateObject(CState *state, size_t sz, CObjType type);
+void cosmoO_freeObject(CState *state, CObj* obj);
+
+bool cosmoO_equalObject(CObj* obj1, CObj* obj2);
+
+CObjFunction *cosmoO_newFunction(CState *state);
+CObjCFunction *cosmoO_newCFunction(CState *state, CosmoCFunction func);
+CObjClosure *cosmoO_newClosure(CState *state, CObjFunction *func);
+CObjString *cosmoO_toString(CState *state, CObj *val);
+CObjUpval *cosmoO_newUpvalue(CState *state, CValue *val);
+
+// copies the *str buffer to the heap and returns a CObjString struct which is also on the heap
+CObjString *cosmoO_copyString(CState *state, const char *str, size_t sz);
+// pass an already allocated str buffer!
+CObjString *cosmoO_takeString(CState *state, char *str, size_t sz);
+// allocates a CObjString pointing directly to *str
+CObjString *cosmoO_allocateString(CState *state, const char *str, size_t sz, uint32_t hash);
+
+COSMO_API void printObject(CObj *o);
+
+// x is cast without surrounding parentheses, so pass a simple expression
+#define cosmoO_readCString(x) ((CObjString*)x)->str
+
+#endif
\ No newline at end of file
diff --git a/src/coperators.c b/src/coperators.c
new file mode 100644
index 0000000..9a7ec46
--- /dev/null
+++ b/src/coperators.c
@@ -0,0 +1 @@
+#include "coperators.h"
\ No newline at end of file
diff --git a/src/coperators.h b/src/coperators.h
new file mode 100644
index 0000000..4661f1f
--- /dev/null
+++ b/src/coperators.h
@@ -0,0 +1,59 @@
+#ifndef COPERATORS_H
+#define COPERATORS_H
+
+#include "cosmo.h"
+
+// instruction types
+typedef enum {
+    I_O, // just the operand (uint8_t)
+    I_OBYTE, // operand (uint8_t) + uint8_t
+    I_OSHORT, // operand (uint8_t) + uint16_t
+} InstructionType;
+
+// instructions
+
+typedef enum {
+    // STACK MANIPULATION
+    OP_LOADCONST,
+    OP_SETGLOBAL,
+    OP_GETGLOBAL,
+    OP_SETLOCAL,
+    OP_GETLOCAL,
+    OP_GETUPVAL,
+    OP_SETUPVAL,
+    OP_PEJMP, // pops, if false jumps uint16_t
+    OP_EJMP, // if peek(0) is falsey jumps uint16_t
+    OP_JMP, // always jumps uint16_t
+    OP_JMPBACK, // jumps -uint16_t
+    OP_POP, // - pops[uint8_t] from stack
+    OP_CALL, // calls top[-uint8_t]
+    OP_CLOSURE,
+    OP_CLOSE,
+
+    // ARITHMETIC
+    OP_ADD,
+    OP_SUB,
+    OP_MULT,
+    OP_DIV,
+    OP_NOT,
+    OP_NEGATE,
+    OP_CONCAT, // concats uint8_t vars on the stack
+
+    // EQUALITY
+    OP_EQUAL,
+    OP_LESS,
+    OP_GREATER,
+    OP_LESS_EQUAL,
+    OP_GREATER_EQUAL,
+
// LITERALS + OP_TRUE, + OP_FALSE, + OP_NIL, + + OP_RETURN, + + OP_NONE // used as an error result +} COPCODE; + +#endif \ No newline at end of file diff --git a/src/cosmo.h b/src/cosmo.h new file mode 100644 index 0000000..738685d --- /dev/null +++ b/src/cosmo.h @@ -0,0 +1,35 @@ +#ifndef COSMOMAIN_H +#define COSMOMAIN_H + +#include +#include +#include +#include +#include + +// forward declare *most* stuff so our headers are cleaner +typedef struct CState CState; +typedef struct CChunk CChunk; +typedef struct CValue CValue; + +// objs +typedef struct CObj CObj; +typedef struct CObjString CObjString; +typedef struct CObjUpval CObjUpval; +typedef struct CObjFunction CObjFunction; +typedef struct CObjCFunction CObjCFunction; +typedef struct CObjClosure CObjClosure; + +typedef uint8_t INSTRUCTION; + +#define COSMOMAX_UPVALS 80 +#define FRAME_MAX 64 +#define STACK_MAX (256 * FRAME_MAX) + +#define COSMO_API extern +#define UNNAMEDCHUNK "_main" + +#define CERROR(err) \ + printf("%s : %s\n", "[ERROR]", err) + +#endif \ No newline at end of file diff --git a/src/cparse.c b/src/cparse.c new file mode 100644 index 0000000..52b8459 --- /dev/null +++ b/src/cparse.c @@ -0,0 +1,1017 @@ +#include "cparse.h" +#include "cstate.h" +#include "clex.h" +#include "cchunk.h" +#include "cdebug.h" +#include "cmem.h" + +#include + +// we define all of this here because we only need it in this file, no need for it to be in the header /shrug + +typedef struct { + CLexState *lex; + CCompilerState* compiler; + CState *state; + CToken current; + CToken previous; // token right after the current token + bool hadError; + bool panic; +} CParseState; + +typedef enum { + PREC_NONE, + PREC_ASSIGNMENT, // = + PREC_CONCAT, // .. + PREC_OR, // or + PREC_AND, // and + PREC_EQUALITY, // == != + PREC_COMPARISON, // < > <= >= + PREC_TERM, // + - + PREC_FACTOR, // * / + PREC_UNARY, // ! - + PREC_CALL, // . 
()
+    PREC_PRIMARY // everything else
+} Precedence;
+
+// handler for a token in prefix or infix position; canAssign tells it whether a following '=' may be consumed
+typedef void (*ParseFunc)(CParseState* pstate, bool canAssign);
+
+// one row of the Pratt table: how a token parses as a prefix, as an infix, and how tightly it binds
+typedef struct {
+    ParseFunc prefix;
+    ParseFunc infix;
+    Precedence level;
+} ParseRule;
+
+static void parsePrecedence(CParseState*, Precedence);
+static void variable(CParseState *pstate, bool canAssign);
+static void expression(CParseState*);
+static void statement(CParseState *pstate);
+static void declaration(CParseState *pstate);
+static void function(CParseState *pstate, FunctionType type);
+static void expressionStatement(CParseState *pstate);
+static ParseRule* getRule(CTokenType type);
+static CObjFunction *endCompiler(CParseState *pstate, int results);
+
+// ================================================================ [FRONT END/TALK TO LEXER] ================================================================
+
+// Resets ccstate, makes it the active compiler of pstate and gives it a fresh
+// function object. For anything but the top-level script the function is named
+// after the previously consumed token.
+static void initCompilerState(CParseState* pstate, CCompilerState *ccstate, FunctionType type, CCompilerState *enclosing) {
+    pstate->compiler = ccstate;
+
+    ccstate->enclosing = enclosing;
+    ccstate->function = NULL;
+    ccstate->localCount = 0;
+    ccstate->scopeDepth = 0;
+    ccstate->pushedValues = 0;
+    ccstate->savedPushed = 0;
+    ccstate->type = type;
+    ccstate->function = cosmoO_newFunction(pstate->state);
+
+
+    if (type != FTYPE_SCRIPT)
+        ccstate->function->name = cosmoO_copyString(pstate->state, pstate->previous.start, pstate->previous.length);
+
+    // mark first local slot as used (this'll hold the CObjFunction of the current function)
+    Local *local = &ccstate->locals[ccstate->localCount++];
+    local->depth = 0;
+    local->isCaptured = false;
+    local->name.length = 0; // empty name, so no identifier can resolve to this slot
+    local->name.start = "";
+}
+
+// Creates the lexer for source and sets up pstate/ccstate for compiling a top-level script.
+static void initParseState(CParseState *pstate, CCompilerState *ccstate, CState *s, const char *source) {
+    pstate->lex = cosmoL_newLexState(s, source);
+
+    pstate->state = s;
+    pstate->hadError = false;
+    pstate->panic = false;
+    pstate->compiler = ccstate;
+
+    initCompilerState(pstate, ccstate, FTYPE_SCRIPT, NULL); //
enclosing starts as NULL
+}
+
+// Releases the lexer owned by pstate.
+static void freeParseState(CParseState *pstate) {
+    cosmoL_freeLexState(pstate->state, pstate->lex);
+}
+
+// Reports msg for token on stderr and flags the parse as failed.
+// Only the first error is reported; later calls return immediately.
+static void errorAt(CParseState *pstate, CToken *token, const char * msg) {
+    if (pstate->hadError)
+        return;
+
+    fprintf(stderr, "[line %d] Objection", token->line);
+
+    if (token->type == TOKEN_EOF) {
+        fprintf(stderr, " at end");
+    } else if (token->type == TOKEN_ERROR) {
+        // an error token has no source text worth echoing
+    } else {
+        fprintf(stderr, " at '%.*s'", token->length, token->start);
+    }
+
+    // keep the message on the same stream as its header so the report is not split
+    fprintf(stderr, ": \n\t%s\n", msg);
+    pstate->hadError = true;
+    pstate->panic = true;
+}
+
+// Reports msg at the token that has not been consumed yet.
+static void errorAtCurrent(CParseState *pstate, const char *msg) {
+    errorAt(pstate, &pstate->current, msg);
+}
+
+// Reports msg at the most recently consumed token.
+static void error(CParseState *pstate, const char *msg) {
+    errorAt(pstate, &pstate->previous, msg);
+}
+
+// Shifts current into previous and scans the next token into current.
+static void advance(CParseState *pstate) {
+    pstate->previous = pstate->current;
+    pstate->current = cosmoL_scanToken(pstate->lex);
+
+    //printf("got %d [%.*s]\n", pstate->current.type, pstate->current.length, pstate->current.start);
+
+    if (pstate->current.type == TOKEN_ERROR) {
+        // go ahead and consume the rest of the errors so it doesn't cascade
+        // NOTE(review): the first non-error token read by this loop is discarded
+        // and current stays the error token -- confirm that is intended
+        CToken temp;
+        do {
+            temp = cosmoL_scanToken(pstate->lex);
+        } while(temp.type == TOKEN_ERROR);
+    }
+}
+
+// True when the not-yet-consumed token has the given type; consumes nothing.
+static bool check(CParseState *pstate, CTokenType type) {
+    return pstate->current.type == type;
+}
+
+// consumes the next token if it matches type, otherwise errors
+static void consume(CParseState* pstate, CTokenType type, const char *msg) {
+    if (pstate->current.type == type) { // if token matches, consume the next token
+        advance(pstate);
+        return;
+    }
+
+    errorAtCurrent(pstate, msg);
+}
+
+// Consumes the next token and returns true when it has the given type;
+// otherwise leaves it in place and returns false.
+static bool match(CParseState *pstate, CTokenType type) {
+    if (!check(pstate, type))
+        return false;
+
+    // if it matched, go ahead and consume the next token
+    advance(pstate);
+    return true;
+}
+
+// True when both tokens spell the same identifier.
+static bool identifiersEqual(CToken *idA, CToken *idB) {
+    return idA->length == idB->length && memcmp(idA->start, idB->start,
idA->length) == 0;
+}
+
+// The compiler tracks how many values the emitted code leaves on the runtime
+// stack; these two helpers adjust that counter.
+static void inline valuePushed(CParseState *pstate, int values) {
+    pstate->compiler->pushedValues += values;
+}
+
+static void inline valuePopped(CParseState *pstate, int values) {
+    pstate->compiler->pushedValues -= values;
+}
+
+// ================================================================ [WRITE TO CHUNK] ================================================================
+
+// chunk of the function currently being compiled
+CChunk* getChunk(CParseState *pstate) {
+    return &pstate->compiler->function->chunk;
+}
+
+// safely adds constant to chunk, checking for overflow
+uint16_t makeConstant(CParseState *pstate, CValue val) {
+    int indx = addConstant(pstate->state, getChunk(pstate), val);
+    if (indx > UINT16_MAX) {
+        error(pstate, "UInt overflow! Too many constants in one chunk!");
+        return 0;
+    }
+
+    return (uint16_t)indx;
+}
+
+// emits one byte, attributed to the line of the previously consumed token
+void writeu8(CParseState *pstate, INSTRUCTION i) {
+    writeu8Chunk(pstate->state, getChunk(pstate), i, pstate->previous.line);
+}
+
+// emits a 16-bit operand, attributed to the line of the previously consumed token
+void writeu16(CParseState *pstate, uint16_t i) {
+    writeu16Chunk(pstate->state, getChunk(pstate), i, pstate->previous.line);
+}
+
+// emits OP_LOADCONST for val and records the pushed value
+void writeConstant(CParseState *pstate, CValue val) {
+    writeu8(pstate, OP_LOADCONST);
+    writeu16(pstate, makeConstant(pstate, val));
+
+    valuePushed(pstate, 1);
+}
+
+// Emits jump instruction i with a 0xFFFF placeholder operand and returns the
+// chunk index of that operand, to be filled in later by patchJmp().
+int writeJmp(CParseState *pstate, INSTRUCTION i) {
+    writeu8(pstate, i);
+    writeu16(pstate, 0xFFFF);
+
+    return getChunk(pstate)->count - 2;
+}
+
+// emits OP_POP with a count operand; the compile-time pushedValues counter is not touched
+void writePop(CParseState *pstate, int times) {
+    writeu8(pstate, OP_POP);
+    writeu8(pstate, times);
+}
+
+// emits OP_JMPBACK to `location`; the + 3 covers the jump instruction itself (opcode + 16-bit operand)
+void writeJmpBack(CParseState *pstate, int location) {
+    int jmp = (getChunk(pstate)->count - location) + 3;
+
+    if (jmp > UINT16_MAX)
+        error(pstate, "UInt overflow! Too much code to jump!");
+
+    writeu8(pstate, OP_JMPBACK);
+    writeu16(pstate, jmp);
+}
+
+// patches offset operand at location
+void patchJmp(CParseState *pstate, int index) {
+    unsigned int jump = getChunk(pstate)->count - index - 2;
+
+    if (jump > UINT16_MAX)
+        error(pstate, "UInt overflow!
Too much code to jump!"); + + memcpy(&getChunk(pstate)->buf[index], &jump, sizeof(uint16_t)); +} + +static uint16_t identifierConstant(CParseState *pstate, CToken *name) { + return makeConstant(pstate, cosmoV_newObj((CObj*)cosmoO_copyString(pstate->state, name->start, name->length))); +} + +static void addLocal(CParseState *pstate, CToken name) { + if (pstate->compiler->localCount > UINT8_MAX) + return error(pstate, "UInt overflow! Too many locals in scope!"); + + Local *local = &pstate->compiler->locals[pstate->compiler->localCount++]; + local->name = name; + local->depth = -1; + local->isCaptured = false; +} + +static int addUpvalue(CCompilerState *ccstate, uint8_t indx, bool isLocal) { + int upvals = ccstate->function->upvals; + + // check and make sure we haven't already captured it + for (int i = 0; i < upvals; i++) { + Upvalue *upval = &ccstate->upvalues[i]; + if (upval->index == indx && upval->isLocal == isLocal) // it matches! return that + return i; + } + + // TODO: throw error if upvals >= UINT8_MAX + + ccstate->upvalues[upvals].index = indx; + ccstate->upvalues[upvals].isLocal = isLocal; + return ccstate->function->upvals++; +} + +static int getLocal(CCompilerState *ccstate, CToken *name) { + for (int i = ccstate->localCount - 1; i >= 0; i--) { + Local *local = &ccstate->locals[i]; + if (local->depth != -1 && identifiersEqual(name, &local->name)) { // if the identifer is initalized and it matches, use it! + return i; + } + } + + // it wasn't found + return -1; +} + +static int getUpvalue(CCompilerState *ccstate, CToken *name) { + if (ccstate->enclosing == NULL) // there's no upvalues to lookup! + return -1; + + int local = getLocal(ccstate->enclosing, name); + if (local != -1) { + ccstate->enclosing->locals[local].isCaptured = true; + return addUpvalue(ccstate, local, true); + } + + int upval = getUpvalue(ccstate->enclosing, name); + if (upval != -1) + return addUpvalue(ccstate, upval, false); + + return -1; // failed! 
+}
+
+// gives the local its real scope depth, which makes it visible to getLocal()
+static void markInitialized(CParseState *pstate, int local) {
+    pstate->compiler->locals[local].depth = pstate->compiler->scopeDepth;
+}
+
+// Parses a comma separated argument list up to and including the closing ')'
+// and returns how many argument expressions were compiled.
+static int parseArguments(CParseState *pstate) {
+    int args = 0;
+
+    // there are args to parse!
+    if (!check(pstate, TOKEN_RIGHT_PAREN)) {
+        do {
+            expression(pstate);
+            args++;
+        } while(match(pstate, TOKEN_COMMA));
+    }
+    consume(pstate, TOKEN_RIGHT_PAREN, "Expected ')' to end call.");
+
+    // sanity check
+    if (args > UINT8_MAX) {
+        errorAtCurrent(pstate, "Too many arguments passed in call.");
+    }
+    return args;
+}
+
+// recovers stack (pops unneeded values, reports missing values)
+static void alignStack(CParseState *pstate, int alignment) {
+    // realign the stack
+    if (pstate->compiler->pushedValues > alignment) {
+        writePop(pstate, pstate->compiler->pushedValues - alignment);
+    } else if (pstate->compiler->pushedValues < alignment) {
+        error(pstate, "Missing expression!");
+    }
+
+    pstate->compiler->pushedValues = alignment;
+}
+
+// ================================================================ [PRATT'S PARSER] ================================================================
+
+// prefix handler for a number token: converts its text with strtod and loads it as a constant
+static void number(CParseState *pstate, bool canAssign) {
+    cosmo_Number num = strtod(pstate->previous.start, NULL);
+    writeConstant(pstate, cosmoV_newNumber(num));
+}
+
+// prefix handler for a string token: the +1 / -2 drop the first and last character of the token text
+static void string(CParseState *pstate, bool canAssign) {
+    CObjString *strObj = cosmoO_copyString(pstate->state, pstate->previous.start + 1, pstate->previous.length - 2);
+    writeConstant(pstate, cosmoV_newObj((CObj*)strObj));
+}
+
+// prefix handler for true / false / nil
+static void literal(CParseState *pstate, bool canAssign) {
+    switch (pstate->previous.type) {
+        case TOKEN_TRUE: writeu8(pstate, OP_TRUE); break;
+        case TOKEN_FALSE: writeu8(pstate, OP_FALSE); break;
+        case TOKEN_NIL: writeu8(pstate, OP_NIL); break;
+        default:
+            break;
+    }
+
+    valuePushed(pstate, 1);
+}
+
+// parses prefix operators
+static void unary(CParseState *pstate, bool canAssign) {
+    CTokenType type = pstate->previous.type;
+    int
cachedLine = pstate->previous.line; // eval'ing the next expression might change the line number
+
+    // only eval the next *value*
+    parsePrecedence(pstate, PREC_UNARY);
+
+    // the operand is compiled first, then the operator that consumes it
+    switch(type) {
+        case TOKEN_MINUS: writeu8Chunk(pstate->state, getChunk(pstate), OP_NEGATE, cachedLine); break;
+        case TOKEN_BANG: writeu8Chunk(pstate->state, getChunk(pstate), OP_NOT, cachedLine); break;
+        default:
+            error(pstate, "Unexpected unary operator!");
+    }
+}
+
+// parses infix operators
+static void binary(CParseState *pstate, bool canAssign) {
+    CTokenType type = pstate->previous.type; // already consumed
+    int cachedLine = pstate->previous.line; // eval'ing the next expression might change the line number
+
+    // the right operand only takes operators that bind tighter than this one,
+    // which makes these operators left-associative
+    parsePrecedence(pstate, getRule(type)->level + 1);
+
+    switch (type) {
+        // ARITH
+        case TOKEN_PLUS: writeu8Chunk(pstate->state, getChunk(pstate), OP_ADD, cachedLine); break;
+        case TOKEN_MINUS: writeu8Chunk(pstate->state, getChunk(pstate), OP_SUB, cachedLine); break;
+        case TOKEN_STAR: writeu8Chunk(pstate->state, getChunk(pstate), OP_MULT, cachedLine); break;
+        case TOKEN_SLASH: writeu8Chunk(pstate->state, getChunk(pstate), OP_DIV, cachedLine); break;
+        // EQUALITY
+        case TOKEN_EQUAL_EQUAL: writeu8Chunk(pstate->state, getChunk(pstate), OP_EQUAL, cachedLine); break;
+        case TOKEN_GREATER: writeu8Chunk(pstate->state, getChunk(pstate), OP_GREATER, cachedLine); break;
+        case TOKEN_LESS: writeu8Chunk(pstate->state, getChunk(pstate), OP_LESS, cachedLine); break;
+        case TOKEN_GREATER_EQUAL: writeu8Chunk(pstate->state, getChunk(pstate), OP_GREATER_EQUAL, cachedLine); break;
+        case TOKEN_LESS_EQUAL: writeu8Chunk(pstate->state, getChunk(pstate), OP_LESS_EQUAL, cachedLine); break;
+        // != is compiled as "equal, then not"
+        case TOKEN_BANG_EQUAL: writeu8Chunk(pstate->state, getChunk(pstate), OP_EQUAL, cachedLine); writeu8Chunk(pstate->state, getChunk(pstate), OP_NOT, cachedLine); break;
+        default:
+            error(pstate, "Unexpected operator!");
+    }
+
+    valuePopped(pstate, 1); // we pop 2 values off the stack and push 1 for a net pop of
1 value
+}
+
+// prefix handler for '(': a parenthesized expression
+static void group(CParseState *pstate, bool canAssign) {
+    expression(pstate);
+    consume(pstate, TOKEN_RIGHT_PAREN, "Expected ')'");
+}
+
+// emits a getter/setter opcode with its operand: 16 bits for globals, 8 bits for everything else
+static void _etterOP(CParseState *pstate, uint8_t op, int arg) {
+    writeu8(pstate, op);
+    if (op == OP_GETGLOBAL || op == OP_SETGLOBAL) // globals are stored with a u16
+        writeu16(pstate, arg);
+    else
+        writeu8(pstate, arg);
+}
+
+// Compiles a read of `name`, or an assignment to it when canAssign is set and
+// an '=' follows. Lookup order: local, then upvalue, then global.
+static void namedVariable(CParseState *pstate, CToken name, bool canAssign) {
+    uint8_t opGet, opSet;
+    int arg = getLocal(pstate->compiler, &name);
+
+    if (arg != -1) {
+        // we found it in out local table!
+        opGet = OP_GETLOCAL;
+        opSet = OP_SETLOCAL;
+    } else if ((arg = getUpvalue(pstate->compiler, &name)) != -1) {
+        opGet = OP_GETUPVAL;
+        opSet = OP_SETUPVAL;
+    } else {
+        // local & upvalue wasnt' found, assume it's a global!
+        arg = identifierConstant(pstate, &name);
+        opGet = OP_GETGLOBAL;
+        opSet = OP_SETGLOBAL;
+    }
+
+    if (canAssign && match(pstate, TOKEN_EQUAL)) {
+        // setter
+        expression(pstate);
+        _etterOP(pstate, opSet, arg);
+        valuePopped(pstate, 1);
+    } else {
+        // getter
+        _etterOP(pstate, opGet, arg);
+        valuePushed(pstate, 1);
+    }
+}
+
+// infix handler for 'and': if the left value is falsey the jump skips the right
+// operand and keeps the left value; otherwise it is popped and the right operand is evaluated.
+// NOTE(review): writePop() does not adjust pushedValues while the right operand
+// counts as another push -- confirm the compile-time stack count stays balanced.
+static void and_(CParseState *pstate, bool canAssign) {
+    int jump = writeJmp(pstate, OP_EJMP); // conditional jump without popping
+
+    writePop(pstate, 1);
+    parsePrecedence(pstate, PREC_AND);
+
+    patchJmp(pstate, jump);
+}
+
+// infix handler for 'or': a falsey left value jumps to the pop + right operand,
+// anything else takes the unconditional jump past them and stays as the result.
+static void or_(CParseState *pstate, bool canAssign) {
+    int elseJump = writeJmp(pstate, OP_EJMP);
+    int endJump = writeJmp(pstate, OP_JMP);
+
+    patchJmp(pstate, elseJump);
+    writePop(pstate, 1);
+
+    parsePrecedence(pstate, PREC_OR);
+
+    patchJmp(pstate, endJump);
+}
+
+// prefix handler for the 'function' keyword used inside an expression
+static void anonFunction(CParseState *pstate, bool canAssign) {
+    function(pstate, FTYPE_FUNCTION);
+}
+
+// prefix handler for an identifier
+static void variable(CParseState *pstate, bool canAssign) {
+    namedVariable(pstate, pstate->previous, canAssign);
+}
+
+// infix handler for '..': folds a whole chain of concatenations into a single OP_CONCAT
+static void concat(CParseState *pstate, bool canAssign) {
+    CTokenType type = pstate->previous.type;
+
+    int vars = 1; // we
already have something on the stack
+    do {
+        parsePrecedence(pstate, getRule(type)->level + 1); // parse until next concat
+        vars++;
+    } while (match(pstate, TOKEN_DOT_DOT));
+
+    writeu8(pstate, OP_CONCAT);
+    writeu8(pstate, vars);
+
+    valuePopped(pstate, vars - 1); // - 1 because we're pushing the concat result
+}
+
+// infix handler for '(': emits OP_CALL followed by two bytes, the argument count
+// and the number of results the call is expected to leave on the stack.
+static void call_(CParseState *pstate, bool canAssign) {
+    // we enter having already consumed the '('
+
+    // grab our arguments
+    uint8_t argCount = parseArguments(pstate);
+    valuePopped(pstate, argCount + 1); // all of these values will be popped off the stack when returned (+1 for the function)
+    writeu8(pstate, OP_CALL);
+    writeu8(pstate, argCount);
+
+    // hacky hacky hacky hACKY GACJ HACK!!!!!!!!
+    if (pstate->compiler->pushedValues < pstate->compiler->savedPushed) { // there's empty spots on the stack waiting to be filled, lets make OP_CALL fill those spots for us
+        writeu8(pstate, pstate->compiler->savedPushed - pstate->compiler->pushedValues); // number of expected results
+        pstate->compiler->pushedValues = pstate->compiler->savedPushed; // either way the stack will be balanaced after this call
+    } else {
+        writeu8(pstate, 1); // we expect 1 result by default
+        valuePushed(pstate, 1);
+    }
+}
+
+// Pratt table indexed by token type: {prefix handler, infix handler, infix precedence}
+ParseRule ruleTable[] = {
+    [TOKEN_LEFT_PAREN] = {group, call_, PREC_CALL},
+    [TOKEN_RIGHT_PAREN] = {NULL, NULL, PREC_NONE},
+    [TOKEN_LEFT_BRACE] = {NULL, NULL, PREC_NONE},
+    [TOKEN_RIGHT_BRACE] = {NULL, NULL, PREC_NONE},
+    [TOKEN_COMMA] = {NULL, NULL, PREC_NONE},
+    [TOKEN_DOT] = {NULL, NULL, PREC_NONE},
+    [TOKEN_DOT_DOT] = {NULL, concat, PREC_CONCAT},
+    [TOKEN_MINUS] = {unary, binary, PREC_TERM},
+    [TOKEN_PLUS] = {NULL, binary, PREC_TERM},
+    [TOKEN_SLASH] = {NULL, binary, PREC_FACTOR},
+    [TOKEN_STAR] = {NULL, binary, PREC_FACTOR},
+    [TOKEN_EOS] = {NULL, NULL, PREC_NONE},
+    [TOKEN_BANG] = {unary, NULL, PREC_NONE},
+    [TOKEN_BANG_EQUAL] = {NULL, binary, PREC_EQUALITY},
+    [TOKEN_EQUAL] = {NULL, NULL, PREC_NONE},
+    [TOKEN_EQUAL_EQUAL] = {NULL, binary,
PREC_EQUALITY}, + [TOKEN_GREATER] = {NULL, binary, PREC_COMPARISON}, + [TOKEN_GREATER_EQUAL] = {NULL, binary, PREC_COMPARISON}, + [TOKEN_LESS] = {NULL, binary, PREC_COMPARISON}, + [TOKEN_LESS_EQUAL] = {NULL, binary, PREC_COMPARISON}, + [TOKEN_IDENTIFIER] = {variable, NULL, PREC_NONE}, + [TOKEN_STRING] = {string, NULL, PREC_NONE}, + [TOKEN_NUMBER] = {number, NULL, PREC_NONE}, + [TOKEN_NIL] = {literal, NULL, PREC_NONE}, + [TOKEN_TRUE] = {literal, NULL, PREC_NONE}, + [TOKEN_FALSE] = {literal, NULL, PREC_NONE}, + [TOKEN_AND] = {NULL, and_, PREC_AND}, + [TOKEN_DO] = {NULL, NULL, PREC_NONE}, + [TOKEN_ELSE] = {NULL, NULL, PREC_NONE}, + [TOKEN_ELSEIF] = {NULL, NULL, PREC_NONE}, + [TOKEN_END] = {NULL, NULL, PREC_NONE}, + [TOKEN_FOR] = {NULL, NULL, PREC_NONE}, + [TOKEN_FUNCTION] = {anonFunction, NULL, PREC_NONE}, + [TOKEN_IF] = {NULL, NULL, PREC_NONE}, + [TOKEN_LOCAL] = {NULL, NULL, PREC_NONE}, + [TOKEN_NOT] = {NULL, NULL, PREC_NONE}, + [TOKEN_OR] = {NULL, or_, PREC_OR}, + [TOKEN_RETURN] = {NULL, NULL, PREC_NONE}, + [TOKEN_THEN] = {NULL, NULL, PREC_NONE}, + [TOKEN_WHILE] = {NULL, NULL, PREC_NONE}, + [TOKEN_ERROR] = {NULL, NULL, PREC_NONE}, + [TOKEN_EOF] = {NULL, NULL, PREC_NONE} +}; + +static ParseRule* getRule(CTokenType type) { + return &ruleTable[type]; +} + +static void parsePrecedence(CParseState *pstate, Precedence prec) { + advance(pstate); + + ParseFunc prefix = getRule(pstate->previous.type)->prefix; + + if (prefix == NULL) { + return error(pstate, "Expected expression!"); + } + + bool canAssign = prec <= PREC_ASSIGNMENT; + prefix(pstate, canAssign); + + while (prec <= getRule(pstate->current.type)->level) { + ParseFunc infix = getRule(pstate->current.type)->infix; + advance(pstate); + infix(pstate, canAssign); + } + + if (canAssign && match(pstate, TOKEN_EQUAL)) { + error(pstate, "Invalid assignment!"); + } +} + +static void declareLocal(CParseState *pstate, bool forceLocal) { + if (pstate->compiler->scopeDepth == 0 && !forceLocal) + return; + + CToken* name = 
&pstate->previous; + + // check if we already have a local with that identifier + for (int i = 0; i < pstate->compiler->localCount; i++) { + Local *local = &pstate->compiler->locals[i]; + + // we've reached a previous scope or an invalid scope, stop checking lol + if (local->depth != -1 && pstate->compiler->scopeDepth > local->depth) + break; + + if (identifiersEqual(name, &local->name)) + error(pstate, "There's already a local in scope with this name!"); + } + + addLocal(pstate, *name); +} + +static uint16_t parseVariable(CParseState *pstate, const char* errorMessage, bool forceLocal) { + consume(pstate, TOKEN_IDENTIFIER, errorMessage); + + declareLocal(pstate, forceLocal); + if (pstate->compiler->scopeDepth > 0 || forceLocal) + return pstate->compiler->localCount - 1; + + return identifierConstant(pstate, &pstate->previous); +} + +static void defineVariable(CParseState *pstate, uint16_t global, bool forceLocal) { + if (pstate->hadError) + return; + + if (pstate->compiler->scopeDepth > 0 || forceLocal) { + markInitialized(pstate, global); + valuePopped(pstate, 1); // the local stays on the stack! 
+        return;
+    }
+
+    writeu8(pstate, OP_SETGLOBAL);
+    writeu16(pstate, global);
+
+    valuePopped(pstate, 1);
+}
+
+// Removes every local declared deeper than toScope and emits the code that
+// discards it at runtime: captured locals are closed with OP_CLOSE, runs of
+// plain locals are dropped with a single OP_POP.
+static void popLocals(CParseState *pstate, int toScope) {
+    if (pstate->hadError)
+        return;
+
+    // count the locals in scope to pop
+    int localsToPop = 0;
+
+    while (pstate->compiler->localCount > 0 && pstate->compiler->locals[pstate->compiler->localCount - 1].depth > toScope) {
+        // inspect the local that is actually being removed (the top one), not the
+        // slot that happens to sit at index localsToPop
+        Local *local = &pstate->compiler->locals[pstate->compiler->localCount - 1];
+
+        if (local->isCaptured) { // local needs to be closed over so other closures can reference it
+            // first though, if there are other locals in queue to pop first, go ahead and pop those :)
+            if (localsToPop > 0) {
+                writePop(pstate, localsToPop);
+                localsToPop = 0;
+            }
+
+            writeu8(pstate, OP_CLOSE);
+        } else {
+            localsToPop++;
+        }
+
+        pstate->compiler->localCount--;
+    }
+
+    if (localsToPop > 0) {
+        writePop(pstate, localsToPop);
+    }
+}
+
+// enters a nested lexical scope
+static void beginScope(CParseState *pstate) {
+    pstate->compiler->scopeDepth++;
+}
+
+// leaves the current lexical scope and discards the locals declared in it
+static void endScope(CParseState *pstate) {
+    pstate->compiler->scopeDepth--;
+
+    popLocals(pstate, pstate->compiler->scopeDepth);
+}
+
+// parses expressionStatements until a TOKEN_END is consumed
+static void block(CParseState *pstate) {
+    while(!check(pstate, TOKEN_END) && !check(pstate, TOKEN_EOF)) {
+        declaration(pstate);
+    }
+
+    consume(pstate, TOKEN_END, "'end' expected to end block.'");
+}
+
+// Parses a variable declaration with an optional initializer. A ',' after the
+// name recurses to declare the next variable of the same statement; a variable
+// without an initializer is set to nil.
+static void varDeclaration(CParseState *pstate, bool forceLocal) {
+    uint16_t global = parseVariable(pstate, "Expected identifer!", forceLocal);
+
+    if (match(pstate, TOKEN_EQUAL)) { // assigning a variable
+        valuePopped(pstate, 1); // we are expecting a value
+
+        // consume all the ','
+        do {
+            expression(pstate);
+        } while (match(pstate, TOKEN_COMMA));
+
+        if (pstate->compiler->pushedValues < pstate->compiler->savedPushed) {
+            writeu8(pstate, OP_NIL); // didn't get expected result
+            valuePushed(pstate, 1);
+        }
+
+        valuePushed(pstate, 1);
+    } else if (match(pstate, TOKEN_COMMA)) {
+        valuePopped(pstate, 1);
// we are expecting a value + varDeclaration(pstate, forceLocal); + + if (pstate->compiler->pushedValues < pstate->compiler->savedPushed) { + writeu8(pstate, OP_NIL); // didn't get expected result + valuePushed(pstate, 1); + } + + valuePushed(pstate, 1); // we already popped, & when we call defineVariable it'll pop, so go ahead and fix it here + } else { + writeu8(pstate, OP_NIL); + valuePushed(pstate, 1); + } + + defineVariable(pstate, global, forceLocal); +} + +static void ifStatement(CParseState *pstate) { + expression(pstate); + consume(pstate, TOKEN_THEN, "Expect 'then' after expression."); + + int jump = writeJmp(pstate, OP_PEJMP); + valuePopped(pstate, 1); // OP_PEJMP pops the conditional! + + // parse until 'end' or 'else' + beginScope(pstate); + + while(!check(pstate, TOKEN_END) && !check(pstate, TOKEN_ELSE) && !check(pstate, TOKEN_ELSEIF) && !check(pstate, TOKEN_EOF)) { + declaration(pstate); + } + + endScope(pstate); + + if (match(pstate, TOKEN_ELSE)) { + int elseJump = writeJmp(pstate, OP_JMP); + + // setup our jump + patchJmp(pstate, jump); + + // parse until 'end' + beginScope(pstate); + block(pstate); + endScope(pstate); + + patchJmp(pstate, elseJump); + } else if (match(pstate, TOKEN_ELSEIF)) { + int elseJump = writeJmp(pstate, OP_JMP); + + // setup our jump + patchJmp(pstate, jump); + + ifStatement(pstate); // recursively call into ifStatement + patchJmp(pstate, elseJump); + } else { // the most vanilla if statement possible (no else, no elseif) + patchJmp(pstate, jump); + consume(pstate, TOKEN_END, "'end' expected to end block."); + } +} + +static void whileStatement(CParseState *pstate) { + int jumpLocation = getChunk(pstate)->count; + expression(pstate); + + consume(pstate, TOKEN_DO, "expected 'do' after conditional expression."); + + int exitJump = writeJmp(pstate, OP_PEJMP); // pop equality jump + valuePopped(pstate, 1); // OP_PEJMP pops the conditional! 
+ + beginScope(pstate); + block(pstate); // parse until 'end' + endScope(pstate); + + writeJmpBack(pstate, jumpLocation); + patchJmp(pstate, exitJump); +} + +static void function(CParseState *pstate, FunctionType type) { + CCompilerState compiler; + initCompilerState(pstate, &compiler, type, pstate->compiler); + + int savedPushed = pstate->compiler->pushedValues; + // start parsing function + beginScope(pstate); + + // parse the parameters + consume(pstate, TOKEN_LEFT_PAREN, "Expected '(' after identifier."); + if (!check(pstate, TOKEN_RIGHT_PAREN)) { + do { + // add arg to function + compiler.function->args++; + if (compiler.function->args > UINT16_MAX - 1) { // -1 since the function would already be on the stack + errorAtCurrent(pstate, "Too many parameters!"); + } + + // parse identifier for param (force them to be a local) + uint8_t funcIdent = parseVariable(pstate, "Expected identifier for function!", true); + defineVariable(pstate, funcIdent, true); + valuePushed(pstate, 1); // they *will* be populated during runtime + } while (match(pstate, TOKEN_COMMA)); + } + consume(pstate, TOKEN_RIGHT_PAREN, "Expected ')' after parameters."); + + // compile function block + block(pstate); + alignStack(pstate, savedPushed); + endScope(pstate); + + CObjFunction *objFunc = endCompiler(pstate, 0); + + // push closure + writeu8(pstate, OP_CLOSURE); + writeu16(pstate, makeConstant(pstate, cosmoV_newObj(objFunc))); + valuePushed(pstate, 1); + + // tell the vm what locals/upvalues to pass to this closure + for (int i = 0; i < objFunc->upvals; i++) { + writeu8(pstate, compiler.upvalues[i].isLocal ? 
OP_GETLOCAL : OP_GETUPVAL);
+        writeu8(pstate, compiler.upvalues[i].index);
+    }
+}
+// Compiles `function name(...) ... end`: declares `name`, compiles the body through function(), then binds the resulting closure with defineVariable().
+static void functionDeclaration(CParseState *pstate) {
+    uint16_t var = parseVariable(pstate, "Expected identifer!", false); // keep all 16 bits: parseVariable() returns uint16_t and defineVariable() takes one, a uint8_t here truncated indices above 255
+
+    if (pstate->compiler->scopeDepth > 0)
+        markInitialized(pstate, var); // done before the body is compiled (presumably so the body can refer to the function itself; confirm against markInitialized)
+
+    function(pstate, FTYPE_FUNCTION);
+
+    defineVariable(pstate, var, false);
+}
+// Compiles `return [expr {, expr}]`: rejects it outside of a function, then emits OP_RETURN followed by the number of results (at most UINT8_MAX).
+static void returnStatement(CParseState *pstate) {
+    if (pstate->compiler->type != FTYPE_FUNCTION) {
+        error(pstate, "Expected 'return' in function!");
+        return;
+    }
+
+    // can return multiple results
+    int results = 0;
+    if (!check(pstate, TOKEN_EOS)) { // make sure its not an end of a statement
+        do {
+            expression(pstate);
+            results++;
+        } while (match(pstate, TOKEN_COMMA));
+
+        if (results > UINT8_MAX) {
+            error(pstate, "Too many results returned!");
+            return;
+        }
+    }
+
+    writeu8(pstate, OP_RETURN);
+    writeu8(pstate, results);
+    valuePopped(pstate, results);
+}
+// Compiles `local function name(...) ... end`: same as functionDeclaration() but the name is always declared as a local.
+static void localFunction(CParseState *pstate) {
+    uint16_t var = parseVariable(pstate, "Expected identifer!", true); // same width as parseVariable()'s return type, for consistency with functionDeclaration()
+    markInitialized(pstate, var);
+
+    function(pstate, FTYPE_FUNCTION);
+
+    defineVariable(pstate, var, true);
+}
+
+static void forLoop(CParseState *pstate) {
+    beginScope(pstate);
+
+    consume(pstate, TOKEN_LEFT_PAREN, "Expected '(' after 'for'");
+
+    // parse initalizer
+    if (!match(pstate, TOKEN_EOS)) {
+        expressionStatement(pstate);
+    }
+
+    int loopStart = getChunk(pstate)->count;
+
+    // parse conditional
+    int exitJmp = -1;
+    if (!match(pstate, TOKEN_EOS)) {
+        expression(pstate);
+        consume(pstate, TOKEN_EOS, "Expected ';' after conditional");
+
+        exitJmp = writeJmp(pstate, OP_PEJMP);
+        valuePopped(pstate, 1);
+    }
+
+    // parse iterator
+    if (!match(pstate, TOKEN_RIGHT_PAREN)) {
+        int bodyJmp = writeJmp(pstate, OP_JMP);
+
+        int iteratorStart = getChunk(pstate)->count;
+        expression(pstate);
+        consume(pstate, TOKEN_RIGHT_PAREN, "Expected ')' after iterator");
+
+        writeJmpBack(pstate, loopStart);
+        loopStart = iteratorStart;
+        
patchJmp(pstate, bodyJmp); + } + + consume(pstate, TOKEN_DO, "Expected 'do'"); + + block(pstate); // parses until 'end' + + writeJmpBack(pstate, loopStart); + + if (exitJmp != -1) { + patchJmp(pstate, exitJmp); + } + + endScope(pstate); +} + +static void synchronize(CParseState *pstate) { + pstate->panic = false; + + while (pstate->current.type != TOKEN_EOF) { + if (pstate->previous.type == TOKEN_EOS) + return; + + advance(pstate); + } +} + +static void expression(CParseState *pstate) { + parsePrecedence(pstate, PREC_ASSIGNMENT); +} + +static void expressionStatement(CParseState *pstate) { + pstate->compiler->savedPushed = pstate->compiler->pushedValues; + + if (match(pstate, TOKEN_VAR)) { + varDeclaration(pstate, false); + } else if (match(pstate, TOKEN_LOCAL)) { + // force declare a local + if (match(pstate, TOKEN_FUNCTION)) + localFunction(pstate); // force local a function + else + varDeclaration(pstate, true); // force local a variable + } else if (match(pstate, TOKEN_IF)) { + ifStatement(pstate); + } else if (match(pstate, TOKEN_DO)) { + beginScope(pstate); + block(pstate); + endScope(pstate); + } else if (match(pstate, TOKEN_WHILE)) { + whileStatement(pstate); + } else if (match(pstate, TOKEN_FOR)) { + forLoop(pstate); + } else if (match(pstate, TOKEN_FUNCTION)) { + functionDeclaration(pstate); + } else if (match(pstate, TOKEN_RETURN)) { + returnStatement(pstate); + } else if (check(pstate, TOKEN_EOS)) { + // do nothing, just consume it + } else { + expression(pstate); + } + consume(pstate, TOKEN_EOS, "Expected end of statement after expression."); + + // realign the stack + alignStack(pstate, pstate->compiler->savedPushed); +} + +static void statement(CParseState *pstate) { + expressionStatement(pstate); +} + +static void declaration(CParseState *pstate) { + statement(pstate); + + // if we paniced, skip the whole statement! 
+    if (pstate->panic)
+        synchronize(pstate);
+}
+// Finishes the function currently being compiled: pops its remaining locals, emits OP_RETURN with `results`, makes the enclosing compiler current again and returns the finished function.
+static CObjFunction *endCompiler(CParseState *pstate, int results) {
+    popLocals(pstate, pstate->compiler->scopeDepth); // remove the locals from other scopes
+    writeu8(pstate, OP_RETURN);
+    writeu8(pstate, results);
+
+    // update pstate to next compiler state
+    CCompilerState *cachedCCState = pstate->compiler;
+    pstate->compiler = cachedCCState->enclosing;
+
+    return cachedCCState->function;
+}
+
+// ================================================================ [API] ================================================================
+// Compiles `source` with the GC frozen. On success a closure over the compiled function is pushed and the function is returned; on a parse error nil is pushed instead and NULL is returned.
+CObjFunction* cosmoP_compileString(CState *state, const char *source) {
+    CParseState parser;
+    CCompilerState compiler;
+    cosmoM_freezeGC(state); // ignore all GC events while compiling
+    initParseState(&parser, &compiler, state, source);
+
+    advance(&parser);
+
+    while (!match(&parser, TOKEN_EOF)) {
+        declaration(&parser);
+    }
+
+    consume(&parser, TOKEN_EOF, "End of file expected!");
+
+    popLocals(&parser, -1); // needed to close over the values
+
+    if (parser.hadError) { // free the function too
+        CObjFunction *failedFunc = endCompiler(&parser, 0); // finish the compiler first: endCompiler() still calls writeu8(), which presumably writes into this function's chunk (confirm), so the function must still be alive here
+        cosmoO_freeObject(state, (CObj*)failedFunc); // only now is it safe to release the half-compiled function
+        freeParseState(&parser);
+
+        // the VM still expects a result on the stack TODO: push the error string to the stack
+        cosmoV_pushValue(state, cosmoV_newNil());
+        cosmoM_unfreezeGC(state);
+        return NULL;
+    }
+
+    CObjFunction* resFunc = compiler.function;
+    // VM expects the closure on the stack :P (we do this before ending the compiler so our GC doesn't free it)
+    cosmoV_pushValue(state, cosmoV_newObj((CObj*)cosmoO_newClosure(state, resFunc)));
+
+    endCompiler(&parser, 0);
+    freeParseState(&parser);
+    cosmoM_unfreezeGC(state);
+    return resFunc;
+}
\ No newline at end of file
diff --git a/src/cparse.h b/src/cparse.h
new file mode 100644
index 0000000..8ff9c1d
--- /dev/null
+++ b/src/cparse.h
@@ -0,0 +1,39 @@
+#ifndef CPARSE_H
+#define CPARSE_H
+
+#include "cosmo.h"
+#include "clex.h"
+
+typedef struct 
{ + CToken name; + int depth; + bool isCaptured; // is the Local referenced in an upvalue? +} Local; + +typedef struct { + uint8_t index; + bool isLocal; +} Upvalue; + +typedef enum { + FTYPE_FUNCTION, + FTYPE_SCRIPT +} FunctionType; + +typedef struct CCompilerState { + CObjFunction *function; + FunctionType type; + + Local locals[256]; + Upvalue upvalues[256]; + int localCount; + int scopeDepth; + int pushedValues; + int savedPushed; + struct CCompilerState* enclosing; +} CCompilerState; + +// compiles source into CChunk, if NULL is returned, a syntaxical error has occured and pushed onto the stack +CObjFunction* cosmoP_compileString(CState *state, const char *source); + +#endif \ No newline at end of file diff --git a/src/cstate.c b/src/cstate.c new file mode 100644 index 0000000..50d9b80 --- /dev/null +++ b/src/cstate.c @@ -0,0 +1,74 @@ +#include "cstate.h" +#include "cchunk.h" +#include "cobj.h" +#include "cvm.h" + +#include + +CState *cosmoV_newState() { + // we use C's malloc because we don't want to trigger a GC with an invalid state + CState *state = malloc(sizeof(CState)); + + if (state == NULL) { + CERROR("failed to allocate memory!"); + exit(1); + } + + state->panic = false; + state->freezeGC = false; + + // GC + state->objects = NULL; + state->grayCount = 0; + state->grayCapacity = 2; + state->grayStack = NULL; + state->allocatedBytes = 0; + state->nextGC = 1024 * 8; // threshhold starts at 8kb + + // init stack + state->top = state->stack; + state->frameCount = 0; + state->openUpvalues = NULL; + + cosmoT_initTable(state, &state->strings, 8); // init string table + cosmoT_initTable(state, &state->globals, 8); // init global table + return state; +} + +void cosmoV_freeState(CState *state) { + // frees all the objects + CObj *objs = state->objects; + while (objs != NULL) { + CObj *next = objs->next; + cosmoO_freeObject(state, objs); + objs = next; + } + + // free our string & global table + cosmoT_clearTable(state, &state->strings); + 
cosmoT_clearTable(state, &state->globals); + + // free our gray stack & finally free the state structure + free(state->grayStack); + free(state); +} + +void cosmoV_register(CState *state, const char *identifier, CValue val) { + // we push the values so the garbage collector can find them + cosmoV_pushValue(state, cosmoV_newObj(cosmoO_copyString(state, identifier, strlen(identifier)))); + cosmoV_pushValue(state, val); + + CValue *oldVal = cosmoT_insert(state, &state->globals, *cosmoV_getTop(state, 1)); + *oldVal = val; + + cosmoV_setTop(state, 2); // pops the 2 values off the stack +} + +void cosmoV_printStack(CState *state) { + printf("==== [[ stack dump ]] ====\n"); + for (CValue *top = state->top - 1; top >= state->stack; top--) { + printf("%d: ", (int)(top - state->stack)); + printValue(*top); + printf("\n"); + } +} \ No newline at end of file diff --git a/src/cstate.h b/src/cstate.h new file mode 100644 index 0000000..817c9c0 --- /dev/null +++ b/src/cstate.h @@ -0,0 +1,63 @@ +#ifndef CSTATE_H +#define CSTATE_H + +#include "cosmo.h" +#include "cvalue.h" +#include "cobj.h" +#include "ctable.h" + +typedef struct CCompilerState CCompilerState; + +typedef struct CCallFrame { + CObjClosure *closure; + INSTRUCTION *pc; + CValue* base; +} CCallFrame; + +typedef struct CState { + bool panic; + int freezeGC; // when > 0, GC events will be ignored (for internal use) + CObj *objects; // tracks all of our allocated objects + CObj **grayStack; // keeps track of which objects *haven't yet* been traversed in our GC, but *have been* found + int grayCount; + int grayCapacity; + size_t allocatedBytes; + size_t nextGC; // when allocatedBytes reaches this threshhold, trigger a GC event + + CObjUpval *openUpvalues; // tracks all of our still open (meaning still on the stack) upvalues + CTable strings; + CTable globals; + + CValue *top; // top of the stack + CValue stack[STACK_MAX]; // stack + CCallFrame callFrame[FRAME_MAX]; // call frames + int frameCount; +} CState; + +COSMO_API 
CState *cosmoV_newState(); +COSMO_API void cosmoV_register(CState *state, const char *identifier, CValue val); +COSMO_API void cosmoV_freeState(CState *state); +COSMO_API void cosmoV_printStack(CState *state); + +// pushes value to the stack +static inline void cosmoV_pushValue(CState *state, CValue val) { + *(state->top++) = val; +} + +// sets stack->top to stack->top - indx +static inline StkPtr cosmoV_setTop(CState *state, int indx) { + state->top -= indx; + return state->top; +} + +// returns stack->top - indx - 1 +static inline StkPtr cosmoV_getTop(CState *state, int indx) { + return &state->top[-(indx + 1)]; +} + +// pops 1 value off the stack +static inline StkPtr cosmoV_pop(CState *state) { + return cosmoV_setTop(state, 1); +} + +#endif \ No newline at end of file diff --git a/src/ctable.c b/src/ctable.c new file mode 100644 index 0000000..db0eed0 --- /dev/null +++ b/src/ctable.c @@ -0,0 +1,193 @@ +#include "ctable.h" +#include "cmem.h" +#include "cvalue.h" +#include "cobj.h" + +#include + +#define MAX_TABLE_FILL 0.75 + +void cosmoT_initTable(CState *state, CTable *tbl, int startCap) { + tbl->capacity = startCap; + tbl->count = 0; + tbl->table = NULL; // to let out GC know we're initalizing + tbl->table = cosmoM_xmalloc(state, sizeof(CTableEntry) * startCap); + + // init everything to NIL + for (int i = 0; i < startCap; i++) { + tbl->table[i].key = cosmoV_newNil(); + tbl->table[i].val = cosmoV_newNil(); + } +} + +void cosmoT_addTable(CState *state, CTable *from, CTable *to) { + for (int i = 0; i < from->capacity; i++) { + CTableEntry *entry = &from->table[i]; + + if (!(IS_NIL(entry->key))) { + CValue *newVal = cosmoT_insert(state, to, entry->key); + *newVal = entry->val; + } + } +} + +void cosmoT_clearTable(CState *state, CTable *tbl) { + cosmoM_freearray(state, CTableEntry, tbl->table, tbl->capacity); +} + +uint32_t getObjectHash(CObj *obj) { + switch(obj->type) { + case COBJ_STRING: + return ((CObjString*)obj)->hash; + default: + return 0; + } +} + 
+uint32_t getValueHash(CValue *val) { // hash used to pick the starting bucket for `val`
+    switch (val->type) {
+        case COSMO_TOBJ:
+            return getObjectHash(val->val.obj);
+        case COSMO_TNUMBER:
+            // numbers have no hash function yet, so they fall through and hash to 0
+            // TODO: add support for other types
+        default:
+            return 0;
+    }
+}
+
+// Linear-probes `entries` for `key`: returns its entry when present, otherwise the slot to insert into (the last tombstone passed on the way, else the empty bucket). mask should always be (capacity - 1)
+static CTableEntry *findEntry(CTableEntry *entries, int mask, CValue key) {
+    uint32_t hash = getValueHash(&key);
+    uint32_t indx = hash & mask; // since we know the capacity will *always* be a power of 2, we can use bitwise & to perform a MUCH faster mod operation
+    CTableEntry *tomb = NULL;
+
+    // keep looking for an open slot in the entries array
+    while (true) {
+        CTableEntry *entry = &entries[indx];
+
+        if (IS_NIL(entry->key)) {
+            // check if it's an empty bucket or a tombstone
+            if (IS_NIL(entry->val)) {
+                // it's empty! if we found a tombstone, return that so it'll be reused
+                return tomb != NULL ? tomb : entry;
+            } else {
+                // its a tombstone!
+                tomb = entry;
+            }
+        } else if (cosmoV_equal(entry->key, key)) {
+            return entry;
+        }
+
+        indx = (indx + 1) & mask; // fast mod here too
+    }
+}
+// Rehashes every entry of `tbl` that has a non-nil key into a fresh array of `newCapacity` buckets (nil-keyed slots, i.e. empty buckets and tombstones, are skipped), frees the old array and updates capacity and count.
+static void growTbl(CState *state, CTable *tbl, size_t newCapacity) {
+    CTableEntry *entries = cosmoM_xmalloc(state, sizeof(CTableEntry) * newCapacity);
+    int newCount = 0; // must start at 0: it is incremented once per moved entry and then stored as tbl->count
+
+    // set all nodes as NIL : NIL
+    for (size_t i = 0; i < newCapacity; i++) {
+        entries[i].key = cosmoV_newNil();
+        entries[i].val = cosmoV_newNil();
+    }
+
+    // move over old values to the new buffer
+    for (int i = 0; i < tbl->capacity; i++) {
+        CTableEntry *oldEntry = &tbl->table[i];
+        if (IS_NIL(oldEntry->key))
+            continue; // skip empty keys
+
+        // get new entry location & update the node
+        CTableEntry *newEntry = findEntry(entries, newCapacity - 1, oldEntry->key);
+        newEntry->key = oldEntry->key;
+        newEntry->val = oldEntry->val;
+        newCount++; // inc count
+    }
+
+    // free the old table
+    cosmoM_freearray(state, CTableEntry, tbl->table, tbl->capacity);
+
+    tbl->table = entries;
+    tbl->capacity = newCapacity;
+    tbl->count = newCount;
+}
+
+// 
returns a pointer to the allocated value +COSMO_API CValue* cosmoT_insert(CState *state, CTable *tbl, CValue key) { + // make sure we have enough space allocated + if (tbl->count + 1 > tbl->capacity * MAX_TABLE_FILL) { + int newCap = tbl->capacity * GROW_FACTOR; + growTbl(state, tbl, newCap); + } + + // insert into the table + CTableEntry *entry = findEntry(tbl->table, tbl->capacity - 1, key); // -1 for our capacity mask + + if (IS_NIL(entry->key) && IS_NIL(entry->val)) // is it empty? + tbl->count++; + + entry->key = key; + return &entry->val; +} + +bool cosmoT_get(CTable *tbl, CValue key, CValue *val) { + if (tbl->count == 0) { + *val = cosmoV_newNil(); + return false; // sanity check + } + + CTableEntry *entry = findEntry(tbl->table, tbl->capacity - 1, key); + *val = entry->val; + + return !(IS_NIL(entry->key)); +} + +bool cosmoT_remove(CTable *tbl, CValue key) { + if (tbl->count == 0) return 0; // sanity check + + CTableEntry *entry = findEntry(tbl->table, tbl->capacity - 1, key); + if (IS_NIL(entry->key)) // sanity check + return false; + + // crafts tombstone + entry->key = cosmoV_newNil(); // this has to be nil + entry->val = cosmoV_newBoolean(false); // doesn't reall matter what this is, as long as it isn't nil + + return true; +} + +CObjString *cosmoT_lookupString(CTable *tbl, const char *str, size_t length, uint32_t hash) { + if (tbl->count == 0) return 0; // sanity check + uint32_t indx = hash & (tbl->capacity - 1); // since we know the capacity will *always* be a power of 2, we can use bitwise & to perform a MUCH faster mod operation + + // keep looking for an open slot in the entries array + while (true) { + CTableEntry *entry = &tbl->table[indx]; + + // check if it's an empty slot (meaning we dont have it in the table) + if (IS_NIL(entry->key) && IS_NIL(entry->val)) { + return NULL; + } else if (IS_STRING(entry->key) && cosmoV_readString(entry->key)->length == length && memcmp(cosmoV_readString(entry->key)->str, str, length) == 0) { + // it's a match! 
+ return (CObjString*)entry->key.val.obj; + } + + indx = (indx + 1) & (tbl->capacity - 1); // fast mod here too + } +} + +// for debugging purposes +void cosmoT_printTable(CTable *tbl, const char *name) { + printf("==== [[%s]] ====\n", name); + for (int i = 0; i < tbl->capacity; i++) { + CTableEntry *entry = &tbl->table[i]; + if (!(IS_NIL(entry->key))) { + printValue(entry->key); + printf(" - "); + printValue(entry->val); + printf("\n"); + } + } +} \ No newline at end of file diff --git a/src/ctable.h b/src/ctable.h new file mode 100644 index 0000000..e9e1b84 --- /dev/null +++ b/src/ctable.h @@ -0,0 +1,29 @@ +#ifndef CTABLE_H +#define CTABLE_H + +#include "cosmo.h" +#include "cvalue.h" + +typedef struct CTableEntry { + CValue key; + CValue val; +} CTableEntry; + +typedef struct CTable { + int count; + int capacity; + CTableEntry *table; +} CTable; + +COSMO_API void cosmoT_initTable(CState *state, CTable *tbl, int startCap); +COSMO_API void cosmoT_clearTable(CState *state, CTable *tbl); +COSMO_API void cosmoT_addTable(CState *state, CTable *from, CTable *to); +COSMO_API CValue *cosmoT_insert(CState *state, CTable *tbl, CValue key); + +CObjString *cosmoT_lookupString(CTable *tbl, const char *str, size_t length, uint32_t hash); +bool cosmoT_get(CTable *tbl, CValue key, CValue *val); +bool cosmoT_remove(CTable *tbl, CValue key); + +void cosmoT_printTable(CTable *tbl, const char *name); + +#endif \ No newline at end of file diff --git a/src/cvalue.c b/src/cvalue.c new file mode 100644 index 0000000..5f04c4d --- /dev/null +++ b/src/cvalue.c @@ -0,0 +1,72 @@ +#include "cmem.h" +#include "cvalue.h" +#include "cobj.h" + +void initValArray(CState *state, CValueArray *val, size_t startCapacity) { + val->count = 0; + val->capacity = startCapacity; + val->values = NULL; +} + +void cleanValArray(CState *state, CValueArray *array) { + cosmoM_freearray(state, CValue, array->values, array->capacity); +} + +void appendValArray(CState *state, CValueArray *array, CValue val) { + 
cosmoM_growarray(state, CValue, array->values, array->count, array->capacity);
+
+    array->values[array->count++] = val;
+}
+// Returns true when both values have the same type and equal payload; objects are compared with cosmoO_equalObject(), two nils are equal, any other type compares unequal.
+bool cosmoV_equal(CValue valA, CValue valB) {
+    if (valA.type != valB.type) // are they the same type?
+        return false;
+
+    // compare
+    switch (valA.type) {
+        case COSMO_TBOOLEAN: return valA.val.b == valB.val.b;
+        case COSMO_TNUMBER: return valA.val.num == valB.val.num;
+        case COSMO_TOBJ: return cosmoO_equalObject(valA.val.obj, valB.val.obj);
+        case COSMO_TNIL: return true;
+        default:
+            return false;
+    }
+}
+// Converts `val` to a string object: numbers are formatted with "%.14g", booleans become "true"/"false", objects are delegated to cosmoO_toString(), every other type yields an empty string.
+COSMO_API CObjString *cosmoV_toString(CState *state, CValue val) {
+    switch (val.type) {
+        case COSMO_TNUMBER: {
+            char buf[32];
+            int size = snprintf((char*)&buf, 32, "%.14g", val.val.num);
+            return cosmoO_copyString(state, (char*)&buf, size);
+        }
+        case COSMO_TBOOLEAN: {
+            return val.val.b ? cosmoO_copyString(state, "true", 4) : cosmoO_copyString(state, "false", 5);
+        }
+        case COSMO_TOBJ: {
+            return cosmoO_toString(state, val.val.obj);
+        }
+        default:
+            return cosmoO_copyString(state, "", 0); // length must match the literal: the previous 6 read past the end of "". NOTE(review): 6 suggests a six-character placeholder was intended, restore it if so
+    }
+}
+// Prints `val` to stdout without a trailing newline.
+void printValue(CValue val) {
+    switch (val.type) {
+        case COSMO_TNUMBER:
+            printf("%g", val.val.num);
+            break;
+        case COSMO_TBOOLEAN:
+            printf(cosmoV_readBoolean(val) ? "true" : "false");
+            break;
+        case COSMO_TOBJ: {
+            printObject(val.val.obj);
+            break;
+        }
+        case COSMO_TNIL:
+            printf("nil");
+            break;
+        default:
+            printf("");
+    }
+}
\ No newline at end of file
diff --git a/src/cvalue.h b/src/cvalue.h
new file mode 100644
index 0000000..1c03aa6
--- /dev/null
+++ b/src/cvalue.h
@@ -0,0 +1,62 @@
+#ifndef CVALUE_H
+#define CVALUE_H
+
+#include "cosmo.h"
+
+typedef enum {
+    COSMO_TNIL,
+    COSMO_TBOOLEAN,
+    COSMO_TNUMBER,
+    COSMO_TOBJ,
+    COSMO_TUSERDATA
+} CosmoType;
+
+typedef double cosmo_Number;
+
+/*
+    holds primitive cosmo types: `type` selects which member of `val` is meaningful
+*/
+typedef struct CValue {
+    CosmoType type;
+    union {
+        cosmo_Number num;
+        bool b; // boolean
+        void *ptr; // userdata
+        CObj *obj;
+    } val;
+} CValue;
+typedef CValue* StkPtr;
+
+typedef struct CValueArray {
+    size_t capacity;
+    size_t count;
+    CValue *values;
+} CValueArray;
+
+COSMO_API void initValArray(CState *state, CValueArray *val, size_t startCapacity);
+COSMO_API void cleanValArray(CState *state, CValueArray *array); // cleans array
+COSMO_API void appendValArray(CState *state, CValueArray *array, CValue val);
+
+COSMO_API void printValue(CValue val);
+COSMO_API bool cosmoV_equal(CValue valA, CValue valB);
+COSMO_API CObjString *cosmoV_toString(CState *state, CValue val);
+// type checks; argument and result are parenthesised so the macros stay correct inside larger expressions (e.g. `!IS_NIL(v)` or `IS_NIL(*p)`)
+#define IS_NUMBER(x)    ((x).type == COSMO_TNUMBER)
+#define IS_BOOLEAN(x)   ((x).type == COSMO_TBOOLEAN)
+#define IS_NIL(x)       ((x).type == COSMO_TNIL)
+#define IS_OBJ(x)       ((x).type == COSMO_TOBJ)
+
+// create CValues (compound literals; the argument is parenthesised so any expression can be passed)
+
+#define cosmoV_newNumber(x)     ((CValue){COSMO_TNUMBER, {.num = (x)}})
+#define cosmoV_newBoolean(x)    ((CValue){COSMO_TBOOLEAN, {.b = (x)}})
+#define cosmoV_newObj(x)        ((CValue){COSMO_TOBJ, {.obj = (CObj*)(x)}})
+#define cosmoV_newNil()         ((CValue){COSMO_TNIL, {.num = 0}})
+
+// read CValues (no type check is performed; the caller must know which member is active)
+
+#define cosmoV_readNumber(x)    ((cosmo_Number)(x).val.num)
+#define cosmoV_readBoolean(x)   ((bool)(x).val.b)
+#define cosmoV_readObj(x)       ((CObj*)(x).val.obj)
+
+#endif
\ No newline at end of file
diff --git a/src/cvm.c b/src/cvm.c
new file mode 100644
index 
0000000..600a664 --- /dev/null +++ b/src/cvm.c @@ -0,0 +1,417 @@ +#include "cvm.h" +#include "cstate.h" +#include "cdebug.h" +#include "cmem.h" + +#include +#include + +void runtimeError(CState *state, const char *format, ...) { + if (state->panic) + return; + + // print stack trace + for (int i = 0; i < state->frameCount; i++) { + CCallFrame *frame = &state->callFrame[i]; + CObjFunction *function = frame->closure->function; + CChunk *chunk = &function->chunk; + + int line = chunk->lineInfo[frame->pc - chunk->buf - 1]; + + if (i == state->frameCount - 1) { // it's the last call frame, prepare for the objection to be printed + fprintf(stderr, "Objection on [line %d] in ", line); + if (function->name == NULL) { // unnamed chunk + fprintf(stderr, "%s\n\t", UNNAMEDCHUNK); + } else { + fprintf(stderr, "%.*s()\n\t", function->name->length, function->name->str); + } + } else { + fprintf(stderr, "[line %d] in ", line); + if (function->name == NULL) { // unnamed chunk + fprintf(stderr, "%s\n", UNNAMEDCHUNK); + } else { + fprintf(stderr, "%.*s()\n", function->name->length, function->name->str); + } + } + } + + va_list args; + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + fputs("\n", stderr); + + // TODO: push error onto the stack :P + state->panic = true; +} + +CObjUpval *captureUpvalue(CState *state, CValue *local) { + CObjUpval *prev = NULL; + CObjUpval *upvalue = state->openUpvalues; + + while (upvalue != NULL && upvalue->val > local) { // while upvalue exists and is higher on the stack than local + prev = upvalue; + upvalue = upvalue->next; + } + + if (upvalue != NULL && upvalue->val == local) { // we found the local we were going to capture + return upvalue; + } + + CObjUpval *newUpval = cosmoO_newUpvalue(state, local); + newUpval->next = upvalue; + + // the list is sorted, so insert it at our found upvalue + if (prev == NULL) { + state->openUpvalues = newUpval; + } else { + prev->next = newUpval; + } + + return newUpval; +} + +void 
closeUpvalues(CState *state, CValue *local) { + while (state->openUpvalues != NULL && state->openUpvalues->val >= local) { // for every upvalue that points to the local or anything above it + CObjUpval *upvalue = state->openUpvalues; + upvalue->closed = *upvalue->val; + upvalue->val = &upvalue->closed; // upvalue now points to itself :P + state->openUpvalues = upvalue->next; + } +} + +void pushCallFrame(CState *state, CObjClosure *closure, int args) { + CCallFrame *frame = &state->callFrame[state->frameCount++]; + frame->base = state->top - args - 1; // - 1 for the function + frame->pc = closure->function->chunk.buf; + frame->closure = closure; +} + +void popCallFrame(CState *state) { + closeUpvalues(state, state->callFrame[state->frameCount - 1].base); // close any upvalue still open + + state->top = state->callFrame[state->frameCount - 1].base; // resets the stack + state->frameCount--; +} + +CObjString *cosmoV_concat(CState *state, CObjString *strA, CObjString *strB) { + size_t sz = strA->length + strB->length; + char *buf = cosmoM_xmalloc(state, sz + 1); // +1 for null terminator + + memcpy(buf, strA->str, strA->length); + memcpy(buf + strA->length, strB->str, strB->length); + buf[sz] = '\0'; + + return cosmoO_takeString(state, buf, sz); +} + +int cosmoV_execute(CState *state); + +typedef enum { + CALL_CLOSURE, + CALL_CFUNCTION +} preCallResult; + +int cosmoV_preCall(CState *state, int args, int nresults) { + return -1; +} + +// args = # of pass parameters, nresults = # of expected results +COSMOVMRESULT cosmoV_call(CState *state, int args, int nresults) { + StkPtr val = cosmoV_getTop(state, args); // function will always be right above the args + + if (!(val->type == COSMO_TOBJ)) { + runtimeError(state, "Cannot call non-function value!"); + return COSMOVM_RUNTIME_ERR; + } + + switch (val->val.obj->type) { + case COBJ_CLOSURE: { + CObjClosure *closure = (CObjClosure*)(val->val.obj); + + // missmatched args, thats an obvious user error, so error. 
+ if (args != closure->function->args) { + runtimeError(state, "Expected %d parameters for %s, got %d!", closure->function->args, closure->function->name == NULL ? UNNAMEDCHUNK : closure->function->name->str, args); + return COSMOVM_RUNTIME_ERR; + } + + // load function into callframe + pushCallFrame(state, closure, closure->function->args); + + // execute + int res = cosmoV_execute(state); + + // so, since we can have any # of results, we need to move the expected results to the original call frame (that means popping/adding however many results) + CValue* results = state->top; + + // pop the callframe and return result :) + popCallFrame(state); + + // return the results to the stack + for (int i = 1; i <= nresults; i++) { + if (i <= res) + cosmoV_pushValue(state, results[-i]); + else + cosmoV_pushValue(state, cosmoV_newNil()); + } + + break; + } + case COBJ_CFUNCTION: { + // it's a C function, so call it + CosmoCFunction cfunc = ((CObjCFunction*)(val->val.obj))->cfunc; + CValue *savedBase = state->top - args - 1; + + cosmoM_freezeGC(state); // we don't want a GC event during c api because we don't actually trust the user to know how to evade the GC + int res = cfunc(state, args, state->top - args); + cosmoM_unfreezeGC(state); + + // so, since we can have any # of results, we need to move the expected results to the original call frame + CValue* results = state->top; + state->top = savedBase; + + // return the results to the stack + for (int i = 1; i <= nresults; i++) { + if (i <= res) + cosmoV_pushValue(state, results[-i]); + else + cosmoV_pushValue(state, cosmoV_newNil()); + } + + break; + } + default: + runtimeError(state, "Cannot call non-function value!"); + return COSMOVM_RUNTIME_ERR; + } + + return state->panic ? 
COSMOVM_RUNTIME_ERR : COSMOVM_OK; +} + +static inline bool isFalsey(StkPtr val) { + return val->type == COSMO_TNIL || (val->type == COSMO_TBOOLEAN && !val->val.b); +} + +#define BINARYOP(typeConst, op) \ + StkPtr valA = cosmoV_getTop(state, 1); \ + StkPtr valB = cosmoV_getTop(state, 0); \ + if (valA->type == COSMO_TNUMBER && valB->type == COSMO_TNUMBER) { \ + cosmoV_setTop(state, 2); /* pop the 2 values */ \ + cosmoV_pushValue(state, typeConst((valA->val.num) op (valB->val.num))); \ + } else { \ + runtimeError(state, "Expected number! got %d and %d", valA->type, valB->type); \ + } \ + + +// returns -1 if error, otherwise returns ammount of results +int cosmoV_execute(CState *state) { + CCallFrame* frame = &state->callFrame[state->frameCount - 1]; // grabs the current frame + CValue *constants = frame->closure->function->chunk.constants.values; // cache the pointer :) + +#define READBYTE() *frame->pc++ +#define READUINT() (frame->pc += 2, *(uint16_t*)(&frame->pc[-2])) + + while (!state->panic) { + /*disasmInstr(&frame->closure->function->chunk, frame->pc - frame->closure->function->chunk.buf, 0); + printf("\n");*/ + switch (READBYTE()) { + case OP_LOADCONST: { // push const[uint] to stack + uint16_t indx = READUINT(); + cosmoV_pushValue(state, constants[indx]); + break; + } + case OP_SETGLOBAL: { + uint16_t indx = READUINT(); + CValue ident = constants[indx]; // grabs identifier + CValue *val = cosmoT_insert(state, &state->globals, ident); + *val = *cosmoV_pop(state); // sets the value in the hash table + break; + } + case OP_GETGLOBAL: { + uint16_t indx = READUINT(); + CValue ident = constants[indx]; // grabs identifier + CValue val; // to hold our value + cosmoT_get(&state->globals, ident, &val); + cosmoV_pushValue(state, val); // pushes the value to the stack + break; + } + case OP_SETLOCAL: { + frame->base[READBYTE()] = *cosmoV_pop(state); + break; + } + case OP_GETLOCAL: { + cosmoV_pushValue(state, frame->base[READBYTE()]); + break; + } + case OP_GETUPVAL: { + 
uint8_t indx = READBYTE(); + cosmoV_pushValue(state, *frame->closure->upvalues[indx]->val); + break; + } + case OP_SETUPVAL: { + uint8_t indx = READBYTE(); + *frame->closure->upvalues[indx]->val = *cosmoV_pop(state); + break; + } + case OP_PEJMP: { + uint16_t offset = READUINT(); + + if (isFalsey(cosmoV_pop(state))) { // pop, if the condition is false, jump! + frame->pc += offset; + } + break; + } + case OP_EJMP: { + uint16_t offset = READUINT(); + + if (isFalsey(cosmoV_getTop(state, 0))) { // if the condition is false, jump! + frame->pc += offset; + } + break; + } + case OP_JMP: { + uint16_t offset = READUINT(); + frame->pc += offset; + break; + } + case OP_JMPBACK: { + uint16_t offset = READUINT(); + frame->pc -= offset; + break; + } + case OP_POP: { // pops value off the stack + cosmoV_setTop(state, READBYTE()); + break; + } + case OP_CALL: { + uint8_t args = READBYTE(); + uint8_t results = READBYTE(); + COSMOVMRESULT result = cosmoV_call(state, args, results); + if (result != COSMOVM_OK) { + return result; + } + break; + } + case OP_CLOSURE: { + uint16_t index = READUINT(); + CObjFunction *func = cosmoV_readFunction(constants[index]); + CObjClosure *closure = cosmoO_newClosure(state, func); + cosmoV_pushValue(state, cosmoV_newObj((CObj*)closure)); + + for (int i = 0; i < closure->upvalueCount; i++) { + uint8_t encoding = READBYTE(); + uint8_t index = READBYTE(); + if (encoding == OP_GETUPVAL) { + // capture upvalue from current frame's closure + closure->upvalues[i] = frame->closure->upvalues[index]; + } else { + // capture local + closure->upvalues[i] = captureUpvalue(state, frame->base + index); + } + } + + break; + } + case OP_CLOSE: { + closeUpvalues(state, state->top - 1); + cosmoV_pop(state); + break; + } + case OP_ADD: { // pop 2 values off the stack & try to add them together + BINARYOP(cosmoV_newNumber, +); + break; + } + case OP_SUB: { // pop 2 values off the stack & try to subtracts them + BINARYOP(cosmoV_newNumber, -) + break; + } + case OP_MULT: { 
// pop 2 values off the stack & try to multiplies them together + BINARYOP(cosmoV_newNumber, *) + break; + } + case OP_DIV: { // pop 2 values off the stack & try to divides them + BINARYOP(cosmoV_newNumber, /) + break; + } + case OP_NOT: { + cosmoV_pushValue(state, cosmoV_newBoolean(isFalsey(cosmoV_pop(state)))); + break; + } + case OP_NEGATE: { // pop 1 value off the stack & try to negate + StkPtr val = cosmoV_getTop(state, 0); + + if (val->type == COSMO_TNUMBER) { + cosmoV_pop(state); + cosmoV_pushValue(state, cosmoV_newNumber(-(val->val.num))); + } else { + runtimeError(state, "Expected number!"); + } + break; + } + case OP_CONCAT: { + uint8_t vals = READBYTE(); + StkPtr start = state->top - vals; + StkPtr end = cosmoV_getTop(state, 0); + StkPtr current; + + CObjString *result = cosmoV_toString(state, *start); + for (StkPtr current = start + 1; current <= end; current++) { + cosmoV_pushValue(state, cosmoV_newObj(result)); // so our GC can find our current result string + CObjString *otherStr = cosmoV_toString(state, *current); + cosmoV_pushValue(state, cosmoV_newObj(otherStr)); // also so our GC won't free otherStr + result = cosmoV_concat(state, result, otherStr); + + cosmoV_setTop(state, 2); // pop result & otherStr off the stack + } + + state->top = start; + cosmoV_pushValue(state, cosmoV_newObj(result)); + break; + } + case OP_EQUAL: { + // pop vals + StkPtr valB = cosmoV_pop(state); + StkPtr valA = cosmoV_pop(state); + + // compare & push + cosmoV_pushValue(state, cosmoV_newBoolean(cosmoV_equal(*valA, *valB))); + break; + } + case OP_GREATER: { + BINARYOP(cosmoV_newBoolean, >) + break; + } + case OP_LESS: { + BINARYOP(cosmoV_newBoolean, <) + break; + } + case OP_GREATER_EQUAL: { + BINARYOP(cosmoV_newBoolean, >=) + break; + } + case OP_LESS_EQUAL: { + BINARYOP(cosmoV_newBoolean, <=) + break; + } + case OP_TRUE: cosmoV_pushValue(state, cosmoV_newBoolean(true)); break; + case OP_FALSE: cosmoV_pushValue(state, cosmoV_newBoolean(false)); break; + case OP_NIL: 
cosmoV_pushValue(state, cosmoV_newNil()); break;
+            case OP_RETURN: {
+                uint8_t results = READBYTE();
+                return results;
+            }
+            default:
+                CERROR("unknown opcode!");
+                exit(0);
+        }
+        //cosmoV_printStack(state);
+    }
+
+#undef READBYTE
+#undef READUINT
+
+    // we'll only reach this is state->panic is true
+    return COSMOVM_RUNTIME_ERR;
+}
+
+#undef BINARYOP
\ No newline at end of file
diff --git a/src/cvm.h b/src/cvm.h
new file mode 100644
index 0000000..ee63609
--- /dev/null
+++ b/src/cvm.h
@@ -0,0 +1,16 @@
+#ifndef COSMOVM_H
+#define COSMOVM_H
+
+#include "cosmo.h"
+#include "cstate.h"
+
+typedef enum {
+    COSMOVM_OK,
+    COSMOVM_RUNTIME_ERR,
+    COSMOVM_BUILDTIME_ERR
+} COSMOVMRESULT;
+
+// args = # of passed arguments, nresults = # of expected results
+COSMO_API COSMOVMRESULT cosmoV_call(CState *state, int args, int nresults);
+
+#endif
\ No newline at end of file
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..4df6b2f
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,151 @@
+#include "cosmo.h"
+#include "cchunk.h"
+#include "cdebug.h"
+#include "cvm.h"
+#include "cparse.h"
+#include "cbaselib.h"
+
+#include "cmem.h"
+
+// Compiles `script` in a fresh state, calls cosmoB_loadlibrary on that state and, if
+// compilation returned a function, dumps its chunk with disasmChunk and calls it. The state is freed afterwards.
+static void interpret(const char* script) {
+    CState *state = cosmoV_newState();
+
+    // cosmoP_compileString pushes the result onto the stack (NIL or COBJ_FUNCTION)
+    CObjFunction* func = cosmoP_compileString(state, script);
+    cosmoB_loadlibrary(state);
+    if (func != NULL) {
+        disasmChunk(&func->chunk, "_main", 0);
+
+        cosmoV_call(state, 0, 0); // 0 args being passed, 0 results expected
+
+        //cosmoV_printStack(state);
+        //cosmoT_printTable(&state->globals, "globals");
+        //cosmoT_printTable(&state->strings, "strings");
+    }
+
+    cosmoV_freeState(state);
+}
+
+// Prompts on stdout, reads one line at a time from stdin and interprets each line (every line gets its own state) until fgets fails.
+static void repl(void) {
+    char line[1024];
+
+    while (true) {
+        printf("> ");
+
+        if (!fgets(line, sizeof(line), stdin)) { // better than gets()
+            printf("\n"); // finish the prompt line; don't leave a dangling "> " behind
+            break;
+        }
+
+        interpret(line);
+    }
+}
+
+// Reads the whole file at `path` into a freshly malloc'd, null-terminated buffer.
+// The caller owns the buffer and must free() it. Any failure is reported on
+// stderr and terminates the process (exit code 74 for I/O errors, 1 for allocation failure).
+static char *readFile(const char* path) {
+    FILE* file = fopen(path, "rb");
+    if (file == NULL) {
+        fprintf(stderr, "Could not open file \"%s\".\n", path);
+        exit(74);
+    }
+
+    // first, we need to know how big our file is. ftell() returns a long and
+    // reports failure as -1, so check it before converting to size_t
+    long endPos = -1;
+    if (fseek(file, 0L, SEEK_END) == 0) {
+        endPos = ftell(file);
+    }
+    if (endPos < 0) {
+        fprintf(stderr, "failed to read file \"%s\"!\n", path);
+        fclose(file);
+        exit(74);
+    }
+    size_t fileSize = (size_t)endPos;
+    rewind(file);
+
+    char *buffer = (char*)malloc(fileSize + 1); // make room for the null byte
+    if (buffer == NULL) {
+        fprintf(stderr, "failed to allocate!");
+        fclose(file);
+        exit(1);
+    }
+
+    size_t bytesRead = fread(buffer, sizeof(char), fileSize, file);
+
+    if (bytesRead < fileSize) {
+        fprintf(stderr, "failed to read file \"%s\"!\n", path); // errors belong on stderr
+        free(buffer);
+        fclose(file);
+        exit(74);
+    }
+
+    buffer[bytesRead] = '\0'; // place our null terminator
+
+    // close the file handler and return the script buffer
+    fclose(file);
+    return buffer;
+}
+
+// Reads the script stored in `fileName`, interprets it and frees the script buffer.
+static void runFile(const char* fileName) {
+    char* script = readFile(fileName);
+
+    interpret(script);
+
+    free(script);
+}
+
+// With no arguments starts the REPL; otherwise runs every argument as a script file, in order.
+int main(int argc, const char *argv[]) {
+
+    //interpret("\"hello world!\"");
+
+    if (argc == 1) {
+        repl();
+    } else if (argc >= 2) { // they passed a file (or more lol)
+        for (int i = 1; i < argc; i++) {
+            runFile(argv[i]);
+        }
+    }
+
+
+    /*
+    CChunk *chnk = newChunk(1);
+    CState *state = cosmoV_newState();
+
+    // adds our constant values
+    int constIndx = addConstant(chnk, cosmoV_newNumber(2));
+    int const2Indx = addConstant(chnk, cosmoV_newNumber(4));
+
+    // pushes constant to the stack
+    writeu8Chunk(chnk, OP_LOADCONST, 1);
+    writeu16Chunk(chnk, constIndx, 1);
+
+    writeu8Chunk(chnk, OP_LOADCONST, 1);
+    writeu16Chunk(chnk, const2Indx, 1);
+
+    // pops 2 values off the stack, multiples them together and pushes the result
+    writeu8Chunk(chnk, OP_MULT, 1);
+
+    // pops a value off the stack, negates it, and pushes the result
+    writeu8Chunk(chnk, OP_NEGATE, 2);
+
+    // prints to the console
+    writeu8Chunk(chnk, OP_RETURN, 2);
+    disasmChunk(chnk, "test");
+
+    // load chunk to the state & run it
+    cosmoV_loadChunk(state, chnk);
+    cosmoV_execute(state, 0);
+
+    // clean up :)
+    freeChunk(chnk);
+    cosmoV_freeState(state);*/
+
+    return 0;
+}
\ No newline at end of
file
diff --git a/test.lua b/test.lua
new file mode 100644
index 0000000..571ee01
--- /dev/null
+++ b/test.lua
@@ -0,0 +1,21 @@
+local function fact(i) -- iterative factorial: returns i * (i-1) * ... * 2, or 1 when i <= 1
+    local total = 1
+    local x = i
+
+    while (x > 1) do
+        total = total * x
+        x = x - 1
+    end
+
+    return total
+end
+
+local i = 1
+while i < 1000 do -- repeat the whole table 999 times
+    local x = 1
+    while x < 100 do -- print fact(x) for x = 1..99
+        print("The factorial of " .. x .. " is " .. fact(x))
+        x = x + 1
+    end
+    i = i + 1
+end
\ No newline at end of file
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..6491ca2
--- /dev/null
+++ b/test.py
@@ -0,0 +1,7 @@
+def fib(i):  # naive doubly-recursive Fibonacci; returns i itself when i < 2
+    if i < 2:
+        return i
+
+    return fib(i - 2) + fib(i - 1)
+
+print(fib(35))
\ No newline at end of file