diff --git a/README.md b/README.md index ae82d81..5d2cc7c 100644 --- a/README.md +++ b/README.md @@ -12,48 +12,69 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma ```sh > cat example.lua && luac5.1 -o example.luac example.lua -local tbl = {"Hello", "World"} +local tbl = {"He", "llo", " ", "Wo", "rld", "!"} +local str = "" -print(tbl[1] .. " " .. tbl[2] .. ": " .. 2.5) +for i = 1, #tbl do + str = str .. tbl[i] +end + +print(str) > python main.py example.luac example.luac ==== [[example.lua's constants]] ==== -0: [STRING] Hello -1: [STRING] World -2: [STRING] print -3: [NUMBER] 1.0 -4: [STRING] -5: [NUMBER] 2.0 -6: [STRING] : -7: [NUMBER] 2.5 +0: [STRING] He +1: [STRING] llo +2: [STRING] +3: [STRING] Wo +4: [STRING] rld +5: [STRING] ! +6: [STRING] +7: [NUMBER] 1.0 +8: [STRING] print ==== [[example.lua's locals]] ==== R[0]: tbl +R[1]: str +R[2]: (for index) +R[3]: (for limit) +R[4]: (for step) +R[5]: i ==== [[example.lua's dissassembly]] ==== -[ 0] NEWTABLE : 0 2 0 ; -[ 1] LOADK : R[1] K[0] ; load "Hello" into R[1] -[ 2] LOADK : R[2] K[1] ; load "World" into R[2] -[ 3] SETLIST : 0 2 1 ; -[ 4] GETGLOBAL : R[1] K[2] ; move _G["print"] into R[1] -[ 5] GETTABLE : R[2] 0 K[3] ; -[ 6] LOADK : R[3] K[4] ; load " " into R[3] -[ 7] GETTABLE : R[4] 0 K[5] ; -[ 8] LOADK : R[5] K[6] ; load ": " into R[5] -[ 9] LOADK : R[6] K[7] ; load 2.5 into R[6] -[ 10] CONCAT : 2 2 6 ; concat 5 values from R[2] to R[6], store into R[2] -[ 11] CALL : 1 2 1 ; -[ 12] RETURN : 0 1 0 ; +[ 0] NEWTABLE : 0 6 0 ; +[ 1] LOADK : R[1] K[0] ; load "He" into R[1] +[ 2] LOADK : R[2] K[1] ; load "llo" into R[2] +[ 3] LOADK : R[3] K[2] ; load " " into R[3] +[ 4] LOADK : R[4] K[3] ; load "Wo" into R[4] +[ 5] LOADK : R[5] K[4] ; load "rld" into R[5] +[ 6] LOADK : R[6] K[5] ; load "!" 
into R[6] +[ 7] SETLIST : 0 6 1 ; +[ 8] LOADK : R[1] K[6] ; load "" into R[1] +[ 9] LOADK : R[2] K[7] ; load 1 into R[2] +[ 10] LEN : 3 0 0 ; +[ 11] LOADK : R[4] K[7] ; load 1 into R[4] +[ 12] FORPREP : R[2] 3 ; +[ 13] MOVE : 6 1 0 ; move R[1] into R[6] +[ 14] GETTABLE : R[7] 0 R[5] ; +[ 15] CONCAT : 1 6 7 ; concat 2 values from R[6] to R[7], store into R[1] +[ 16] FORLOOP : R[2] -4 ; +[ 17] GETGLOBAL : R[2] K[8] ; move _G["print"] into R[2] +[ 18] MOVE : 3 1 0 ; move R[1] into R[3] +[ 19] CALL : 2 2 1 ; +[ 20] RETURN : 0 1 0 ; ==== [[example.lua's pseudo-code]] ==== -local tbl = {} -tbl[1] = "Hello" -tbl[2] = "World" -print(tbl[1] .. " " .. tbl[2] .. ": " .. 2.5) +local tbl = {"He", "llo", " ", "Wo", "rld", "!", } +for i = 1, #tbl, 1 do + local str = str .. tbl[i] +end +print(str) + ``` \ No newline at end of file diff --git a/lparser.py b/lparser.py index c3c93fc..0e09990 100644 --- a/lparser.py +++ b/lparser.py @@ -29,7 +29,12 @@ class _Line: self.scope = scope def isValidLocal(ident: str) -> bool: - for c in ident: + # has to start with an alpha or _ + if ident[0] not in "abcdefghijklmnopqrstuvwxyz_": + return False + + # then it can be alphanum or _ + for c in ident[1:]: if c not in "abcdefghijklmnopqrstuvwxyz1234567890_": return False @@ -200,6 +205,9 @@ class LuaDecomp: self.__addExpr("end") self.scope.pop() + self.__endStatement() + + # =====================================[[ Instructions ]]====================================== def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: @@ -254,6 +262,33 @@ class LuaDecomp: else: return self.__getReg(rk) + # walk & peek ahead NEWTABLE + def __parseNewTable(self, indx: int): + # TODO: parse SETTABLE too? 
+ tblOps = [Opcodes.LOADK, Opcodes.SETLIST] + + instr = self.__getNextInstr() + cachedRegs = self.top + tbl = "{" + while instr.opcode in tblOps: + if instr.opcode == Opcodes.LOADK: # operate on registers + cachedRegs[instr.A] = self.chunk.getConstant(instr.B).toCode() + elif instr.opcode == Opcodes.SETLIST: + numElems = instr.B + + for i in range(numElems): + tbl += "%s, " % cachedRegs[instr.A + i + 1] + + self.pc += 1 + instr = self.__getNextInstr() + tbl += "}" + + # i use forceLocal here even though i don't know *for sure* that the register is a local. + # this does help later though if the table is reused (which is 99% of the time). the other 1% + # only affects syntax and may look a little weird but is fine and equivalent nonetheless + self.__setReg(indx, tbl, forceLocal=True) + self.__endStatement() + def parseInstr(self): instr = self.__getCurrInstr() @@ -279,12 +314,7 @@ class LuaDecomp: self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) self.__endStatement() elif instr.opcode == Opcodes.NEWTABLE: - # i use forceLocal here even though i don't know *for sure* that the register is a local. - # this does help later though if the table is populated (which is 99% of the time). the other 1% - # only affects syntax and may look a little weird but is fine and equivalent non-the-less - - # TODO: make this better - self.__setReg(instr.A, "{}", forceLocal=True) + self.__parseNewTable(instr.A) elif instr.opcode == Opcodes.ADD: self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") elif instr.opcode == Opcodes.SUB: @@ -302,7 +332,7 @@ class LuaDecomp: elif instr.opcode == Opcodes.NOT: self.__setReg(instr.A, "not " + self.__getReg(instr.B)) elif instr.opcode == Opcodes.LEN: - self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B)) + self.__setReg(instr.A, "#" + self.__getReg(instr.B)) elif instr.opcode == Opcodes.CONCAT: count = instr.C-instr.B+1 concatStr = ""