From 95ca3bb26b6015342eeac4f7d18e7cf19e4dd868 Mon Sep 17 00:00:00 2001 From: CPunch Date: Thu, 11 Aug 2022 17:26:48 -0500 Subject: [PATCH] lparser.py: added support for while loops --- README.md | 52 +++++++++++++++++++++++++++------------------------- lparser.py | 46 ++++++++++++++++++++++++++++++++++------------ lundump.py | 6 +++--- 3 files changed, 64 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index f1819d8..436e8e1 100644 --- a/README.md +++ b/README.md @@ -12,44 +12,46 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma ```sh > cat example.lua && luac5.1 -o example.luac example.lua -pp = "pri" .. "nt" +i = 0 -if 2 + 2 == 4 then - _G[pp]("Hello world") +while i < 10 do + print(i) + i = i + 1 end - > python main.py example.luac example.luac ==== [[example.lua's constants]] ==== -0: [STRING] pp -1: [STRING] pri -2: [STRING] nt -3: [NUMBER] 4.0 -4: [STRING] _G -5: [STRING] Hello world +0: [STRING] i +1: [NUMBER] 0.0 +2: [NUMBER] 10.0 +3: [STRING] print +4: [NUMBER] 1.0 ==== [[example.lua's dissassembly]] ==== -[ 0] LOADK : R[0] K[1] ; load "pri" into R[0] -[ 1] LOADK : R[1] K[2] ; load "nt" into R[1] -[ 2] CONCAT : R[0] R[0] R[1] ; concat 2 values from R[0] to R[1], store into R[0] -[ 3] SETGLOBAL : R[0] K[0] ; -[ 4] EQ : R[0] K[3] K[3] ; -[ 5] JMP : R[0] R[5] ; -[ 6] GETGLOBAL : R[0] K[4] ; -[ 7] GETGLOBAL : R[1] K[0] ; -[ 8] GETTABLE : R[0] R[0] R[1] ; -[ 9] LOADK : R[1] K[5] ; load "Hello world" into R[1] -[ 10] CALL : R[0] R[2] R[1] ; -[ 11] RETURN : R[0] R[1] R[0] ; +[ 0] LOADK : R[0] K[1] ; load 0.0 into R[0] +[ 1] SETGLOBAL : R[0] K[0] ; +[ 2] GETGLOBAL : R[0] K[0] ; +[ 3] LT : R[0] R[0] K[2] ; +[ 4] JMP : R[0] 7 ; +[ 5] GETGLOBAL : R[0] K[3] ; +[ 6] GETGLOBAL : R[1] K[0] ; +[ 7] CALL : R[0] 2 1 ; +[ 8] GETGLOBAL : R[0] K[0] ; +[ 9] ADD : R[0] R[0] K[4] ; +[ 10] SETGLOBAL : R[0] K[0] ; +[ 11] JMP : R[0] -10 ; +[ 12] RETURN : R[0] 1 0 ; ==== [[example.lua's decompiled source]] ==== -pp = "pri" .. "nt" -if 4.0 == 4.0 then - _G[pp]("Hello world") +i = 0.0 +while i < 10.0 do + print(i) + i = (i + 1.0) end + ``` \ No newline at end of file diff --git a/lparser.py b/lparser.py index e0f8c39..ef4046f 100644 --- a/lparser.py +++ b/lparser.py @@ -53,11 +53,17 @@ class LuaDecomp: self.__startStatement() self.__addExpr("local " + self.locals[indx] + " = " + expr) + def __getInstrAtPC(self, pc: int) -> Instruction: + if pc < len(self.chunk.instructions): + return self.chunk.instructions[pc] + + raise Exception("Decompilation failed!") + def __getNextInstr(self) -> Instruction: if self.pc + 1 < len(self.chunk.instructions): return self.chunk.instructions[self.pc + 1] - return None + raise Exception("Decompilation failed!") def __getCurrInstr(self) -> Instruction: return self.chunk.instructions[self.pc] @@ -102,6 +108,30 @@ class LuaDecomp: def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: self.__setReg(a, "(" + b + op + c + ")") + def __compJmp(self, op: str): + instr = self.__getCurrInstr() + jmpType = "if" + scopeStart = "then" + + # we need to check if the jmp location has a jump back (if so, it's a while loop) + jmp = self.__getNextInstr().B + 1 + jmpToInstr = self.__getInstrAtPC(self.pc + jmp) + + if jmpToInstr.opcode == Opcodes.JMP: + # if this jump jumps back to this compJmp, it's a loop! + if self.pc + jmp + jmpToInstr.B <= self.pc + 1: + jmpType = "while" + scopeStart = "do" + + self.__startStatement() + if instr.A > 0: + self.__addExpr("%s not " % jmpType) + else: + self.__addExpr("%s " % jmpType) + self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") + self.__startScope("%s " % scopeStart, jmp) + self.pc += 1 # skip next instr + # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which def __readRK(self, rk: int) -> str: if (whichRK(rk)) > 0: @@ -163,19 +193,11 @@ class LuaDecomp: elif instr.opcode == Opcodes.JMP: pass elif instr.opcode == Opcodes.EQ: - self.__startStatement() - if instr.A > 0: - self.__addExpr("if not ") - else: - self.__addExpr("if ") - self.__addExpr(self.__readRK(instr.B) + " == " + self.__readRK(instr.C) + " ") - self.__startScope("then ", self.__getNextInstr().B + 1) - - self.pc += 1 # skip next instr + self.__compJmp(" == ") elif instr.opcode == Opcodes.LT: - self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " < ") + self.__compJmp(" < ") elif instr.opcode == Opcodes.LE: - self.__emitOperand(instr.A, instr.B, instr.C, " <= ") + self.__compJmp(" <= ") elif instr.opcode == Opcodes.CALL: preStr = "" callStr = "" diff --git a/lundump.py b/lundump.py index 6c462b8..c3250f5 100644 --- a/lundump.py +++ b/lundump.py @@ -101,8 +101,8 @@ class Instruction: if self.type == InstructionType.ABC: # by default, treat them as registers A = "R[%d]" % self.A - B = "R[%d]" % self.B - C = "R[%d]" % self.C + B = "%d" % self.B + C = "%d" % self.C # these opcodes have RKs for B & C if self.opcode in _RKBCInstr: @@ -114,7 +114,7 @@ class Instruction: regs = "%6s %6s %6s" % (A, B, C) elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: A = "R[%d]" % self.A - B = "R[%d]" % self.B + B = "%d" % self.B if self.opcode in _KBx: B = "K[%d]" % self.B