From 0d947f4f3dcaf1a1eb40299da6facdaac53a9790 Mon Sep 17 00:00:00 2001 From: CPunch Date: Fri, 12 Aug 2022 17:08:31 -0500 Subject: [PATCH] Added 'repeat .. until' support - lines are now tracked by start & end PC - new config option: annotateLines. if true, line annotations showing start & end PC will be emitted - lundump.py now shows locals for each proto --- README.md | 36 +++++++++++--------- lparser.py | 98 ++++++++++++++++++++++++++++++++++++++++-------------- lundump.py | 9 +++-- 3 files changed, 99 insertions(+), 44 deletions(-) diff --git a/README.md b/README.md index 403cf5d..f8821a7 100644 --- a/README.md +++ b/README.md @@ -14,10 +14,11 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma > cat example.lua && luac5.1 -o example.luac example.lua local i, x = 0, 2 -while i < 10 do +repeat print(i + x) i = i + 1 -end +until i < 10 + > python main.py example.luac example.luac @@ -25,31 +26,34 @@ example.luac 0: [NUMBER] 0.0 1: [NUMBER] 2.0 -2: [NUMBER] 10.0 -3: [STRING] print -4: [NUMBER] 1.0 +2: [STRING] print +3: [NUMBER] 1.0 +4: [NUMBER] 10.0 + +==== [[example.lua's locals]] ==== + +R[0]: i +R[1]: x ==== [[example.lua's dissassembly]] ==== [ 0] LOADK : R[0] K[0] ; load 0.0 into R[0] [ 1] LOADK : R[1] K[1] ; load 2.0 into R[1] -[ 2] LT : R[0] R[0] K[2] ; -[ 3] JMP : R[0] 5 ; -[ 4] GETGLOBAL : R[2] K[3] ; -[ 5] ADD : R[3] R[0] R[1] ; -[ 6] CALL : R[2] 2 1 ; -[ 7] ADD : R[0] R[0] K[4] ; -[ 8] JMP : R[0] -7 ; -[ 9] RETURN : R[0] 1 0 ; +[ 2] GETGLOBAL : R[2] K[2] ; +[ 3] ADD : R[3] R[0] R[1] ; +[ 4] CALL : R[2] 2 1 ; +[ 5] ADD : R[0] R[0] K[3] ; +[ 6] LT : R[0] R[0] K[4] ; +[ 7] JMP : R[0] -6 ; +[ 8] RETURN : R[0] 1 0 ; ==== [[example.lua's decompiled source]] ==== - local i = 0.0 local x = 2.0 -while i < 10.0 do +repeat print((i + x)) i = (i + 1.0) -end +until i < 10.0 ``` \ No newline at end of file diff --git a/lparser.py b/lparser.py index 617f7ac..b85e84d 100644 --- a/lparser.py +++ b/lparser.py @@ -22,11 +22,19 @@ class 
_Traceback: self.uses = [] self.isConst = False +class _Line: + def __init__(self, startPC: int, endPC: int, src: str, scope: int): + self.startPC = startPC + self.endPC = endPC + self.src = src + self.scope = scope + class LuaDecomp: def __init__(self, chunk: Chunk): self.chunk = chunk self.pc = 0 - self.scope = [] + self.scope: list[_Scope] = [] + self.lines: list[_Line] = [] self.top = {} self.locals = {} self.traceback = {} @@ -35,6 +43,7 @@ class LuaDecomp: # configurations! self.aggressiveLocals = False # should *EVERY* set register be considered a local? + self.annotateLines = False self.indexWidth = 4 # how many spaces for indentions? self.__loadLocals() @@ -48,7 +57,11 @@ class LuaDecomp: self.__checkScope() print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") - print(self.src) + + for line in self.lines: + if self.annotateLines: + print("-- PC: %d to PC: %d" % (line.startPC, line.endPC)) + print(((' ' * self.indexWidth) * line.scope) + line.src) # =======================================[[ Helpers ]]========================================= @@ -64,20 +77,42 @@ class LuaDecomp: def __getCurrInstr(self) -> Instruction: return self.__getInstrAtPC(self.pc) - # when we read from a register, call this - def __addUseTraceback(self, reg: int) -> None: + def __makeTracIfNotExist(self) -> None: if not self.pc in self.traceback: self.traceback[self.pc] = _Traceback() + # when we read from a register, call this + def __addUseTraceback(self, reg: int) -> None: + self.__makeTracIfNotExist() self.traceback[self.pc].uses.append(reg) # when we write from a register, call this def __addSetTraceback(self, reg: int) -> None: - if not self.pc in self.traceback: - self.traceback[self.pc] = _Traceback() - + self.__makeTracIfNotExist() self.traceback[self.pc].sets.append(reg) + def __addExpr(self, code: str) -> None: + self.src += code + + def __endStatement(self): + startPC = self.lines[len(self.lines) - 1].endPC + 1 if len(self.lines) > 0 else 0 + endPC = 
self.pc + + # make sure we don't write an empty line + if not self.src == "": + self.lines.append(_Line(startPC, endPC, self.src, len(self.scope))) + self.src = "" + + def __insertStatement(self, pc: int) -> None: + # insert current statement into lines at pc location + for i in range(len(self.lines)): + if self.lines[i].startPC <= pc and self.lines[i].endPC >= pc: + self.lines.insert(i, _Line(pc, pc, self.src, self.lines[i-1].scope if i > 0 else 0)) + self.src = "" + return i + + self.src = "" + # walks traceback, if local wasn't set before, the local needs to be defined def __needsDefined(self, reg) -> Boolean: for _, trace in self.traceback.items(): @@ -94,12 +129,6 @@ class LuaDecomp: else: self.__makeLocalIdentifier(i) - def __addExpr(self, code: str) -> None: - self.src += code - - def __startStatement(self): - self.src += '\n' + (' ' * self.indexWidth * len(self.scope)) - def __getReg(self, indx: int) -> str: self.__addUseTraceback(indx) @@ -112,8 +141,8 @@ class LuaDecomp: if self.__needsDefined(indx): self.__newLocal(indx, code) else: - self.__startStatement() self.__addExpr(self.locals[indx] + " = " + code) + self.__endStatement() elif self.aggressiveLocals: # 'every register is a local!!' self.__newLocal(indx, code) @@ -138,14 +167,15 @@ class LuaDecomp: # TODO: grab identifier from chunk(?) 
self.__makeLocalIdentifier(indx) - self.__startStatement() self.__addExpr("local " + self.locals[indx] + " = " + expr) + self.__endStatement() # ========================================[[ Scopes ]]========================================= - def __startScope(self, scopeType: str, size: int) -> None: + def __startScope(self, scopeType: str, start: int, size: int) -> None: self.__addExpr(scopeType) - self.scope.append(_Scope(self.pc, self.pc + size)) + self.__endStatement() + self.scope.append(_Scope(start, start + size)) # checks if we need to end a scope def __checkScope(self) -> None: @@ -156,9 +186,9 @@ class LuaDecomp: self.__endScope() def __endScope(self) -> None: - self.scope.pop() - self.__startStatement() + self.__endStatement() self.__addExpr("end") + self.scope.pop() # =====================================[[ Instructions ]]====================================== @@ -179,15 +209,33 @@ class LuaDecomp: if self.pc + jmp + jmpToInstr.B <= self.pc + 1: jmpType = "while" scopeStart = "do" + elif jmp < 0: + # 'repeat until' loop (probably) + jmpType = "until" + scopeStart = None - self.__startStatement() if instr.A > 0: self.__addExpr("%s not " % jmpType) else: self.__addExpr("%s " % jmpType) self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") - self.__startScope("%s " % scopeStart, jmp) self.pc += 1 # skip next instr + if scopeStart: + self.__startScope("%s " % scopeStart, self.pc - 1, jmp) + + # we end the statement *after* scopeStart + self.__endStatement() + else: + # end the statement prior to repeat + self.__endStatement() + + # it's a repeat until loop, insert 'repeat' at the jumpTo location + self.__addExpr("repeat") + insertedLine = self.__insertStatement(self.pc + jmp) + + # add scope to every line in-between + for i in range(insertedLine+1, len(self.lines)-1): + self.lines[i].scope += 1 # 'RK's are special in because can be a register or a konstant. 
a bitflag is read to determine which def __readRK(self, rk: int) -> str: @@ -215,11 +263,11 @@ class LuaDecomp: elif instr.opcode == Opcodes.GETTABLE: self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") elif instr.opcode == Opcodes.SETGLOBAL: - self.__startStatement() self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) + self.__endStatement() elif instr.opcode == Opcodes.SETTABLE: - self.__startStatement() self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) + self.__endStatement() elif instr.opcode == Opcodes.ADD: self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") elif instr.opcode == Opcodes.SUB: @@ -283,10 +331,10 @@ class LuaDecomp: preStr += ", " if not indx == instr.A + instr.C - 2 else "" preStr += " = " - self.__startStatement() self.__addExpr(preStr + callStr) + self.__endStatement() elif instr.opcode == Opcodes.RETURN: - self.__startStatement() + self.__endStatement() pass # no-op for now else: raise Exception("unsupported instruction: %s" % instr.toString()) \ No newline at end of file diff --git a/lundump.py b/lundump.py index c3250f5..9cbbe4d 100644 --- a/lundump.py +++ b/lundump.py @@ -204,9 +204,12 @@ class Chunk: def print(self): print("\n==== [[" + str(self.name) + "'s constants]] ====\n") - for z in range(len(self.constants)): - i = self.constants[z] - print(str(z) + ": " + i.toString()) + for i in range(len(self.constants)): + print("%d: %s" % (i, self.constants[i].toString())) + + print("\n==== [[" + str(self.name) + "'s locals]] ====\n") + for i in range(len(self.locals)): + print("R[%d]: %s" % (i, self.locals[i].name)) print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") for i in range(len(self.instructions)):