diff --git a/README.md b/README.md index 436e8e1..403cf5d 100644 --- a/README.md +++ b/README.md @@ -12,10 +12,10 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma ```sh > cat example.lua && luac5.1 -o example.luac example.lua -i = 0 +local i, x = 0, 2 while i < 10 do - print(i) + print(i + x) i = i + 1 end > python main.py example.luac @@ -23,34 +23,32 @@ example.luac ==== [[example.lua's constants]] ==== -0: [STRING] i -1: [NUMBER] 0.0 +0: [NUMBER] 0.0 +1: [NUMBER] 2.0 2: [NUMBER] 10.0 3: [STRING] print 4: [NUMBER] 1.0 ==== [[example.lua's dissassembly]] ==== -[ 0] LOADK : R[0] K[1] ; load 0.0 into R[0] -[ 1] SETGLOBAL : R[0] K[0] ; -[ 2] GETGLOBAL : R[0] K[0] ; -[ 3] LT : R[0] R[0] K[2] ; -[ 4] JMP : R[0] 7 ; -[ 5] GETGLOBAL : R[0] K[3] ; -[ 6] GETGLOBAL : R[1] K[0] ; -[ 7] CALL : R[0] 2 1 ; -[ 8] GETGLOBAL : R[0] K[0] ; -[ 9] ADD : R[0] R[0] K[4] ; -[ 10] SETGLOBAL : R[0] K[0] ; -[ 11] JMP : R[0] -10 ; -[ 12] RETURN : R[0] 1 0 ; +[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0] +[ 1] LOADK : R[1] K[1] ; load 2.0 into R[1] +[ 2] LT : R[0] R[0] K[2] ; +[ 3] JMP : R[0] 5 ; +[ 4] GETGLOBAL : R[2] K[3] ; +[ 5] ADD : R[3] R[0] R[1] ; +[ 6] CALL : R[2] 2 1 ; +[ 7] ADD : R[0] R[0] K[4] ; +[ 8] JMP : R[0] -7 ; +[ 9] RETURN : R[0] 1 0 ; ==== [[example.lua's decompiled source]] ==== -i = 0.0 +local i = 0.0 +local x = 2.0 while i < 10.0 do - print(i) + print((i + x)) i = (i + 1.0) end diff --git a/lparser.py b/lparser.py index bd03f31..418047d 100644 --- a/lparser.py +++ b/lparser.py @@ -8,6 +8,7 @@ from operator import concat from subprocess import call +from xmlrpc.client import Boolean from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK class _Scope: @@ -15,6 +16,12 @@ class _Scope: self.startPC = startPC self.endPC = endPC +class _Traceback: + def __init__(self): + self.sets = [] + self.uses = [] + self.isConst = False + class LuaDecomp: def __init__(self, chunk: Chunk): self.chunk = chunk @@ -22,13 +29,16 @@ class 
LuaDecomp: self.scope = [] self.top = {} self.locals = {} + self.traceback = {} self.unknownLocalCount = 0 self.src: str = "" # configurations! - self.aggressiveLocals = False # should *EVERY* accessed register be considered a local? + self.aggressiveLocals = False # should *EVERY* set register be considered a local? self.indexWidth = 4 # how many spaces for indentions? + self.__loadLocals() + # parse instructions while self.pc < len(self.chunk.instructions): self.parseInstr() @@ -54,6 +64,33 @@ class LuaDecomp: def __getCurrInstr(self) -> Instruction: return self.__getInstrAtPC(self.pc) + # when we read from a register, call this + def __addUseTraceback(self, reg: int) -> None: + if not self.pc in self.traceback: + self.traceback[self.pc] = _Traceback() + + self.traceback[self.pc].uses.append(reg) + + # when we write to a register, call this + def __addSetTraceback(self, reg: int) -> None: + if not self.pc in self.traceback: + self.traceback[self.pc] = _Traceback() + + self.traceback[self.pc].sets.append(reg) + + # walks traceback, if local wasn't set before, the local needs to be defined + def __needsDefined(self, reg) -> Boolean: + for _, trace in self.traceback.items(): + if reg in trace.sets: + return False + + # wasn't set in traceback! needs defined! 
+ return True + + def __loadLocals(self): + for i in range(len(self.chunk.locals)): + self.locals[i] = self.chunk.locals[i].name + def __addExpr(self, code: str) -> None: self.src += code @@ -61,22 +98,34 @@ class LuaDecomp: self.src += '\n' + (' ' * self.indexWidth * len(self.scope)) def __getReg(self, indx: int) -> str: + self.__addUseTraceback(indx) + # if the top indx is a local, get it - return self.locals[indx] if indx in self.locals else self.top[indx] + return self.locals[indx] if indx in self.locals else self.top[indx] def __setReg(self, indx: int, code: str) -> None: # if the top indx is a local, set it if indx in self.locals: - self.__startStatement() - self.__addExpr(self.locals[indx] + " = " + code) + if self.__needsDefined(indx): + self.__newLocal(indx, code) + else: + self.__startStatement() + self.__addExpr(self.locals[indx] + " = " + code) elif self.aggressiveLocals: # 'every register is a local!!' self.__newLocal(indx, code) + + self.__addSetTraceback(indx) self.top[indx] = code # ========================================[[ Locals ]]========================================= def __makeLocalIdentifier(self, indx: int) -> str: + # first, check if we have a local name already determined + if indx in self.locals: + return self.locals[indx] + + # otherwise, generate a local self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount self.unknownLocalCount += 1