mirror of
				https://github.com/CPunch/LuaDecompy.git
				synced 2025-02-03 05:50:08 +00:00 
			
		
		
		
	Compare commits
	
		
			10 Commits
		
	
	
		
			ac0b7039d2
			...
			368ff62538
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 368ff62538 | |||
| a6623c8953 | |||
| 0f72e71a59 | |||
| b8bf02f7d0 | |||
| 95ca3bb26b | |||
| 78e137d033 | |||
| 875e91636b | |||
| 055af56e27 | |||
| eb1d3ffe87 | |||
| 2258888956 | 
							
								
								
									
										56
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										56
									
								
								README.md
									
									
									
									
									
								
							| @@ -2,48 +2,54 @@ | ||||
|  | ||||
| An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o <out.luac> <script.lua>`). | ||||
|  | ||||
| You will quickly find that only **extremely** simple scripts are decompiled successfully right now. This is an experimental project, and not all opcodes are properly handled yet. If you need a real decompiler, I would recommend any of the handful that already exist. | ||||
|  | ||||
| ## Why? | ||||
|  | ||||
| Lua has a relatively small instruction set (only 38 different opcodes!). This makes it pretty feasible for a weekend decompiler project. (Real) decompilers are extremely complex pieces of software, so being able to write a simpler one helps show the theory without *much* of the headache. | ||||
|  | ||||
| ## Example usage | ||||
|  | ||||
| ```sh | ||||
| > cat example.lua && luac5.1 -o example.luac example.lua | ||||
| pp = "pri" .. "nt" | ||||
| local i, x = 0, 2 | ||||
|  | ||||
| if 2 + 2 == 4 then | ||||
|     _G[pp]("Hello world") | ||||
| while i < 10 do | ||||
|     print(i + x) | ||||
|     i = i + 1 | ||||
| end | ||||
|  | ||||
| > python main.py example.luac | ||||
| example.luac | ||||
|  | ||||
| ==== [[example.lua's constants]] ==== | ||||
|  | ||||
| 0: [STRING] pp | ||||
| 1: [STRING] pri | ||||
| 2: [STRING] nt | ||||
| 3: [NUMBER] 4.0 | ||||
| 4: [STRING] _G | ||||
| 5: [STRING] Hello world | ||||
| 0: [NUMBER] 0.0 | ||||
| 1: [NUMBER] 2.0 | ||||
| 2: [NUMBER] 10.0 | ||||
| 3: [STRING] print | ||||
| 4: [NUMBER] 1.0 | ||||
|  | ||||
| ==== [[example.lua's dissassembly]] ==== | ||||
|  | ||||
| [  0]      LOADK : R[0] K[1] | ||||
| [  1]      LOADK : R[1] K[2] | ||||
| [  2]     CONCAT : R[0] R[0] R[1] | ||||
| [  3]  SETGLOBAL : R[0] R[0] | ||||
| [  4]         EQ : R[0] K[3] K[3] | ||||
| [  5]        JMP : R[0] R[5] | ||||
| [  6]  GETGLOBAL : R[0] K[4] | ||||
| [  7]  GETGLOBAL : R[1] K[0] | ||||
| [  8]   GETTABLE : R[0] R[0] R[1] | ||||
| [  9]      LOADK : R[1] K[5] | ||||
| [ 10]       CALL : R[0] R[2] R[1] | ||||
| [ 11]     RETURN : R[0] R[1] R[0] | ||||
| [  0]      LOADK :   R[0]   K[0]               ; load 0.0 into R[0] | ||||
| [  1]      LOADK :   R[1]   K[1]               ; load 2.0 into R[1] | ||||
| [  2]         LT :   R[0]   R[0]   K[2]        ;  | ||||
| [  3]        JMP :   R[0]      5               ;  | ||||
| [  4]  GETGLOBAL :   R[2]   K[3]               ;  | ||||
| [  5]        ADD :   R[3]   R[0]   R[1]        ;  | ||||
| [  6]       CALL :   R[2]      2      1        ;  | ||||
| [  7]        ADD :   R[0]   R[0]   K[4]        ;  | ||||
| [  8]        JMP :   R[0]     -7               ;  | ||||
| [  9]     RETURN :   R[0]      1      0        ;  | ||||
|  | ||||
| ==== [[example.lua's decompiled source]] ==== | ||||
|  | ||||
|  | ||||
| pp = "pri" .. "nt" | ||||
| if 4.0 == 4.0 then  | ||||
|     _G[pp]("Hello world") | ||||
| local i = 0.0 | ||||
| local x = 2.0 | ||||
| while i < 10.0 do  | ||||
|     print((i + x)) | ||||
|     i = (i + 1.0) | ||||
| end | ||||
|  | ||||
| ``` | ||||
							
								
								
									
										179
									
								
								lparser.py
									
									
									
									
									
								
							
							
						
						
									
										179
									
								
								lparser.py
									
									
									
									
									
								
							| @@ -1,20 +1,27 @@ | ||||
| ''' | ||||
|     lparser.py | ||||
|  | ||||
|     Depends on ldump.py for lua dump deserialization. | ||||
|     Depends on lundump.py for lua dump deserialization. | ||||
|  | ||||
|     An experimental bytecode decompiler. | ||||
| ''' | ||||
|  | ||||
| from operator import concat | ||||
| from subprocess import call | ||||
| from lundump import Chunk, LuaUndump, Constant, Instruction, InstructionType, Opcodes | ||||
| from xmlrpc.client import Boolean | ||||
| from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK | ||||
|  | ||||
| class _Scope: | ||||
|     def __init__(self, startPC: int, endPC: int): | ||||
|         self.startPC = startPC | ||||
|         self.endPC = endPC | ||||
|  | ||||
| class _Traceback: | ||||
|     def __init__(self): | ||||
|         self.sets = [] | ||||
|         self.uses = [] | ||||
|         self.isConst = False | ||||
|  | ||||
| class LuaDecomp: | ||||
|     def __init__(self, chunk: Chunk): | ||||
|         self.chunk = chunk | ||||
| @@ -22,16 +29,19 @@ class LuaDecomp: | ||||
|         self.scope = [] | ||||
|         self.top = {} | ||||
|         self.locals = {} | ||||
|         self.traceback = {} | ||||
|         self.unknownLocalCount = 0 | ||||
|         self.src: str = "" | ||||
|  | ||||
|         # configurations! | ||||
|         self.aggressiveLocals = False # should *EVERY* accessed register be considered a local?  | ||||
|         self.aggressiveLocals = False # should *EVERY* set register be considered a local?  | ||||
|         self.indexWidth = 4 # how many spaces for indentions? | ||||
|  | ||||
|         self.__loadLocals() | ||||
|  | ||||
|         # parse instructions | ||||
|         while self.pc < len(self.chunk.instructions): | ||||
|             self.parseExpr() | ||||
|             self.parseInstr() | ||||
|             self.pc += 1 | ||||
|  | ||||
|             # end the scope (if we're supposed to) | ||||
| @@ -40,7 +50,85 @@ class LuaDecomp: | ||||
|         print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") | ||||
|         print(self.src) | ||||
|  | ||||
|     # =======================================[[ Helpers ]]========================================= | ||||
|  | ||||
|     def __getInstrAtPC(self, pc: int) -> Instruction: | ||||
|         if pc < len(self.chunk.instructions): | ||||
|             return self.chunk.instructions[pc] | ||||
|  | ||||
|         raise Exception("Decompilation failed!") | ||||
|  | ||||
|     def __getNextInstr(self) -> Instruction: | ||||
|         return self.__getInstrAtPC(self.pc + 1) | ||||
|  | ||||
|     def __getCurrInstr(self) -> Instruction: | ||||
|         return self.__getInstrAtPC(self.pc) | ||||
|  | ||||
|     # when we read from a register, call this | ||||
|     def __addUseTraceback(self, reg: int) -> None: | ||||
|         if not self.pc in self.traceback: | ||||
|             self.traceback[self.pc] = _Traceback() | ||||
|  | ||||
|         self.traceback[self.pc].uses.append(reg) | ||||
|  | ||||
|     # when we write to a register, call this | ||||
|     def __addSetTraceback(self, reg: int) -> None: | ||||
|         if not self.pc in self.traceback: | ||||
|             self.traceback[self.pc] = _Traceback() | ||||
|  | ||||
|         self.traceback[self.pc].sets.append(reg) | ||||
|  | ||||
|     # walks traceback, if local wasn't set before, the local needs to be defined | ||||
|     def __needsDefined(self, reg) -> Boolean: | ||||
|         for _, trace in self.traceback.items(): | ||||
|             if reg in trace.sets: | ||||
|                 return False | ||||
|  | ||||
|         # wasn't set in traceback! needs defined! | ||||
|         return True | ||||
|  | ||||
|     def __loadLocals(self): | ||||
|         for i in range(len(self.chunk.locals)): | ||||
|             if not self.chunk.locals[i].name == "": | ||||
|                 self.locals[i] = self.chunk.locals[i].name  | ||||
|             else: | ||||
|                 self.__makeLocalIdentifier(i) | ||||
|  | ||||
|     def __addExpr(self, code: str) -> None: | ||||
|         self.src += code | ||||
|  | ||||
|     def __startStatement(self): | ||||
|         self.src += '\n' + (' ' * self.indexWidth * len(self.scope)) | ||||
|  | ||||
|     def __getReg(self, indx: int) -> str: | ||||
|         self.__addUseTraceback(indx) | ||||
|  | ||||
|         # if the top indx is a local, get it | ||||
|         return self.locals[indx] if indx in self.locals else self.top[indx] | ||||
|  | ||||
|     def __setReg(self, indx: int, code: str) -> None: | ||||
|         # if the top indx is a local, set it | ||||
|         if indx in self.locals: | ||||
|             if self.__needsDefined(indx): | ||||
|                 self.__newLocal(indx, code) | ||||
|             else: | ||||
|                 self.__startStatement() | ||||
|                 self.__addExpr(self.locals[indx] + " = " + code) | ||||
|         elif self.aggressiveLocals: # 'every register is a local!!' | ||||
|             self.__newLocal(indx, code) | ||||
|  | ||||
|  | ||||
|         self.__addSetTraceback(indx) | ||||
|         self.top[indx] = code | ||||
|  | ||||
|     # ========================================[[ Locals ]]========================================= | ||||
|  | ||||
|     def __makeLocalIdentifier(self, indx: int) -> str: | ||||
|         # first, check if we have a local name already determined | ||||
|         if indx in self.locals: | ||||
|             return self.locals[indx] | ||||
|  | ||||
|         # otherwise, generate a local | ||||
|         self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount | ||||
|         self.unknownLocalCount += 1 | ||||
|  | ||||
| @@ -53,34 +141,7 @@ class LuaDecomp: | ||||
|         self.__startStatement() | ||||
|         self.__addExpr("local " + self.locals[indx] + " = " + expr) | ||||
|  | ||||
|     def __getNextInstr(self) -> Instruction: | ||||
|         if self.pc + 1 < len(self.chunk.instructions): | ||||
|             return self.chunk.instructions[self.pc + 1] | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def __getCurrInstr(self) -> Instruction: | ||||
|         return self.chunk.instructions[self.pc] | ||||
|  | ||||
|     def __addExpr(self, code: str) -> None: | ||||
|         self.src += code | ||||
|  | ||||
|     def __startStatement(self): | ||||
|         self.src += '\n' + (' ' * self.indexWidth * len(self.scope)) | ||||
|  | ||||
|     def __getReg(self, indx: int) -> str: | ||||
|         # if the top indx is a local, get it | ||||
|         return self.locals[indx] if indx in self.locals else  self.top[indx] | ||||
|  | ||||
|     def __setReg(self, indx: int, code: str) -> None: | ||||
|         # if the top indx is a local, set it | ||||
|         if indx in self.locals: | ||||
|             self.__startStatement() | ||||
|             self.__addExpr(self.locals[indx] + " = " + code) | ||||
|         elif self.aggressiveLocals: # 'every register is a local!!' | ||||
|             self.__newLocal(indx, code) | ||||
|  | ||||
|         self.top[indx] = code | ||||
|     # ========================================[[ Scopes ]]========================================= | ||||
|  | ||||
|     def __startScope(self, scopeType: str, size: int) -> None: | ||||
|         self.__addExpr(scopeType) | ||||
| @@ -99,17 +160,43 @@ class LuaDecomp: | ||||
|         self.__startStatement() | ||||
|         self.__addExpr("end") | ||||
|  | ||||
|     # =====================================[[ Instructions ]]====================================== | ||||
|  | ||||
|     def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: | ||||
|         self.__setReg(a, "(" + b + op + c + ")") | ||||
|  | ||||
|     def __compJmp(self, op: str): | ||||
|         instr = self.__getCurrInstr() | ||||
|         jmpType = "if" | ||||
|         scopeStart = "then" | ||||
|  | ||||
|         # we need to check if the jmp location has a jump back (if so, it's a while loop) | ||||
|         jmp = self.__getNextInstr().B + 1 | ||||
|         jmpToInstr = self.__getInstrAtPC(self.pc + jmp) | ||||
|  | ||||
|         if jmpToInstr.opcode == Opcodes.JMP: | ||||
|             # if this jump jumps back to this compJmp, it's a loop! | ||||
|             if self.pc + jmp + jmpToInstr.B <= self.pc + 1: | ||||
|                 jmpType = "while" | ||||
|                 scopeStart = "do" | ||||
|  | ||||
|         self.__startStatement() | ||||
|         if instr.A > 0: | ||||
|             self.__addExpr("%s not " % jmpType) | ||||
|         else: | ||||
|             self.__addExpr("%s " % jmpType) | ||||
|         self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") | ||||
|         self.__startScope("%s " % scopeStart, jmp) | ||||
|         self.pc += 1 # skip next instr | ||||
|  | ||||
|     # 'RK's are special because they can be either a register or a konstant; a bitflag is read to determine which | ||||
|     def __readRK(self, rk: int) -> str: | ||||
|         if (rk & (1 << 8)) > 0: | ||||
|             return self.chunk.constants[(rk & ~(1 << 8))].toCode() | ||||
|         if (whichRK(rk)) > 0: | ||||
|             return self.chunk.getConstant(readRKasK(rk)).toCode() | ||||
|         else: | ||||
|             return self.__getReg(rk) | ||||
|  | ||||
|     def parseExpr(self): | ||||
|     def parseInstr(self): | ||||
|         instr = self.__getCurrInstr() | ||||
|  | ||||
|         # python, add switch statements *please* | ||||
| @@ -117,19 +204,19 @@ class LuaDecomp: | ||||
|             # move registers | ||||
|             self.__setReg(instr.A, self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.LOADK: | ||||
|             self.__setReg(instr.A, self.chunk.constants[instr.B].toCode()) | ||||
|             self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode()) | ||||
|         elif instr.opcode == Opcodes.LOADBOOL: | ||||
|             if instr.B == 0: | ||||
|                 self.__setReg(instr.A, "false") | ||||
|             else: | ||||
|                 self.__setReg(instr.A, "true") | ||||
|         elif instr.opcode == Opcodes.GETGLOBAL: | ||||
|             self.__setReg(instr.A, self.chunk.constants[instr.B].data) | ||||
|             self.__setReg(instr.A, self.chunk.getConstant(instr.B).data) | ||||
|         elif instr.opcode == Opcodes.GETTABLE: | ||||
|             self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") | ||||
|         elif instr.opcode == Opcodes.SETGLOBAL: | ||||
|             self.__startStatement() | ||||
|             self.__addExpr(self.chunk.constants[instr.B].data + " = " + self.__getReg(instr.A)) | ||||
|             self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) | ||||
|         elif instr.opcode == Opcodes.SETTABLE: | ||||
|             self.__startStatement() | ||||
|             self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) | ||||
| @@ -148,7 +235,7 @@ class LuaDecomp: | ||||
|         elif instr.opcode == Opcodes.UNM: | ||||
|             self.__setReg(instr.A, "-" + self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.NOT: | ||||
|             self.__setReg(instr.A, "!" + self.__getReg(instr.B)) | ||||
|             self.__setReg(instr.A, "not " + self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.LEN: | ||||
|             self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B)) | ||||
|         elif instr.opcode == Opcodes.CONCAT: | ||||
| @@ -163,19 +250,11 @@ class LuaDecomp: | ||||
|         elif instr.opcode == Opcodes.JMP: | ||||
|             pass | ||||
|         elif instr.opcode == Opcodes.EQ: | ||||
|             self.__startStatement() | ||||
|             if instr.A > 0: | ||||
|                 self.__addExpr("if not ") | ||||
|             else: | ||||
|                 self.__addExpr("if ") | ||||
|             self.__addExpr(self.__readRK(instr.B) + " == " + self.__readRK(instr.C) + " ") | ||||
|             self.__startScope("then ", self.__getNextInstr().B + 1) | ||||
|  | ||||
|             self.pc += 1 # skip next instr | ||||
|             self.__compJmp(" == ") | ||||
|         elif instr.opcode == Opcodes.LT: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " < ") | ||||
|             self.__compJmp(" < ") | ||||
|         elif instr.opcode == Opcodes.LE: | ||||
|             self.__emitOperand(instr.A, instr.B, instr.C, " <= ") | ||||
|             self.__compJmp(" <= ") | ||||
|         elif instr.opcode == Opcodes.CALL: | ||||
|             preStr = "" | ||||
|             callStr = "" | ||||
|   | ||||
							
								
								
									
										48
									
								
								lundump.py
									
									
									
									
									
								
							
							
						
						
									
										48
									
								
								lundump.py
									
									
									
									
									
								
							| @@ -68,7 +68,15 @@ class ConstType(IntEnum): | ||||
|  | ||||
| _RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT] | ||||
| _RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF] | ||||
| _KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL] | ||||
| _KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL] | ||||
|  | ||||
| # is an 'RK' value a K? (result is true for K, false for R) | ||||
| def whichRK(rk: int): | ||||
|     return (rk & (1 << 8)) > 0 | ||||
|  | ||||
| # read an RK as a K | ||||
| def readRKasK(rk: int): | ||||
|     return (rk & ~(1 << 8)) | ||||
|  | ||||
| class Instruction: | ||||
|     def __init__(self, type: InstructionType, name: str) -> None: | ||||
| @@ -80,9 +88,9 @@ class Instruction: | ||||
|         self.C: int = None | ||||
|  | ||||
|     # 'RK's are special because they can be either a register or a konstant; a bitflag is read to determine which | ||||
|     def __readRK(self, rk: int) -> str: | ||||
|         if (rk & (1 << 8)) > 0: | ||||
|             return "K[" + str((rk & ~(1 << 8))) + "]" | ||||
|     def __formatRK(self, rk: int) -> str: | ||||
|         if whichRK(rk): | ||||
|             return "K[" + str(readRKasK(rk)) + "]" | ||||
|         else: | ||||
|             return "R[" + str(rk) + "]" | ||||
|  | ||||
| @@ -93,28 +101,39 @@ class Instruction: | ||||
|         if self.type == InstructionType.ABC: | ||||
|             # by default, treat them as registers | ||||
|             A = "R[%d]" % self.A | ||||
|             B = "R[%d]" % self.B | ||||
|             C = "R[%d]" % self.C | ||||
|             B = "%d" % self.B | ||||
|             C = "%d" % self.C | ||||
|  | ||||
|             # these opcodes have RKs for B & C | ||||
|             if self.opcode in _RKBCInstr: | ||||
|                 B = self.__readRK(self.B) | ||||
|                 C = self.__readRK(self.C) | ||||
|                 B = self.__formatRK(self.B) | ||||
|                 C = self.__formatRK(self.C) | ||||
|             elif self.opcode in _RKCInstr: # just for C | ||||
|                 C = self.__readRK(self.C) | ||||
|                 C = self.__formatRK(self.C) | ||||
|  | ||||
|             regs = "%s %s %s" % (A, B, C)  | ||||
|             regs = "%6s %6s %6s" % (A, B, C)  | ||||
|         elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: | ||||
|             A = "R[%d]" % self.A | ||||
|             B = "R[%d]" % self.B | ||||
|             B = "%d" % self.B | ||||
|  | ||||
|             if self.opcode in _KBx: | ||||
|                 B = "K[%d]" % self.B | ||||
|  | ||||
|             regs = "%s %s" % (A, B) | ||||
|             regs = "%6s %6s" % (A, B) | ||||
|  | ||||
|         return "%s : %s" % (instr, regs) | ||||
|  | ||||
|     def getAnnotation(self, chunk): | ||||
|         if self.opcode == Opcodes.MOVE: | ||||
|             return "move R[%d] into R[%d]" % (self.B, self.A) | ||||
|         elif self.opcode == Opcodes.LOADK: | ||||
|             return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A) | ||||
|         elif self.opcode == Opcodes.CONCAT: | ||||
|             count = self.C - self.B + 1 | ||||
|             return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A) | ||||
|         else: | ||||
|             return "" | ||||
|  | ||||
| class Constant: | ||||
|     def __init__(self, type: ConstType, data) -> None: | ||||
|         self.type = type | ||||
| @@ -180,6 +199,9 @@ class Chunk: | ||||
|         # there's no local information (may have been stripped) | ||||
|         return None | ||||
|  | ||||
|     def getConstant(self, indx: int) -> Constant: | ||||
|         return self.constants[indx] | ||||
|  | ||||
|     def print(self): | ||||
|         print("\n==== [[" + str(self.name) + "'s constants]] ====\n") | ||||
|         for z in range(len(self.constants)): | ||||
| @@ -188,7 +210,7 @@ class Chunk: | ||||
|  | ||||
|         print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") | ||||
|         for i in range(len(self.instructions)): | ||||
|             print("[%3d] %s" % (i, self.instructions[i].toString())) | ||||
|             print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self))) | ||||
|  | ||||
|         if len(self.protos) > 0: | ||||
|             print("\n==== [[" + str(self.name) + "'s protos]] ====\n") | ||||
|   | ||||
		Reference in New Issue
	
	Block a user