mirror of
				https://github.com/CPunch/LuaDecompy.git
				synced 2025-02-03 05:50:08 +00:00 
			
		
		
		
	Compare commits
	
		
			11 Commits
		
	
	
		
			9da0d0ffbd
			...
			main
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| df8e9f7e83 | |||
| a22aa808e0 | |||
| 935844f274 | |||
| c37e9a21d8 | |||
| 34b1ec7285 | |||
| f9f1d4af00 | |||
| 3be45f156a | |||
| b28edcba1d | |||
| bc4e762e26 | |||
| 19bed999ee | |||
| a248cc4807 | 
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1,2 +1,3 @@ | ||||
| example.* | ||||
| __pycache__ | ||||
| NOTES.md | ||||
|   | ||||
							
								
								
									
										102
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										102
									
								
								README.md
									
									
									
									
									
								
							| @@ -12,51 +12,95 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma | ||||
|  | ||||
| ```sh | ||||
| > cat example.lua && luac5.1 -o example.luac example.lua | ||||
| local total = 0 | ||||
| local printMsg = function(append) | ||||
|     local tbl = {"He", "llo", " ", "Wo"} | ||||
|     local str = "" | ||||
|  | ||||
| for i = 0, 9, 1 do | ||||
|     total = total + i | ||||
|     print(total) | ||||
|     for i = 1, #tbl do | ||||
|         str = str .. tbl[i] | ||||
|     end | ||||
|  | ||||
|     print(str .. append) | ||||
| end | ||||
|  | ||||
| printMsg("rld!") | ||||
| > python main.py example.luac | ||||
| example.luac | ||||
|  | ||||
| ==== [[example.lua's constants]] ==== | ||||
|  | ||||
| 0: [NUMBER] 0.0 | ||||
| 1: [NUMBER] 9.0 | ||||
| 2: [NUMBER] 1.0 | ||||
| 3: [STRING] print | ||||
| 0: [STRING] rld! | ||||
|  | ||||
| ==== [[example.lua's locals]] ==== | ||||
|  | ||||
| R[0]: total | ||||
| R[1]: (for index) | ||||
| R[2]: (for limit) | ||||
| R[3]: (for step) | ||||
| R[4]: i | ||||
| R[0]: printMsg | ||||
|  | ||||
| ==== [[example.lua's dissassembly]] ==== | ||||
|  | ||||
| [  0]      LOADK :   R[0]   K[0]               ; load 0.0 into R[0] | ||||
| [  1]      LOADK :   R[1]   K[0]               ; load 0.0 into R[1] | ||||
| [  2]      LOADK :   R[2]   K[1]               ; load 9.0 into R[2] | ||||
| [  3]      LOADK :   R[3]   K[2]               ; load 1.0 into R[3] | ||||
| [  4]    FORPREP :   R[1]      4               ;  | ||||
| [  5]        ADD :   R[0]   R[0]   R[4]        ; add R[4] to R[0], place into R[0] | ||||
| [  6]  GETGLOBAL :   R[5]   K[3]               ; move _G["print"] into R[5] | ||||
| [  7]       MOVE :      6      0      0        ; move R[0] into R[6] | ||||
| [  8]       CALL :      5      2      1        ;  | ||||
| [  9]    FORLOOP :   R[1]     -5               ;  | ||||
| [ 10]     RETURN :      0      1      0        ;  | ||||
| [  0]    CLOSURE :   R[0]      0               ;  | ||||
| [  1]       MOVE :      1      0      0        ; move R[0] into R[1] | ||||
| [  2]      LOADK :   R[2]   K[0]               ; load "rld!" into R[2] | ||||
| [  3]       CALL :      1      2      1        ;  | ||||
| [  4]     RETURN :      0      1      0        ;  | ||||
|  | ||||
| ==== [[example.lua's decompiled source]] ==== | ||||
| ==== [[example.lua's protos]] ==== | ||||
|  | ||||
| local total = 0.0 | ||||
| for i = 0.0, 9.0, 1.0 do | ||||
|     total = (total + i) | ||||
|     print(total) | ||||
|  | ||||
| ==== [['s constants]] ==== | ||||
|  | ||||
| 0: [STRING] He | ||||
| 1: [STRING] llo | ||||
| 2: [STRING]   | ||||
| 3: [STRING] Wo | ||||
| 4: [STRING]  | ||||
| 5: [NUMBER] 1.0 | ||||
| 6: [STRING] print | ||||
|  | ||||
| ==== [['s locals]] ==== | ||||
|  | ||||
| R[0]: append | ||||
| R[1]: tbl | ||||
| R[2]: str | ||||
| R[3]: (for index) | ||||
| R[4]: (for limit) | ||||
| R[5]: (for step) | ||||
| R[6]: i | ||||
|  | ||||
| ==== [['s dissassembly]] ==== | ||||
|  | ||||
| [  0]   NEWTABLE :      1      4      0        ;  | ||||
| [  1]      LOADK :   R[2]   K[0]               ; load "He" into R[2] | ||||
| [  2]      LOADK :   R[3]   K[1]               ; load "llo" into R[3] | ||||
| [  3]      LOADK :   R[4]   K[2]               ; load " " into R[4] | ||||
| [  4]      LOADK :   R[5]   K[3]               ; load "Wo" into R[5] | ||||
| [  5]    SETLIST :      1      4      1        ;  | ||||
| [  6]      LOADK :   R[2]   K[4]               ; load "" into R[2] | ||||
| [  7]      LOADK :   R[3]   K[5]               ; load 1 into R[3] | ||||
| [  8]        LEN :      4      1      0        ;  | ||||
| [  9]      LOADK :   R[5]   K[5]               ; load 1 into R[5] | ||||
| [ 10]    FORPREP :   R[3]      3               ;  | ||||
| [ 11]       MOVE :      7      2      0        ; move R[2] into R[7] | ||||
| [ 12]   GETTABLE :   R[8]      1   R[6]        ;  | ||||
| [ 13]     CONCAT :      2      7      8        ; concat 2 values from R[7] to R[8], store into R[2] | ||||
| [ 14]    FORLOOP :   R[3]     -4               ;  | ||||
| [ 15]  GETGLOBAL :   R[3]   K[6]               ; move _G["print"] into R[3] | ||||
| [ 16]       MOVE :      4      2      0        ; move R[2] into R[4] | ||||
| [ 17]       MOVE :      5      0      0        ; move R[0] into R[5] | ||||
| [ 18]     CONCAT :      4      4      5        ; concat 2 values from R[4] to R[5], store into R[4] | ||||
| [ 19]       CALL :      3      2      1        ;  | ||||
| [ 20]     RETURN :      0      1      0        ;  | ||||
|  | ||||
| ==== [[example.lua's pseudo-code]] ==== | ||||
|  | ||||
| local printMsg = function(append) | ||||
|     local tbl = {"He", "llo", " ", "Wo", } | ||||
|     local str = "" | ||||
|     for i = 1, #tbl, 1 do | ||||
|         str = str .. tbl[i] | ||||
|     end | ||||
|     print(str .. append) | ||||
| end | ||||
|  | ||||
| printMsg("rld!") | ||||
|  | ||||
| ``` | ||||
							
								
								
									
										299
									
								
								lparser.py
									
									
									
									
									
								
							
							
						
						
									
										299
									
								
								lparser.py
									
									
									
									
									
								
							| @@ -6,9 +6,6 @@ | ||||
|     An experimental bytecode decompiler. | ||||
| ''' | ||||
|  | ||||
| from operator import concat | ||||
| from subprocess import call | ||||
| from xmlrpc.client import Boolean | ||||
| from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK | ||||
|  | ||||
| class _Scope: | ||||
| @@ -30,14 +27,19 @@ class _Line: | ||||
|         self.scope = scope | ||||
|  | ||||
| def isValidLocal(ident: str) -> bool: | ||||
|     for c in ident: | ||||
|         if c not in "abcdefghijklmnopqrstuvwxyz1234567890_": | ||||
|     # has to start with an alpha or _ | ||||
|     if ident[0] not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_": | ||||
|         return False | ||||
|  | ||||
|     # then it can be alphanum or _ | ||||
|     for c in ident[1:]: | ||||
|         if c not in "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ1234567890_": | ||||
|             return False | ||||
|  | ||||
|     return True | ||||
|  | ||||
| class LuaDecomp: | ||||
|     def __init__(self, chunk: Chunk): | ||||
|     def __init__(self, chunk: Chunk, headChunk: bool = True, scopeOffset: int = 0): | ||||
|         self.chunk = chunk | ||||
|         self.pc = 0 | ||||
|         self.scope: list[_Scope] = [] | ||||
| @@ -46,6 +48,8 @@ class LuaDecomp: | ||||
|         self.locals = {} | ||||
|         self.traceback = {} | ||||
|         self.unknownLocalCount = 0 | ||||
|         self.headChunk = headChunk | ||||
|         self.scopeOffset = scopeOffset # number of scopes this chunk/proto is in | ||||
|         self.src: str = "" | ||||
|  | ||||
|         # configurations! | ||||
| @@ -55,6 +59,20 @@ class LuaDecomp: | ||||
|  | ||||
|         self.__loadLocals() | ||||
|  | ||||
|         if not self.headChunk: | ||||
|             functionProto = "function(" | ||||
|  | ||||
|             # define params | ||||
|             for i in range(self.chunk.numParams): | ||||
|                 # add param to function prototype (also make a local in the register if it doesn't exist) | ||||
|                 functionProto += ("%s, " if i+1 < self.chunk.numParams else "%s") % self.__makeLocalIdentifier(i) | ||||
|  | ||||
|                 # mark local as defined | ||||
|                 self.__addSetTraceback(i) | ||||
|             functionProto += ")" | ||||
|  | ||||
|             self.__startScope(functionProto, 0, len(self.chunk.instructions)) | ||||
|  | ||||
|         # parse instructions | ||||
|         while self.pc < len(self.chunk.instructions): | ||||
|             self.parseInstr() | ||||
| @@ -63,12 +81,18 @@ class LuaDecomp: | ||||
|             # end the scope (if we're supposed to) | ||||
|             self.__checkScope() | ||||
|  | ||||
|         print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") | ||||
|         if not self.headChunk: | ||||
|             self.__endScope() | ||||
|  | ||||
|     def getPseudoCode(self) -> str: | ||||
|         fullSrc = "" | ||||
|  | ||||
|         for line in self.lines: | ||||
|             if self.annotateLines: | ||||
|                 print("-- PC: %d to PC: %d" % (line.startPC, line.endPC)) | ||||
|             print(((' ' * self.indexWidth) * line.scope) + line.src) | ||||
|                 fullSrc += "-- PC: %d to PC: %d\n" % (line.startPC, line.endPC) | ||||
|             fullSrc += ((' ' * self.indexWidth) * (line.scope + self.scopeOffset)) + line.src + "\n" | ||||
|  | ||||
|         return fullSrc | ||||
|  | ||||
|     # =======================================[[ Helpers ]]========================================= | ||||
|  | ||||
| @@ -121,7 +145,7 @@ class LuaDecomp: | ||||
|         self.src = "" | ||||
|  | ||||
|     # walks traceback, if local wasn't set before, the local needs to be defined | ||||
|     def __needsDefined(self, reg) -> Boolean: | ||||
|     def __needsDefined(self, reg) -> bool: | ||||
|         for _, trace in self.traceback.items(): | ||||
|             if reg in trace.sets: | ||||
|                 return False | ||||
| @@ -147,7 +171,7 @@ class LuaDecomp: | ||||
|         # if the top indx is a local, get it | ||||
|         return self.locals[indx] if indx in self.locals else self.top[indx] | ||||
|  | ||||
|     def __setReg(self, indx: int, code: str) -> None: | ||||
|     def __setReg(self, indx: int, code: str, forceLocal: bool = False) -> None: | ||||
|         # if the top indx is a local, set it | ||||
|         if indx in self.locals: | ||||
|             if self.__needsDefined(indx): | ||||
| @@ -155,10 +179,9 @@ class LuaDecomp: | ||||
|             else: | ||||
|                 self.__addExpr(self.locals[indx] + " = " + code) | ||||
|                 self.__endStatement() | ||||
|         elif self.aggressiveLocals: # 'every register is a local!!' | ||||
|         elif self.aggressiveLocals or forceLocal: # 'every register is a local!!' | ||||
|             self.__newLocal(indx, code) | ||||
|  | ||||
|  | ||||
|         self.__addSetTraceback(indx) | ||||
|         self.top[indx] = code | ||||
|  | ||||
| @@ -176,7 +199,6 @@ class LuaDecomp: | ||||
|         return self.locals[indx] | ||||
|  | ||||
|     def __newLocal(self, indx: int, expr: str) -> None: | ||||
|         # TODO: grab identifier from chunk(?) | ||||
|         self.__makeLocalIdentifier(indx) | ||||
|  | ||||
|         self.__addExpr("local " + self.locals[indx] + " = " + expr) | ||||
| @@ -202,12 +224,15 @@ class LuaDecomp: | ||||
|         self.__addExpr("end") | ||||
|         self.scope.pop() | ||||
|  | ||||
|         self.__endStatement() | ||||
|  | ||||
|     # =====================================[[ Instructions ]]====================================== | ||||
|  | ||||
|     def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: | ||||
|         self.__setReg(a, "(" + b + op + c + ")") | ||||
|  | ||||
|     def __compJmp(self, op: str): | ||||
|     # handles conditional jumps | ||||
|     def __condJmp(self, op: str, rkBC: bool = True): | ||||
|         instr = self.__getCurrInstr() | ||||
|         jmpType = "if" | ||||
|         scopeStart = "then" | ||||
| @@ -230,7 +255,13 @@ class LuaDecomp: | ||||
|             self.__addExpr("%s not " % jmpType) | ||||
|         else: | ||||
|             self.__addExpr("%s " % jmpType) | ||||
|         self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") | ||||
|  | ||||
|         # write actual comparison | ||||
|         if rkBC: | ||||
|             self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") | ||||
|         else: # just testing rkB | ||||
|             self.__addExpr(op + self.__readRK(instr.B)) | ||||
|  | ||||
|         self.pc += 1 # skip next instr | ||||
|         if scopeStart: | ||||
|             self.__startScope("%s " % scopeStart, self.pc - 1, jmp) | ||||
| @@ -256,102 +287,160 @@ class LuaDecomp: | ||||
|         else: | ||||
|             return self.__getReg(rk) | ||||
|  | ||||
|     # walk & peek ahead NEWTABLE | ||||
|     def __parseNewTable(self, indx: int): | ||||
|         # TODO: parse SETTABLE too? | ||||
|         tblOps = [Opcodes.LOADK, Opcodes.SETLIST] | ||||
|  | ||||
|         instr = self.__getNextInstr() | ||||
|         cachedRegs = {} | ||||
|         tbl = "{" | ||||
|         while instr.opcode in tblOps: | ||||
|             if instr.opcode == Opcodes.LOADK: # operate on registers | ||||
|                 cachedRegs[instr.A] = self.chunk.getConstant(instr.B).toCode() | ||||
|             elif instr.opcode == Opcodes.SETLIST: | ||||
|                 numElems = instr.B | ||||
|  | ||||
|                 for i in range(numElems): | ||||
|                     tbl += "%s, " % cachedRegs[instr.A + i + 1] | ||||
|                     del cachedRegs[instr.A + i + 1] | ||||
|  | ||||
|             self.pc += 1 | ||||
|             instr = self.__getNextInstr() | ||||
|         tbl += "}" | ||||
|  | ||||
|         # i use forceLocal here even though i don't know *for sure* that the register is a local. | ||||
|         # this does help later though if the table is reused (which is 99% of the time). the other 1% | ||||
|         # only affects syntax and may look a little weird but is fine and equivalent nonetheless | ||||
|         self.__setReg(indx, tbl, forceLocal=True) | ||||
|         self.__endStatement() | ||||
|  | ||||
|         # if we have leftovers... oops, set those | ||||
|         for i, v in cachedRegs.items(): | ||||
|             self.__setReg(i, v) | ||||
|  | ||||
|     def parseInstr(self): | ||||
|         instr = self.__getCurrInstr() | ||||
|  | ||||
|         # python, add switch statements *please* | ||||
|         if instr.opcode == Opcodes.MOVE: # move is a fake ABC instr, C is ignored | ||||
|             # move registers | ||||
|             self.__setReg(instr.A, self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.LOADK: | ||||
|             self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode()) | ||||
|         elif instr.opcode == Opcodes.LOADBOOL: | ||||
|             if instr.B == 0: | ||||
|                 self.__setReg(instr.A, "false") | ||||
|             else: | ||||
|                 self.__setReg(instr.A, "true") | ||||
|         elif instr.opcode == Opcodes.GETGLOBAL: | ||||
|             self.__setReg(instr.A, self.chunk.getConstant(instr.B).data) | ||||
|         elif instr.opcode == Opcodes.GETTABLE: | ||||
|             self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") | ||||
|         elif instr.opcode == Opcodes.SETGLOBAL: | ||||
|             self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) | ||||
|             self.__endStatement() | ||||
|         elif instr.opcode == Opcodes.SETTABLE: | ||||
|             self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) | ||||
|             self.__endStatement() | ||||
|         elif instr.opcode == Opcodes.ADD: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") | ||||
|         elif instr.opcode == Opcodes.SUB: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ") | ||||
|         elif instr.opcode == Opcodes.MUL: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ") | ||||
|         elif instr.opcode == Opcodes.DIV: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ") | ||||
|         elif instr.opcode == Opcodes.MOD: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ") | ||||
|         elif instr.opcode == Opcodes.POW: | ||||
|             self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ") | ||||
|         elif instr.opcode == Opcodes.UNM: | ||||
|             self.__setReg(instr.A, "-" + self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.NOT: | ||||
|             self.__setReg(instr.A, "not " + self.__getReg(instr.B)) | ||||
|         elif instr.opcode == Opcodes.LEN: | ||||
|             self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B)) | ||||
|         elif instr.opcode == Opcodes.CONCAT: | ||||
|             count = instr.C-instr.B+1 | ||||
|             concatStr = "" | ||||
|         match instr.opcode: | ||||
|             case Opcodes.MOVE: # move is a fake ABC instr, C is ignored | ||||
|                 # move registers | ||||
|                 self.__setReg(instr.A, self.__getReg(instr.B)) | ||||
|             case Opcodes.LOADK: | ||||
|                 self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode()) | ||||
|             case Opcodes.LOADBOOL: | ||||
|                 if instr.B == 0: | ||||
|                     self.__setReg(instr.A, "false") | ||||
|                 else: | ||||
|                     self.__setReg(instr.A, "true") | ||||
|             case Opcodes.GETGLOBAL: | ||||
|                 self.__setReg(instr.A, self.chunk.getConstant(instr.B).data) | ||||
|             case Opcodes.GETTABLE: | ||||
|                 self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") | ||||
|             case Opcodes.SETGLOBAL: | ||||
|                 self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) | ||||
|                 self.__endStatement() | ||||
|             case Opcodes.SETTABLE: | ||||
|                 self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) | ||||
|                 self.__endStatement() | ||||
|             case Opcodes.NEWTABLE: | ||||
|                 self.__parseNewTable(instr.A) | ||||
|             case Opcodes.ADD: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") | ||||
|             case Opcodes.SUB: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ") | ||||
|             case Opcodes.MUL: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ") | ||||
|             case Opcodes.DIV: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ") | ||||
|             case Opcodes.MOD: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ") | ||||
|             case Opcodes.POW: | ||||
|                 self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ") | ||||
|             case Opcodes.UNM: | ||||
|                 self.__setReg(instr.A, "-" + self.__getReg(instr.B)) | ||||
|             case Opcodes.NOT: | ||||
|                 self.__setReg(instr.A, "not " + self.__getReg(instr.B)) | ||||
|             case Opcodes.LEN: | ||||
|                 self.__setReg(instr.A, "#" + self.__getReg(instr.B)) | ||||
|             case Opcodes.CONCAT: | ||||
|                 count = instr.C-instr.B+1 | ||||
|                 concatStr = "" | ||||
|  | ||||
|             # concat all items on stack from RC to RB | ||||
|             for i in range(count): | ||||
|                 concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "") | ||||
|                 # concat all items on stack from RC to RB | ||||
|                 for i in range(count): | ||||
|                     concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "") | ||||
|  | ||||
|             self.__setReg(instr.A, concatStr) | ||||
|         elif instr.opcode == Opcodes.JMP: | ||||
|             pass | ||||
|         elif instr.opcode == Opcodes.EQ: | ||||
|             self.__compJmp(" == ") | ||||
|         elif instr.opcode == Opcodes.LT: | ||||
|             self.__compJmp(" < ") | ||||
|         elif instr.opcode == Opcodes.LE: | ||||
|             self.__compJmp(" <= ") | ||||
|         elif instr.opcode == Opcodes.CALL: | ||||
|             preStr = "" | ||||
|             callStr = "" | ||||
|             ident = "" | ||||
|                 self.__setReg(instr.A, concatStr) | ||||
|             case Opcodes.JMP: | ||||
|                 pass | ||||
|             case Opcodes.EQ: | ||||
|                 self.__condJmp(" == ") | ||||
|             case Opcodes.LT: | ||||
|                 self.__condJmp(" < ") | ||||
|             case Opcodes.LE: | ||||
|                 self.__condJmp(" <= ") | ||||
|             case Opcodes.TEST: | ||||
|                 if instr.C == 0: | ||||
|                     self.__condJmp("", False) | ||||
|                 else: | ||||
|                     self.__condJmp("not ", False) | ||||
|             case Opcodes.CALL: | ||||
|                 preStr = "" | ||||
|                 callStr = "" | ||||
|                 ident = "" | ||||
|  | ||||
|             # parse arguments | ||||
|             callStr += self.__getReg(instr.A) + "(" | ||||
|             for i in range(instr.A + 1, instr.A + instr.B): | ||||
|                 callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "") | ||||
|             callStr += ")" | ||||
|                 # parse arguments | ||||
|                 callStr += self.__getReg(instr.A) + "(" | ||||
|                 for i in range(instr.A + 1, instr.A + instr.B): | ||||
|                     callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "") | ||||
|                 callStr += ")" | ||||
|  | ||||
|             # parse return values | ||||
|             if instr.C > 1: | ||||
|                 preStr = "local " | ||||
|                 for indx  in range(instr.A, instr.A + instr.C - 1): | ||||
|                     if indx in self.locals: | ||||
|                         ident = self.locals[indx] | ||||
|                     else: | ||||
|                         ident = self.__makeLocalIdentifier(indx) | ||||
|                     preStr += ident | ||||
|                 # parse return values | ||||
|                 if instr.C > 1: | ||||
|                     preStr = "local " | ||||
|                     for indx  in range(instr.A, instr.A + instr.C - 1): | ||||
|                         if indx in self.locals: | ||||
|                             ident = self.locals[indx] | ||||
|                         else: | ||||
|                             ident = self.__makeLocalIdentifier(indx) | ||||
|                         preStr += ident | ||||
|  | ||||
|                     # normally setReg() does this | ||||
|                     self.top[indx] = ident | ||||
|                         # normally setReg() does this | ||||
|                         self.top[indx] = ident | ||||
|  | ||||
|                     # just so we don't have a trailing ', ' | ||||
|                     preStr += ", " if not indx == instr.A + instr.C - 2 else "" | ||||
|                 preStr += " = " | ||||
|                         # just so we don't have a trailing ', ' | ||||
|                         preStr += ", " if not indx == instr.A + instr.C - 2 else "" | ||||
|                     preStr += " = " | ||||
|  | ||||
|             self.__addExpr(preStr + callStr) | ||||
|             self.__endStatement() | ||||
|         elif instr.opcode == Opcodes.RETURN: | ||||
|             self.__endStatement() | ||||
|             pass # no-op for now | ||||
|         elif instr.opcode == Opcodes.FORLOOP: | ||||
|             pass # no-op for now | ||||
|         elif instr.opcode == Opcodes.FORPREP: | ||||
|             self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2))) | ||||
|             self.__startScope("do", self.pc, instr.B) | ||||
|         else: | ||||
|             raise Exception("unsupported instruction: %s" % instr.toString()) | ||||
|                 self.__addExpr(preStr + callStr) | ||||
|                 self.__endStatement() | ||||
|             case Opcodes.RETURN: | ||||
|                 self.__endStatement() | ||||
|                 pass # no-op for now | ||||
|             case Opcodes.FORLOOP: | ||||
|                 pass # no-op for now | ||||
|             case Opcodes.FORPREP: | ||||
|                 self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2))) | ||||
|                 self.__startScope("do", self.pc, instr.B) | ||||
|             case Opcodes.SETLIST: | ||||
|                 # LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST | ||||
|                 # eg. | ||||
|                 # [ 49]      LOADK :  R[49]   K[1]               ; load 0.0 into R[49] | ||||
|                 # [ 50]      LOADK :  R[50]   K[1]               ; load 0.0 into R[50] | ||||
|                 # [ 51]    SETLIST :      0     50      1        ; sets list[1..50] | ||||
|                 # [ 52]      LOADK :   R[1]   K[1]               ; load 0.0 into R[1] | ||||
|                 # [ 53]    SETLIST :      0      1      2        ; sets list[51..51] | ||||
|                 numElems = instr.B | ||||
|                 startAt = ((instr.C - 1) * 50) | ||||
|                 ident = self.__getLocal(instr.A) | ||||
|  | ||||
|                 # set each index (TODO: make tables less verbose) | ||||
|                 for i in range(numElems): | ||||
|                     self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1))) | ||||
|                     self.__endStatement() | ||||
|             case Opcodes.CLOSURE: | ||||
|                 proto = LuaDecomp(self.chunk.protos[instr.B], headChunk=False, scopeOffset=len(self.scope)) | ||||
|                 self.__setReg(instr.A, proto.getPseudoCode()) | ||||
|             case _: | ||||
|                 raise Exception("unsupported instruction: %s" % instr.toString()) | ||||
							
								
								
									
										265
									
								
								lundump.py
									
									
									
									
									
								
							
							
						
						
									
										265
									
								
								lundump.py
									
									
									
									
									
								
							| @@ -1,7 +1,7 @@ | ||||
| ''' | ||||
|     l(un)dump.py | ||||
|  | ||||
|     A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the | ||||
|     A Lua5.1 cross-platform bytecode deserializer and serializer. This module pulls int and size_t sizes from the | ||||
|     chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, | ||||
|     regardless of the host machine. | ||||
|  | ||||
| @@ -9,11 +9,9 @@ | ||||
|     as well as read the lundump.c source file from the Lua5.1 source. | ||||
| ''' | ||||
|  | ||||
| from multiprocessing.spawn import get_executable | ||||
| import struct | ||||
| import array | ||||
| from enum import IntEnum, Enum, auto | ||||
| from typing_extensions import Self | ||||
|  | ||||
| class InstructionType(Enum): | ||||
|     ABC = auto(), | ||||
| @@ -70,6 +68,8 @@ _RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.D | ||||
| _RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF] | ||||
| _KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL] | ||||
|  | ||||
| _LUAMAGIC = b'\x1bLua' | ||||
|  | ||||
| # is an 'RK' value a K? (result is true for K, false for R) | ||||
| def whichRK(rk: int): | ||||
|     return (rk & (1 << 8)) > 0 | ||||
| @@ -152,7 +152,7 @@ class Constant: | ||||
|         self.data = data | ||||
|  | ||||
|     def toString(self): | ||||
|         return "[" + self.type.name + "] " + str(self.data) | ||||
|         return "[%s] %s" % (self.type.name, str(self.data)) | ||||
|  | ||||
|     # format the constant so that it is parsable by lua | ||||
|     def toCode(self): | ||||
| @@ -164,7 +164,7 @@ class Constant: | ||||
|             else: | ||||
|                 return "false" | ||||
|         elif self.type == ConstType.NUMBER: | ||||
|             return str(self.data) | ||||
|             return "%g" % self.data | ||||
|         else: | ||||
|             return "nil" | ||||
|  | ||||
| @@ -189,6 +189,7 @@ class Chunk: | ||||
|         self.maxStack: int = 0 | ||||
|  | ||||
|         self.upvalues: list[str] = [] | ||||
|         self.lineNums: list[int] = [] | ||||
|         self.locals: list[Local] = [] | ||||
|  | ||||
|     def appendInstruction(self, instr: Instruction): | ||||
| @@ -200,9 +201,15 @@ class Chunk: | ||||
|     def appendProto(self, proto): | ||||
|         self.protos.append(proto) | ||||
|  | ||||
|     def appendLine(self, line: int): | ||||
|         self.lineNums.append(line) | ||||
|  | ||||
|     def appendLocal(self, local: Local): | ||||
|         self.locals.append(local) | ||||
|  | ||||
|     def appendUpval(self, upval: str): | ||||
|         self.upvalues.append(upval) | ||||
|  | ||||
|     def findLocal(self, pc: int) -> Local: | ||||
|         for l in self.locals: | ||||
|             if l.start <= pc and l.end >= pc: | ||||
| @@ -298,11 +305,7 @@ class LuaUndump: | ||||
|         self.rootChunk: Chunk = None | ||||
|         self.index = 0 | ||||
|  | ||||
|     @staticmethod | ||||
|     def dis_chunk(chunk: Chunk): | ||||
|         chunk.print() | ||||
|      | ||||
|     def loadBlock(self, sz) -> bytearray: | ||||
|     def _loadBlock(self, sz) -> bytearray: | ||||
|         if self.index + sz > len(self.bytecode): | ||||
|             raise Exception("Malformed bytecode!") | ||||
|  | ||||
| @@ -310,82 +313,71 @@ class LuaUndump: | ||||
|         self.index = self.index + sz | ||||
|         return temp | ||||
|  | ||||
|     def get_byte(self) -> int: | ||||
|         return self.loadBlock(1)[0] | ||||
|     def _get_byte(self) -> int: | ||||
|         return self._loadBlock(1)[0] | ||||
|  | ||||
|     def get_int32(self) -> int: | ||||
|         if (self.big_endian): | ||||
|             return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False) | ||||
|         else: | ||||
|             return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False) | ||||
|     def _get_uint32(self) -> int: | ||||
|         order = 'big' if self.big_endian else 'little' | ||||
|         return int.from_bytes(self._loadBlock(4), byteorder=order, signed=False) | ||||
|  | ||||
|     def get_int(self) -> int: | ||||
|         if (self.big_endian): | ||||
|             return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False) | ||||
|         else: | ||||
|             return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False) | ||||
|     def _get_uint(self) -> int: | ||||
|         order = 'big' if self.big_endian else 'little' | ||||
|         return int.from_bytes(self._loadBlock(self.int_size), byteorder=order, signed=False) | ||||
|  | ||||
|     def get_size_t(self) -> int: | ||||
|         if (self.big_endian): | ||||
|             return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False) | ||||
|         else: | ||||
|             return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False) | ||||
|     def _get_size_t(self) -> int: | ||||
|         order = 'big' if self.big_endian else 'little' | ||||
|         return int.from_bytes(self._loadBlock(self.size_t), byteorder=order, signed=False) | ||||
|  | ||||
|     def get_double(self) -> int: | ||||
|         if self.big_endian: | ||||
|             return struct.unpack('>d', self.loadBlock(8))[0] | ||||
|         else: | ||||
|             return struct.unpack('<d', self.loadBlock(8))[0] | ||||
|     def _get_double(self) -> int: | ||||
|         order = '>d' if self.big_endian else '<d' | ||||
|         return struct.unpack(order, self._loadBlock(self.l_number_size))[0] | ||||
|  | ||||
|     def get_string(self, size) -> str: | ||||
|         if (size == None): | ||||
|             size = self.get_size_t() | ||||
|             if (size == 0): | ||||
|                 return "" | ||||
|     def _get_string(self) -> str: | ||||
|         size = self._get_size_t() | ||||
|         if (size == 0): | ||||
|             return "" | ||||
|  | ||||
|         return "".join(chr(x) for x in self.loadBlock(size)) | ||||
|         # [:-1] to remove the NULL terminator | ||||
|         return ("".join(chr(x) for x in self._loadBlock(size)))[:-1] | ||||
|  | ||||
|     def decode_chunk(self) -> Chunk: | ||||
|         chunk = Chunk() | ||||
|  | ||||
|         chunk.name = self.get_string(None) | ||||
|         chunk.frst_line = self.get_int() | ||||
|         chunk.last_line = self.get_int() | ||||
|  | ||||
|         chunk.numUpvals = self.get_byte() | ||||
|         chunk.numParams = self.get_byte() | ||||
|         chunk.isVarg = (self.get_byte() != 0) | ||||
|         chunk.maxStack = self.get_byte() | ||||
|  | ||||
|         if (not chunk.name == ""): | ||||
|             chunk.name = chunk.name[1:-1] | ||||
|         # chunk meta info | ||||
|         chunk.name = self._get_string() | ||||
|         chunk.frst_line = self._get_uint() | ||||
|         chunk.last_line = self._get_uint() | ||||
|         chunk.numUpvals = self._get_byte() | ||||
|         chunk.numParams = self._get_byte() | ||||
|         chunk.isVarg = (self._get_byte() != 0) | ||||
|         chunk.maxStack = self._get_byte() | ||||
|  | ||||
|         # parse instructions | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             chunk.appendInstruction(_decode_instr(self.get_int32())) | ||||
|             chunk.appendInstruction(_decode_instr(self._get_uint32())) | ||||
|  | ||||
|         # get constants | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             constant: Constant = None | ||||
|             type = self.get_byte() | ||||
|             type = self._get_byte() | ||||
|  | ||||
|             if type == 0: #nil | ||||
|             if type == 0: # nil | ||||
|                 constant = Constant(ConstType.NIL, None) | ||||
|             elif type == 1: # bool | ||||
|                 constant = Constant(ConstType.BOOL, (self.get_byte() != 0)) | ||||
|                 constant = Constant(ConstType.BOOL, (self._get_byte() != 0)) | ||||
|             elif type == 3: # number | ||||
|                 constant = Constant(ConstType.NUMBER, self.get_double()) | ||||
|                 constant = Constant(ConstType.NUMBER, self._get_double()) | ||||
|             elif type == 4: # string | ||||
|                 constant = Constant(ConstType.STRING, self.get_string(None)[:-1]) | ||||
|                 constant = Constant(ConstType.STRING, self._get_string()) | ||||
|             else: | ||||
|                 raise Exception("Unknown Datatype! [%d]" % type) | ||||
|  | ||||
|             chunk.appendConstant(constant) | ||||
|  | ||||
|         # parse protos | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             chunk.appendProto(self.decode_chunk()) | ||||
|  | ||||
| @@ -393,47 +385,47 @@ class LuaUndump: | ||||
|         # eh, for now just consume the bytes. | ||||
|  | ||||
|         # line numbers | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             self.get_int() | ||||
|             self._get_uint() | ||||
|  | ||||
|         # locals | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             name = self.get_string(None)[:-1] # local name ([:-1] to remove the NULL terminator) | ||||
|             start = self.get_int() # local start PC | ||||
|             end = self.get_int() # local end PC | ||||
|             name = self._get_string() # local name | ||||
|             start = self._get_uint() # local start PC | ||||
|             end = self._get_uint() # local end PC | ||||
|             chunk.appendLocal(Local(name, start, end)) | ||||
|  | ||||
|         # upvalues | ||||
|         num = self.get_int() | ||||
|         num = self._get_uint() | ||||
|         for i in range(num): | ||||
|             self.get_string(None) # upvalue name | ||||
|             chunk.appendUpval(self._get_string()) # upvalue name | ||||
|  | ||||
|         return chunk | ||||
|  | ||||
|     def decode_rawbytecode(self, rawbytecode): | ||||
|         # bytecode sanity checks | ||||
|         if not rawbytecode[0:4] == b'\x1bLua': | ||||
|         if not rawbytecode[0:4] == _LUAMAGIC: | ||||
|             raise Exception("Lua Bytecode expected!") | ||||
|  | ||||
|         bytecode = array.array('b', rawbytecode) | ||||
|         return self.decode_bytecode(bytecode) | ||||
|  | ||||
|     def decode_bytecode(self, bytecode): | ||||
|         self.bytecode   = bytecode | ||||
|         self.bytecode = bytecode | ||||
|  | ||||
|         # aligns index, skips header | ||||
|         self.index = 4 | ||||
|          | ||||
|         self.vm_version = self.get_byte() | ||||
|         self.bytecode_format = self.get_byte() | ||||
|         self.big_endian = (self.get_byte() == 0) | ||||
|         self.int_size   = self.get_byte() | ||||
|         self.size_t     = self.get_byte() | ||||
|         self.instr_size = self.get_byte() # gets size of instructions | ||||
|         self.l_number_size = self.get_byte() # size of lua_Number | ||||
|         self.integral_flag = self.get_byte() | ||||
|  | ||||
|         self.vm_version = self._get_byte() | ||||
|         self.bytecode_format = self._get_byte() | ||||
|         self.big_endian = (self._get_byte() == 0) | ||||
|         self.int_size   = self._get_byte() | ||||
|         self.size_t     = self._get_byte() | ||||
|         self.instr_size = self._get_byte() # gets size of instructions | ||||
|         self.l_number_size = self._get_byte() # size of lua_Number | ||||
|         self.integral_flag = self._get_byte() # is lua_Number defined as an int? false = float/double, true = int/long/short/etc. | ||||
|  | ||||
|         self.rootChunk = self.decode_chunk() | ||||
|         return self.rootChunk | ||||
| @@ -444,5 +436,122 @@ class LuaUndump: | ||||
|             return self.decode_rawbytecode(bytecode) | ||||
|  | ||||
|     def print_dissassembly(self): | ||||
|         LuaUndump.dis_chunk(self.rootChunk) | ||||
|         self.rootChunk.print() | ||||
|  | ||||
class LuaDump:
    """Serialize a Chunk tree into a Lua 5.1 binary chunk.

    The writer mirrors the field order consumed by LuaUndump.decode_chunk:
    header, then for every chunk its meta info, instructions, constants,
    child protos, line numbers, locals and upvalue names.

    The header values (byte order and the sizes of int, size_t, Instruction
    and lua_Number) are plain attributes set in __init__; change them on the
    instance before calling dump() to target a different layout.
    """

    def __init__(self, rootChunk: "Chunk"):
        """Prepare a dumper for rootChunk; nothing is written until dump()."""
        self.rootChunk = rootChunk
        self.bytecode = bytearray()

        # header info
        self.vm_version = 0x51
        self.bytecode_format = 0x00
        self.big_endian = False

        # data sizes
        self.int_size = 4
        self.size_t = 8
        self.instr_size = 4
        self.l_number_size = 8
        self.integral_flag = False # lua_Number is a double

    def _byteorder(self) -> str:
        """Return the int.to_bytes() byte-order name for the configured endianness."""
        return 'big' if self.big_endian else 'little'

    def _writeBlock(self, data: bytes):
        """Append raw bytes to the output buffer."""
        self.bytecode += bytearray(data)

    def _set_byte(self, b: int):
        """Append a single byte (0-255) to the output buffer."""
        self.bytecode.append(b)

    def _set_uint32(self, i: int):
        """Write a fixed 4-byte unsigned integer (used for instructions)."""
        self._writeBlock(i.to_bytes(4, self._byteorder(), signed=False))

    def _set_uint(self, i: int):
        """Write an unsigned integer that is self.int_size bytes wide."""
        self._writeBlock(i.to_bytes(self.int_size, self._byteorder(), signed=False))

    def _set_size_t(self, i: int):
        """Write an unsigned integer that is self.size_t bytes wide."""
        self._writeBlock(i.to_bytes(self.size_t, self._byteorder(), signed=False))

    def _set_double(self, f: float):
        """Write f as an 8-byte IEEE double in the configured byte order."""
        fmt = '>d' if self.big_endian else '<d'
        self._writeBlock(struct.pack(fmt, f))

    def _set_string(self, string: str):
        """Write a size-prefixed, NUL-terminated string.

        The size prefix counts the encoded bytes plus the terminator. It is
        taken from the encoded data rather than from len(string) so that it
        stays correct when a character encodes to more than one byte.
        """
        data = string.encode('utf-8')
        self._set_size_t(len(data) + 1)
        self._writeBlock(data)
        self._set_byte(0x00) # write null terminator

    def _dumpChunk(self, chunk: "Chunk"):
        """Write chunk, and recursively its child protos, to the buffer.

        Raises:
            Exception: if a constant has a type other than NIL, BOOL,
                NUMBER or STRING.
        """
        # write meta info
        self._set_string(chunk.name)
        self._set_uint(chunk.frst_line)
        self._set_uint(chunk.last_line)
        self._set_byte(chunk.numUpvals)
        self._set_byte(chunk.numParams)
        # LuaUndump reads this byte back as (byte != 0), so a non-vararg
        # function has to be written as 0.
        # NOTE(review): Lua 5.1 treats is_vararg as a bit field; only a
        # boolean is kept on Chunk, so 1 is written for every vararg
        # function -- confirm that is acceptable for the target VM.
        self._set_byte(1 if chunk.isVarg else 0)
        self._set_byte(chunk.maxStack)

        # write instructions
        self._set_uint(len(chunk.instructions))
        for instr in chunk.instructions:
            self._set_uint32(_encode_instr(instr))

        # write constants: one type-tag byte followed by the payload
        self._set_uint(len(chunk.constants))
        for constant in chunk.constants:
            if constant.type == ConstType.NIL:
                self._set_byte(0)
            elif constant.type == ConstType.BOOL:
                self._set_byte(1)
                self._set_byte(1 if constant.data else 0)
            elif constant.type == ConstType.NUMBER: # number
                self._set_byte(3)
                self._set_double(constant.data)
            elif constant.type == ConstType.STRING: # string
                self._set_byte(4)
                self._set_string(constant.data)
            else:
                raise Exception("Unknown Datatype! [%s]" % str(constant.type))

        # write child protos
        self._set_uint(len(chunk.protos))
        for proto in chunk.protos:
            self._dumpChunk(proto)

        # write line numbers
        self._set_uint(len(chunk.lineNums))
        for line in chunk.lineNums:
            self._set_uint(line)

        # write locals
        self._set_uint(len(chunk.locals))
        for local in chunk.locals:
            self._set_string(local.name)
            self._set_uint(local.start)
            self._set_uint(local.end)

        # write upvals
        self._set_uint(len(chunk.upvalues))
        for upval in chunk.upvalues:
            self._set_string(upval)

    def _dumpHeader(self):
        """Write the magic signature followed by the eight header bytes."""
        self._writeBlock(_LUAMAGIC)

        # write header info
        self._set_byte(self.vm_version)
        self._set_byte(self.bytecode_format)
        # endianness byte: 0 = big endian, 1 = little endian
        self._set_byte(0 if self.big_endian else 1)
        self._set_byte(self.int_size)
        self._set_byte(self.size_t)
        self._set_byte(self.instr_size)
        self._set_byte(self.l_number_size)
        self._set_byte(1 if self.integral_flag else 0)

    def dump(self) -> bytearray:
        """Serialize the root chunk and return the complete bytecode.

        The buffer is started fresh on every call, so calling dump() again
        returns one complete image instead of appending a second copy.
        """
        self.bytecode = bytearray()
        self._dumpHeader()
        self._dumpChunk(self.rootChunk)

        return self.bytecode
							
								
								
									
										7
									
								
								main.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							
							
						
						
									
										7
									
								
								main.py
									
									
									
									
									
										
										
										Normal file → Executable file
									
								
							| @@ -1,3 +1,4 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import sys | ||||
| import lundump | ||||
| import lparser | ||||
| @@ -7,4 +8,8 @@ print(sys.argv[1]) | ||||
| chunk = lc.loadFile(sys.argv[1]) | ||||
|  | ||||
| lc.print_dissassembly() | ||||
| lp = lparser.LuaDecomp(chunk) | ||||
|  | ||||
| lp = lparser.LuaDecomp(chunk) | ||||
|  | ||||
| print("\n==== [[" + str(chunk.name) + "'s pseudo-code]] ====\n") | ||||
| print(lp.getPseudoCode()) | ||||
		Reference in New Issue
	
	Block a user