LuaDecompy/lparser.py

289 lines
10 KiB
Python

'''
lparser.py
Depends on lundump.py for lua dump deserialization.
An experimental bytecode decompiler.
'''
from operator import concat
from subprocess import call
from xmlrpc.client import Boolean
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
class _Scope:
def __init__(self, startPC: int, endPC: int):
self.startPC = startPC
self.endPC = endPC
class _Traceback:
def __init__(self):
self.sets = []
self.uses = []
self.isConst = False
class LuaDecomp:
def __init__(self, chunk: Chunk):
self.chunk = chunk
self.pc = 0
self.scope = []
self.top = {}
self.locals = {}
self.traceback = {}
self.unknownLocalCount = 0
self.src: str = ""
# configurations!
self.aggressiveLocals = False # should *EVERY* set register be considered a local?
self.indexWidth = 4 # how many spaces for indentions?
self.__loadLocals()
# parse instructions
while self.pc < len(self.chunk.instructions):
self.parseInstr()
self.pc += 1
# end the scope (if we're supposed too)
self.__checkScope()
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print(self.src)
# =======================================[[ Helpers ]]=========================================
def __getInstrAtPC(self, pc: int) -> Instruction:
if pc < len(self.chunk.instructions):
return self.chunk.instructions[pc]
raise Exception("Decompilation failed!")
def __getNextInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc + 1)
def __getCurrInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc)
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].uses.append(reg)
# when we write from a register, call this
def __addSetTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].sets.append(reg)
# walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean:
for _, trace in self.traceback.items():
if reg in trace.sets:
return False
# wasn't set in traceback! needs defined!
return True
def __loadLocals(self):
for i in range(len(self.chunk.locals)):
self.locals[i] = self.chunk.locals[i].name
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
self.__addUseTraceback(indx)
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
# if the top indx is a local, set it
if indx in self.locals:
if self.__needsDefined(indx):
self.__newLocal(indx, code)
else:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
self.__addSetTraceback(indx)
self.top[indx] = code
# ========================================[[ Locals ]]=========================================
def __makeLocalIdentifier(self, indx: int) -> str:
# first, check if we have a local name already determined
if indx in self.locals:
return self.locals[indx]
# otherwise, generate a local
self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
self.unknownLocalCount += 1
return self.locals[indx]
def __newLocal(self, indx: int, expr: str) -> None:
# TODO: grab identifier from chunk(?)
self.__makeLocalIdentifier(indx)
self.__startStatement()
self.__addExpr("local " + self.locals[indx] + " = " + expr)
# ========================================[[ Scopes ]]=========================================
def __startScope(self, scopeType: str, size: int) -> None:
self.__addExpr(scopeType)
self.scope.append(_Scope(self.pc, self.pc + size))
# checks if we need to end a scope
def __checkScope(self) -> None:
if len(self.scope) == 0:
return
if self.pc > self.scope[len(self.scope) - 1].endPC:
self.__endScope()
def __endScope(self) -> None:
self.scope.pop()
self.__startStatement()
self.__addExpr("end")
# =====================================[[ Instructions ]]======================================
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
self.__setReg(a, "(" + b + op + c + ")")
def __compJmp(self, op: str):
instr = self.__getCurrInstr()
jmpType = "if"
scopeStart = "then"
# we need to check if the jmp location has a jump back (if so, it's a while loop)
jmp = self.__getNextInstr().B + 1
jmpToInstr = self.__getInstrAtPC(self.pc + jmp)
if jmpToInstr.opcode == Opcodes.JMP:
# if this jump jumps back to this compJmp, it's a loop!
if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
jmpType = "while"
scopeStart = "do"
self.__startStatement()
if instr.A > 0:
self.__addExpr("%s not " % jmpType)
else:
self.__addExpr("%s " % jmpType)
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
self.__startScope("%s " % scopeStart, jmp)
self.pc += 1 # skip next instr
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str:
if (whichRK(rk)) > 0:
return self.chunk.getConstant(readRKasK(rk)).toCode()
else:
return self.__getReg(rk)
def parseInstr(self):
instr = self.__getCurrInstr()
# python, add switch statements *please*
if instr.opcode == Opcodes.MOVE: # move is a fake ABC instr, C is ignored
# move registers
self.__setReg(instr.A, self.__getReg(instr.B))
elif instr.opcode == Opcodes.LOADK:
self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
elif instr.opcode == Opcodes.LOADBOOL:
if instr.B == 0:
self.__setReg(instr.A, "false")
else:
self.__setReg(instr.A, "true")
elif instr.opcode == Opcodes.GETGLOBAL:
self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
elif instr.opcode == Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL:
self.__startStatement()
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
elif instr.opcode == Opcodes.SETTABLE:
self.__startStatement()
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ")
elif instr.opcode == Opcodes.MUL:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ")
elif instr.opcode == Opcodes.DIV:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ")
elif instr.opcode == Opcodes.MOD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ")
elif instr.opcode == Opcodes.POW:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ")
elif instr.opcode == Opcodes.UNM:
self.__setReg(instr.A, "-" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "!" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B))
elif instr.opcode == Opcodes.CONCAT:
count = instr.C-instr.B+1
concatStr = ""
# concat all items on stack from RC to RB
for i in range(count):
concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "")
self.__setReg(instr.A, concatStr)
elif instr.opcode == Opcodes.JMP:
pass
elif instr.opcode == Opcodes.EQ:
self.__compJmp(" == ")
elif instr.opcode == Opcodes.LT:
self.__compJmp(" < ")
elif instr.opcode == Opcodes.LE:
self.__compJmp(" <= ")
elif instr.opcode == Opcodes.CALL:
preStr = ""
callStr = ""
ident = ""
# parse arguments
callStr += self.__getReg(instr.A) + "("
for i in range(instr.A + 1, instr.A + instr.B):
callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "")
callStr += ")"
# parse return values
if instr.C > 1:
preStr = "local "
for indx in range(instr.A, instr.A + instr.C - 1):
if indx in self.locals:
ident = self.locals[indx]
else:
ident = self.__makeLocalIdentifier(indx)
preStr += ident
# normally setReg() does this
self.top[indx] = ident
# just so we don't have a trailing ', '
preStr += ", " if not indx == instr.A + instr.C - 2 else ""
preStr += " = "
self.__startStatement()
self.__addExpr(preStr + callStr)
elif instr.opcode == Opcodes.RETURN:
self.__startStatement()
pass # no-op for now
else:
raise Exception("unsupported instruction: %s" % instr.toString())