2022-08-11 20:38:31 +00:00
|
|
|
'''
|
|
|
|
lparser.py
|
|
|
|
|
2022-08-11 21:45:05 +00:00
|
|
|
Depends on lundump.py for Lua dump deserialization.
|
2022-08-11 20:38:31 +00:00
|
|
|
|
|
|
|
An experimental bytecode decompiler.
|
|
|
|
'''
|
|
|
|
|
|
|
|
from operator import concat
|
|
|
|
from subprocess import call
|
2022-08-12 04:58:21 +00:00
|
|
|
from xmlrpc.client import Boolean
|
2022-08-11 21:45:05 +00:00
|
|
|
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
|
2022-08-11 20:38:31 +00:00
|
|
|
|
|
|
|
class _Scope:
    """Instruction range of one open block (an if/while body).

    LuaDecomp pushes one of these when a block starts and compares its
    current pc against ``endPC`` to decide when the block has to be closed.
    """

    def __init__(self, startPC: int, endPC: int):
        # both bounds are stored exactly as given
        self.startPC, self.endPC = startPC, endPC
|
|
|
|
|
2022-08-12 04:58:21 +00:00
|
|
|
class _Traceback:
    """Register activity recorded for a single pc.

    LuaDecomp keeps one instance per pc and appends register indexes to
    ``sets`` (written) and ``uses`` (read) while it parses that instruction.
    """

    def __init__(self):
        # not read anywhere in the visible code; always starts out False
        self.isConst = False
        # registers read at this pc, in the order they were read
        self.uses: list[int] = []
        # registers written at this pc, in the order they were written
        self.sets: list[int] = []
|
|
|
|
|
2022-08-12 22:08:31 +00:00
|
|
|
class _Line:
    """One line of generated source plus the pc range it was produced from.

    ``scope`` is the nesting depth; LuaDecomp multiplies it by its indent
    width when printing the line.
    """

    def __init__(self, startPC: int, endPC: int, src: str, scope: int):
        # the generated text and how deeply it is nested
        self.src = src
        self.scope = scope
        # inclusive pc range the text is attributed to
        self.startPC, self.endPC = startPC, endPC
|
|
|
|
|
2022-08-11 20:38:31 +00:00
|
|
|
class LuaDecomp:
    """Experimental decompiler that rebuilds Lua source from a chunk's bytecode.

    All the work happens in the constructor: every instruction of the chunk
    is parsed in order, the generated lines are collected in ``self.lines``
    and finally printed to stdout.
    """

    def __init__(self, chunk: "Chunk"):
        """Decompile *chunk* and print the resulting source.

        Raises:
            Exception: if an instruction is unsupported or a pc lookup falls
                outside the chunk's instruction list.
        """
        self.chunk = chunk
        self.pc = 0
        self.scope: list[_Scope] = []   # stack of currently open blocks
        self.lines: list[_Line] = []    # finished source lines, in output order
        self.top = {}                   # register index -> expression currently held
        self.locals = {}                # register index -> local identifier
        self.traceback = {}             # pc -> _Traceback of register reads/writes
        self.unknownLocalCount = 0      # counter used to name generated locals
        self.src: str = ""              # text of the statement being built

        # configurations!
        self.aggressiveLocals = False # should *EVERY* set register be considered a local?
        self.annotateLines = False    # print the pc range above every line?
        self.indexWidth = 4 # how many spaces for indentions?

        self.__loadLocals()

        # parse instructions
        while self.pc < len(self.chunk.instructions):
            self.parseInstr()
            self.pc += 1

            # end the scope (if we're supposed to)
            self.__checkScope()

        print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")

        for line in self.lines:
            if self.annotateLines:
                print("-- PC: %d to PC: %d" % (line.startPC, line.endPC))
            print(((' ' * self.indexWidth) * line.scope) + line.src)

    # =======================================[[ Helpers ]]=========================================

    def __getInstrAtPC(self, pc: int) -> "Instruction":
        """Return the instruction at *pc*.

        Raises:
            Exception: if *pc* is outside ``[0, len(instructions))``. Negative
                values are rejected explicitly because list indexing would
                otherwise wrap around to the end of the instruction list.
        """
        if 0 <= pc < len(self.chunk.instructions):
            return self.chunk.instructions[pc]

        raise Exception("Decompilation failed!")

    def __getNextInstr(self) -> "Instruction":
        """Return the instruction right after the current pc."""
        return self.__getInstrAtPC(self.pc + 1)

    def __getCurrInstr(self) -> "Instruction":
        """Return the instruction at the current pc."""
        return self.__getInstrAtPC(self.pc)

    def __makeTracIfNotExist(self) -> None:
        """Create the traceback entry for the current pc if there is none yet."""
        if self.pc not in self.traceback:
            self.traceback[self.pc] = _Traceback()

    # when we read from a register, call this
    def __addUseTraceback(self, reg: int) -> None:
        """Record that register *reg* was read at the current pc."""
        self.__makeTracIfNotExist()
        self.traceback[self.pc].uses.append(reg)

    # when we write to a register, call this
    def __addSetTraceback(self, reg: int) -> None:
        """Record that register *reg* was written at the current pc."""
        self.__makeTracIfNotExist()
        self.traceback[self.pc].sets.append(reg)

    def __addExpr(self, code: str) -> None:
        """Append *code* to the statement currently being built."""
        self.src += code

    def __endStatement(self):
        """Turn the pending statement text into a line and clear it.

        The line starts one pc after the previous line's end (0 for the first
        line) and ends at the current pc. Nothing is appended when no text is
        pending.
        """
        startPC = self.lines[-1].endPC + 1 if self.lines else 0
        endPC = self.pc

        # make sure we don't write an empty line
        if self.src != "":
            self.lines.append(_Line(startPC, endPC, self.src, len(self.scope)))
        self.src = ""

    def __insertStatement(self, pc: int) -> int:
        """Insert the pending statement in front of the line that covers *pc*.

        The new line takes the scope depth of the line before the insertion
        point (0 when inserted first).

        Returns:
            The index in ``self.lines`` where the statement was inserted.

        Raises:
            Exception: if no existing line covers *pc*. The original fell
                through and returned None, which crashed its caller later.
        """
        for i, line in enumerate(self.lines):
            if line.startPC <= pc <= line.endPC:
                depth = self.lines[i - 1].scope if i > 0 else 0
                self.lines.insert(i, _Line(pc, pc, self.src, depth))
                self.src = ""
                return i

        self.src = ""
        raise Exception("Decompilation failed! no statement covers pc %d" % pc)

    # walks traceback, if local wasn't set before, the local needs to be defined
    def __needsDefined(self, reg) -> bool:
        """Return True when *reg* has no recorded write in the traceback."""
        return not any(reg in trace.sets for trace in self.traceback.values())

    def __loadLocals(self):
        """Seed ``self.locals`` from the chunk's local table.

        Entry ``i`` of the table names register ``i``; entries with an empty
        name get a generated identifier instead.
        """
        for i, local in enumerate(self.chunk.locals):
            if local.name != "":
                self.locals[i] = local.name
            else:
                self.__makeLocalIdentifier(i)

    def __getReg(self, indx: int) -> str:
        """Return the source text for register *indx* and record the read.

        A register that is a known local yields the local's name, otherwise
        the expression last stored for it is returned.

        Raises:
            KeyError: if the register is neither a local nor was ever set.
        """
        self.__addUseTraceback(indx)

        # if the top indx is a local, get it
        return self.locals[indx] if indx in self.locals else self.top[indx]

    def __setReg(self, indx: int, code: str) -> None:
        """Store expression *code* in register *indx* and record the write.

        For a local this emits either a ``local`` definition (first write) or
        a plain assignment. For any other register a statement is only
        emitted when ``aggressiveLocals`` is enabled.
        """
        # if the top indx is a local, set it
        if indx in self.locals:
            if self.__needsDefined(indx):
                self.__newLocal(indx, code)
            else:
                self.__addExpr(self.locals[indx] + " = " + code)
                self.__endStatement()
        elif self.aggressiveLocals: # 'every register is a local!!'
            self.__newLocal(indx, code)

        self.__addSetTraceback(indx)
        self.top[indx] = code

    # ========================================[[ Locals ]]=========================================

    def __makeLocalIdentifier(self, indx: int) -> str:
        """Return the identifier for register *indx*, generating one if needed."""
        # first, check if we have a local name already determined
        if indx in self.locals:
            return self.locals[indx]

        # otherwise, generate a local
        self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
        self.unknownLocalCount += 1

        return self.locals[indx]

    def __newLocal(self, indx: int, expr: str) -> None:
        """Emit ``local <name> = <expr>`` for register *indx*."""
        # TODO: grab identifier from chunk(?)
        self.__makeLocalIdentifier(indx)

        self.__addExpr("local " + self.locals[indx] + " = " + expr)
        self.__endStatement()

    # ========================================[[ Scopes ]]=========================================

    def __startScope(self, scopeType: str, start: int, size: int) -> None:
        """Finish the pending statement with *scopeType* and open a block.

        The block covers pcs ``start`` to ``start + size`` inclusive.
        """
        self.__addExpr(scopeType)
        self.__endStatement()
        self.scope.append(_Scope(start, start + size))

    # checks if we need to end a scope
    def __checkScope(self) -> None:
        """Close every open block the current pc has moved past.

        This loops rather than closing a single block, so nested blocks that
        end on the same instruction are all closed before the next statement.
        """
        while self.scope and self.pc > self.scope[-1].endPC:
            self.__endScope()

    def __endScope(self) -> None:
        """Close the innermost block by emitting ``end`` on its own line.

        The line is attributed to the last pc inside the block and indented at
        the enclosing depth. It is appended directly so that no ``end`` text is
        left pending for the next statement to be glued onto.
        """
        self.__endStatement()
        self.scope.pop()

        # pc has already advanced past the block, so pc - 1 is its last instruction
        lastPC = self.pc - 1
        self.lines.append(_Line(lastPC, lastPC, "end", len(self.scope)))

    # =====================================[[ Instructions ]]======================================

    def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
        """Store the parenthesised binary expression ``(b op c)`` in register *a*."""
        self.__setReg(a, "(" + b + op + c + ")")

    def __compJmp(self, op: str):
        """Handle a comparison instruction together with the jump that follows it.

        Depending on where the jump leads this emits the head of an ``if`` or
        ``while`` block, or an ``until`` line whose matching ``repeat`` is
        inserted retroactively at the jump target.
        """
        instr = self.__getCurrInstr()
        jmpType = "if"
        scopeStart = "then"

        jmp = self.__getNextInstr().B + 1

        if jmp < 0:
            # 'repeat until' loop (probably). checked first so we never look up
            # a pc in front of the loop body, which can be -1 or an unrelated JMP
            jmpType = "until"
            scopeStart = None
        else:
            # we need to check if the jmp location has a jump back (if so, it's a while loop)
            jmpToInstr = self.__getInstrAtPC(self.pc + jmp)

            # if this jump jumps back to this compJmp, it's a loop!
            if jmpToInstr.opcode == Opcodes.JMP and self.pc + jmp + jmpToInstr.B <= self.pc + 1:
                jmpType = "while"
                scopeStart = "do"

        if instr.A > 0:
            self.__addExpr("%s not " % jmpType)
        else:
            self.__addExpr("%s " % jmpType)
        self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
        self.pc += 1 # skip next instr

        if scopeStart:
            self.__startScope("%s " % scopeStart, self.pc - 1, jmp)

            # we end the statement *after* scopeStart
            self.__endStatement()
        else:
            # end the statement prior to repeat
            self.__endStatement()

            # it's a repeat until loop, insert 'repeat' at the jumpTo location
            self.__addExpr("repeat")
            insertedLine = self.__insertStatement(self.pc + jmp)

            # add scope to every line in-between 'repeat' and the final 'until'
            for i in range(insertedLine + 1, len(self.lines) - 1):
                self.lines[i].scope += 1

    # 'RK's are special because they can be a register or a konstant. a bitflag is read to determine which
    def __readRK(self, rk: int) -> str:
        """Return source text for an RK operand (constant or register)."""
        if (whichRK(rk)) > 0:
            return self.chunk.getConstant(readRKasK(rk)).toCode()
        else:
            return self.__getReg(rk)

    def parseInstr(self):
        """Translate the instruction at the current pc into source text.

        Raises:
            Exception: if the opcode is not handled by any branch below.
        """
        instr = self.__getCurrInstr()

        # python, add switch statements *please*
        if instr.opcode == Opcodes.MOVE: # move is a fake ABC instr, C is ignored
            # move registers
            self.__setReg(instr.A, self.__getReg(instr.B))
        elif instr.opcode == Opcodes.LOADK:
            self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
        elif instr.opcode == Opcodes.LOADBOOL:
            if instr.B == 0:
                self.__setReg(instr.A, "false")
            else:
                self.__setReg(instr.A, "true")
        elif instr.opcode == Opcodes.GETGLOBAL:
            self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
        elif instr.opcode == Opcodes.GETTABLE:
            self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
        elif instr.opcode == Opcodes.SETGLOBAL:
            self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
            self.__endStatement()
        elif instr.opcode == Opcodes.SETTABLE:
            self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
            self.__endStatement()
        elif instr.opcode == Opcodes.ADD:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
        elif instr.opcode == Opcodes.SUB:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ")
        elif instr.opcode == Opcodes.MUL:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ")
        elif instr.opcode == Opcodes.DIV:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ")
        elif instr.opcode == Opcodes.MOD:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ")
        elif instr.opcode == Opcodes.POW:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ")
        elif instr.opcode == Opcodes.UNM:
            self.__setReg(instr.A, "-" + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.NOT:
            self.__setReg(instr.A, "not " + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.LEN:
            # the operand is register B; the original called __getCurrInstr
            # with an argument here, which raises TypeError
            self.__setReg(instr.A, "#" + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.CONCAT:
            # concat all items on stack from RB to RC
            parts = [self.__getReg(i) for i in range(instr.B, instr.C + 1)]
            self.__setReg(instr.A, " .. ".join(parts))
        elif instr.opcode == Opcodes.JMP:
            pass
        elif instr.opcode == Opcodes.EQ:
            self.__compJmp(" == ")
        elif instr.opcode == Opcodes.LT:
            self.__compJmp(" < ")
        elif instr.opcode == Opcodes.LE:
            self.__compJmp(" <= ")
        elif instr.opcode == Opcodes.CALL:
            # parse arguments: register A is the callee, the B - 1 registers after it the arguments
            callee = self.__getReg(instr.A)
            args = [self.__getReg(i) for i in range(instr.A + 1, instr.A + instr.B)]
            callStr = callee + "(" + ", ".join(args) + ")"

            # parse return values: C - 1 results land in registers A, A + 1, ...
            preStr = ""
            if instr.C > 1:
                idents = []
                for indx in range(instr.A, instr.A + instr.C - 1):
                    ident = self.__makeLocalIdentifier(indx)
                    idents.append(ident)

                    # normally setReg() does this
                    self.top[indx] = ident
                    # record the write so a later set of this register is an
                    # assignment instead of a second 'local' definition
                    self.__addSetTraceback(indx)
                preStr = "local " + ", ".join(idents) + " = "

            self.__addExpr(preStr + callStr)
            self.__endStatement()
        elif instr.opcode == Opcodes.RETURN:
            self.__endStatement()
            pass # no-op for now
        else:
            raise Exception("unsupported instruction: %s" % instr.toString())
|