Compare commits

...

10 Commits

3 changed files with 195 additions and 88 deletions

View File

@ -2,48 +2,54 @@
An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o <out.luac> <script.lua>`).
You will quickly find that only **extremely** simple scripts are decompiled successfully right now. This is an experimental project and not all opcodes are properly handled for now. If you need a real decompiler I would recommend any of the handful of ones that exist already.
## Why?
Lua has a relatively small instruction set (only 38 different opcodes!). This makes it pretty feasible for a weekend decompiler project. (real) Decompilers are extremely complex pieces of software, so being able to write a simpler one helps show the theory without *much* of the headache.
## Example usage
```sh
> cat example.lua && luac5.1 -o example.luac example.lua
pp = "pri" .. "nt"
local i, x = 0, 2
if 2 + 2 == 4 then
_G[pp]("Hello world")
while i < 10 do
print(i + x)
i = i + 1
end
> python main.py example.luac
example.luac
==== [[example.lua's constants]] ====
0: [STRING] pp
1: [STRING] pri
2: [STRING] nt
3: [NUMBER] 4.0
4: [STRING] _G
5: [STRING] Hello world
0: [NUMBER] 0.0
1: [NUMBER] 2.0
2: [NUMBER] 10.0
3: [STRING] print
4: [NUMBER] 1.0
==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[1]
[ 1] LOADK : R[1] K[2]
[ 2] CONCAT : R[0] R[0] R[1]
[ 3] SETGLOBAL : R[0] R[0]
[ 4] EQ : R[0] K[3] K[3]
[ 5] JMP : R[0] R[5]
[ 6] GETGLOBAL : R[0] K[4]
[ 7] GETGLOBAL : R[1] K[0]
[ 8] GETTABLE : R[0] R[0] R[1]
[ 9] LOADK : R[1] K[5]
[ 10] CALL : R[0] R[2] R[1]
[ 11] RETURN : R[0] R[1] R[0]
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
[ 1] LOADK : R[1] K[1] ; load 2.0 into R[1]
[ 2] LT : R[0] R[0] K[2] ;
[ 3] JMP : R[0] 5 ;
[ 4] GETGLOBAL : R[2] K[3] ;
[ 5] ADD : R[3] R[0] R[1] ;
[ 6] CALL : R[2] 2 1 ;
[ 7] ADD : R[0] R[0] K[4] ;
[ 8] JMP : R[0] -7 ;
[ 9] RETURN : R[0] 1 0 ;
==== [[example.lua's decompiled source]] ====
pp = "pri" .. "nt"
if 4.0 == 4.0 then
_G[pp]("Hello world")
local i = 0.0
local x = 2.0
while i < 10.0 do
print((i + x))
i = (i + 1.0)
end
```

View File

@ -1,20 +1,27 @@
'''
lparser.py
Depends on ldump.py for lua dump deserialization.
Depends on lundump.py for lua dump deserialization.
An experimental bytecode decompiler.
'''
from operator import concat
from subprocess import call
from lundump import Chunk, LuaUndump, Constant, Instruction, InstructionType, Opcodes
from xmlrpc.client import Boolean
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
class _Scope:
def __init__(self, startPC: int, endPC: int):
self.startPC = startPC
self.endPC = endPC
class _Traceback:
def __init__(self):
self.sets = []
self.uses = []
self.isConst = False
class LuaDecomp:
def __init__(self, chunk: Chunk):
self.chunk = chunk
@ -22,16 +29,19 @@ class LuaDecomp:
self.scope = []
self.top = {}
self.locals = {}
self.traceback = {}
self.unknownLocalCount = 0
self.src: str = ""
# configurations!
self.aggressiveLocals = False # should *EVERY* accessed register be considered a local?
self.aggressiveLocals = False # should *EVERY* set register be considered a local?
self.indexWidth = 4 # how many spaces for indentions?
self.__loadLocals()
# parse instructions
while self.pc < len(self.chunk.instructions):
self.parseExpr()
self.parseInstr()
self.pc += 1
# end the scope (if we're supposed too)
@ -40,7 +50,85 @@ class LuaDecomp:
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print(self.src)
# =======================================[[ Helpers ]]=========================================
def __getInstrAtPC(self, pc: int) -> Instruction:
if pc < len(self.chunk.instructions):
return self.chunk.instructions[pc]
raise Exception("Decompilation failed!")
def __getNextInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc + 1)
def __getCurrInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc)
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].uses.append(reg)
# when we write from a register, call this
def __addSetTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].sets.append(reg)
# walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean:
for _, trace in self.traceback.items():
if reg in trace.sets:
return False
# wasn't set in traceback! needs defined!
return True
def __loadLocals(self):
for i in range(len(self.chunk.locals)):
if not self.chunk.locals[i].name == "":
self.locals[i] = self.chunk.locals[i].name
else:
self.__makeLocalIdentifier(i)
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
self.__addUseTraceback(indx)
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
# if the top indx is a local, set it
if indx in self.locals:
if self.__needsDefined(indx):
self.__newLocal(indx, code)
else:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
self.__addSetTraceback(indx)
self.top[indx] = code
# ========================================[[ Locals ]]=========================================
def __makeLocalIdentifier(self, indx: int) -> str:
# first, check if we have a local name already determined
if indx in self.locals:
return self.locals[indx]
# otherwise, generate a local
self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
self.unknownLocalCount += 1
@ -53,34 +141,7 @@ class LuaDecomp:
self.__startStatement()
self.__addExpr("local " + self.locals[indx] + " = " + expr)
def __getNextInstr(self) -> Instruction:
if self.pc + 1 < len(self.chunk.instructions):
return self.chunk.instructions[self.pc + 1]
return None
def __getCurrInstr(self) -> Instruction:
return self.chunk.instructions[self.pc]
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
# if the top indx is a local, set it
if indx in self.locals:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
self.top[indx] = code
# ========================================[[ Scopes ]]=========================================
def __startScope(self, scopeType: str, size: int) -> None:
self.__addExpr(scopeType)
@ -99,17 +160,43 @@ class LuaDecomp:
self.__startStatement()
self.__addExpr("end")
# =====================================[[ Instructions ]]======================================
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
self.__setReg(a, "(" + b + op + c + ")")
def __compJmp(self, op: str):
instr = self.__getCurrInstr()
jmpType = "if"
scopeStart = "then"
# we need to check if the jmp location has a jump back (if so, it's a while loop)
jmp = self.__getNextInstr().B + 1
jmpToInstr = self.__getInstrAtPC(self.pc + jmp)
if jmpToInstr.opcode == Opcodes.JMP:
# if this jump jumps back to this compJmp, it's a loop!
if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
jmpType = "while"
scopeStart = "do"
self.__startStatement()
if instr.A > 0:
self.__addExpr("%s not " % jmpType)
else:
self.__addExpr("%s " % jmpType)
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
self.__startScope("%s " % scopeStart, jmp)
self.pc += 1 # skip next instr
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str:
if (rk & (1 << 8)) > 0:
return self.chunk.constants[(rk & ~(1 << 8))].toCode()
if (whichRK(rk)) > 0:
return self.chunk.getConstant(readRKasK(rk)).toCode()
else:
return self.__getReg(rk)
def parseExpr(self):
def parseInstr(self):
instr = self.__getCurrInstr()
# python, add switch statements *please*
@ -117,19 +204,19 @@ class LuaDecomp:
# move registers
self.__setReg(instr.A, self.__getReg(instr.B))
elif instr.opcode == Opcodes.LOADK:
self.__setReg(instr.A, self.chunk.constants[instr.B].toCode())
self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
elif instr.opcode == Opcodes.LOADBOOL:
if instr.B == 0:
self.__setReg(instr.A, "false")
else:
self.__setReg(instr.A, "true")
elif instr.opcode == Opcodes.GETGLOBAL:
self.__setReg(instr.A, self.chunk.constants[instr.B].data)
self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
elif instr.opcode == Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL:
self.__startStatement()
self.__addExpr(self.chunk.constants[instr.B].data + " = " + self.__getReg(instr.A))
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
elif instr.opcode == Opcodes.SETTABLE:
self.__startStatement()
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
@ -148,7 +235,7 @@ class LuaDecomp:
elif instr.opcode == Opcodes.UNM:
self.__setReg(instr.A, "-" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "!" + self.__getReg(instr.B))
self.__setReg(instr.A, "not " + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B))
elif instr.opcode == Opcodes.CONCAT:
@ -163,19 +250,11 @@ class LuaDecomp:
elif instr.opcode == Opcodes.JMP:
pass
elif instr.opcode == Opcodes.EQ:
self.__startStatement()
if instr.A > 0:
self.__addExpr("if not ")
else:
self.__addExpr("if ")
self.__addExpr(self.__readRK(instr.B) + " == " + self.__readRK(instr.C) + " ")
self.__startScope("then ", self.__getNextInstr().B + 1)
self.pc += 1 # skip next instr
self.__compJmp(" == ")
elif instr.opcode == Opcodes.LT:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " < ")
self.__compJmp(" < ")
elif instr.opcode == Opcodes.LE:
self.__emitOperand(instr.A, instr.B, instr.C, " <= ")
self.__compJmp(" <= ")
elif instr.opcode == Opcodes.CALL:
preStr = ""
callStr = ""

View File

@ -68,7 +68,15 @@ class ConstType(IntEnum):
_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT]
_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF]
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL]
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL]
# is an 'RK' value a K? (result is true for K, false for R)
def whichRK(rk: int):
return (rk & (1 << 8)) > 0
# read an RK as a K
def readRKasK(rk: int):
return (rk & ~(1 << 8))
class Instruction:
def __init__(self, type: InstructionType, name: str) -> None:
@ -80,9 +88,9 @@ class Instruction:
self.C: int = None
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str:
if (rk & (1 << 8)) > 0:
return "K[" + str((rk & ~(1 << 8))) + "]"
def __formatRK(self, rk: int) -> str:
if whichRK(rk):
return "K[" + str(readRKasK(rk)) + "]"
else:
return "R[" + str(rk) + "]"
@ -93,28 +101,39 @@ class Instruction:
if self.type == InstructionType.ABC:
# by default, treat them as registers
A = "R[%d]" % self.A
B = "R[%d]" % self.B
C = "R[%d]" % self.C
B = "%d" % self.B
C = "%d" % self.C
# these opcodes have RKs for B & C
if self.opcode in _RKBCInstr:
B = self.__readRK(self.B)
C = self.__readRK(self.C)
B = self.__formatRK(self.B)
C = self.__formatRK(self.C)
elif self.opcode in _RKCInstr: # just for C
C = self.__readRK(self.C)
C = self.__formatRK(self.C)
regs = "%s %s %s" % (A, B, C)
regs = "%6s %6s %6s" % (A, B, C)
elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
A = "R[%d]" % self.A
B = "R[%d]" % self.B
B = "%d" % self.B
if self.opcode in _KBx:
B = "K[%d]" % self.B
regs = "%s %s" % (A, B)
regs = "%6s %6s" % (A, B)
return "%s : %s" % (instr, regs)
def getAnnotation(self, chunk):
if self.opcode == Opcodes.MOVE:
return "move R[%d] into R[%d]" % (self.B, self.A)
elif self.opcode == Opcodes.LOADK:
return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A)
elif self.opcode == Opcodes.CONCAT:
count = self.C - self.B + 1
return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
else:
return ""
class Constant:
def __init__(self, type: ConstType, data) -> None:
self.type = type
@ -180,6 +199,9 @@ class Chunk:
# there's no local information (may have been stripped)
return None
def getConstant(self, indx: int) -> Constant:
return self.constants[indx]
def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)):
@ -188,7 +210,7 @@ class Chunk:
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)):
print("[%3d] %s" % (i, self.instructions[i].toString()))
print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self)))
if len(self.protos) > 0:
print("\n==== [[" + str(self.name) + "'s protos]] ====\n")