Compare commits

...

10 Commits

3 changed files with 195 additions and 88 deletions

View File

@ -2,48 +2,54 @@
An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o <out.luac> <script.lua>`). An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o <out.luac> <script.lua>`).
You will quickly find that only **extremely** simple scripts are decompiled successfully right now. This is an experimental project and not all opcodes are properly handled for now. If you need a real decompiler I would recommend any of the handful of ones that exist already.
## Why?
Lua has a relatively small instruction set (only 38 different opcodes!). This makes it pretty feasible for a weekend decompiler project. Real decompilers are extremely complex pieces of software, so being able to write a simpler one helps show the theory without *much* of the headache.
## Example usage ## Example usage
```sh ```sh
> cat example.lua && luac5.1 -o example.luac example.lua > cat example.lua && luac5.1 -o example.luac example.lua
pp = "pri" .. "nt" local i, x = 0, 2
if 2 + 2 == 4 then while i < 10 do
_G[pp]("Hello world") print(i + x)
i = i + 1
end end
> python main.py example.luac > python main.py example.luac
example.luac example.luac
==== [[example.lua's constants]] ==== ==== [[example.lua's constants]] ====
0: [STRING] pp 0: [NUMBER] 0.0
1: [STRING] pri 1: [NUMBER] 2.0
2: [STRING] nt 2: [NUMBER] 10.0
3: [NUMBER] 4.0 3: [STRING] print
4: [STRING] _G 4: [NUMBER] 1.0
5: [STRING] Hello world
==== [[example.lua's dissassembly]] ==== ==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[1] [ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
[ 1] LOADK : R[1] K[2] [ 1] LOADK : R[1] K[1] ; load 2.0 into R[1]
[ 2] CONCAT : R[0] R[0] R[1] [ 2] LT : R[0] R[0] K[2] ;
[ 3] SETGLOBAL : R[0] R[0] [ 3] JMP : R[0] 5 ;
[ 4] EQ : R[0] K[3] K[3] [ 4] GETGLOBAL : R[2] K[3] ;
[ 5] JMP : R[0] R[5] [ 5] ADD : R[3] R[0] R[1] ;
[ 6] GETGLOBAL : R[0] K[4] [ 6] CALL : R[2] 2 1 ;
[ 7] GETGLOBAL : R[1] K[0] [ 7] ADD : R[0] R[0] K[4] ;
[ 8] GETTABLE : R[0] R[0] R[1] [ 8] JMP : R[0] -7 ;
[ 9] LOADK : R[1] K[5] [ 9] RETURN : R[0] 1 0 ;
[ 10] CALL : R[0] R[2] R[1]
[ 11] RETURN : R[0] R[1] R[0]
==== [[example.lua's decompiled source]] ==== ==== [[example.lua's decompiled source]] ====
pp = "pri" .. "nt" local i = 0.0
if 4.0 == 4.0 then local x = 2.0
_G[pp]("Hello world") while i < 10.0 do
print((i + x))
i = (i + 1.0)
end end
``` ```

View File

@ -1,20 +1,27 @@
''' '''
lparser.py lparser.py
Depends on ldump.py for lua dump deserialization. Depends on lundump.py for lua dump deserialization.
An experimental bytecode decompiler. An experimental bytecode decompiler.
''' '''
from operator import concat from operator import concat
from subprocess import call from subprocess import call
from lundump import Chunk, LuaUndump, Constant, Instruction, InstructionType, Opcodes from xmlrpc.client import Boolean
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
class _Scope: class _Scope:
def __init__(self, startPC: int, endPC: int): def __init__(self, startPC: int, endPC: int):
self.startPC = startPC self.startPC = startPC
self.endPC = endPC self.endPC = endPC
class _Traceback:
    '''
    Bookkeeping record holding the registers written (`sets`) and read
    (`uses`); LuaDecomp stores one of these per pc in its traceback table.
    '''

    def __init__(self):
        # two independent, initially empty register lists
        self.sets, self.uses = [], []
        # starts out False; nothing in the visible code changes or reads it
        self.isConst = False
class LuaDecomp: class LuaDecomp:
def __init__(self, chunk: Chunk): def __init__(self, chunk: Chunk):
self.chunk = chunk self.chunk = chunk
@ -22,16 +29,19 @@ class LuaDecomp:
self.scope = [] self.scope = []
self.top = {} self.top = {}
self.locals = {} self.locals = {}
self.traceback = {}
self.unknownLocalCount = 0 self.unknownLocalCount = 0
self.src: str = "" self.src: str = ""
# configurations! # configurations!
self.aggressiveLocals = False # should *EVERY* accessed register be considered a local? self.aggressiveLocals = False # should *EVERY* set register be considered a local?
self.indexWidth = 4 # how many spaces for indentions? self.indexWidth = 4 # how many spaces for indentions?
self.__loadLocals()
# parse instructions # parse instructions
while self.pc < len(self.chunk.instructions): while self.pc < len(self.chunk.instructions):
self.parseExpr() self.parseInstr()
self.pc += 1 self.pc += 1
# end the scope (if we're supposed too) # end the scope (if we're supposed too)
@ -40,7 +50,85 @@ class LuaDecomp:
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print(self.src) print(self.src)
# =======================================[[ Helpers ]]=========================================
def __getInstrAtPC(self, pc: int) -> Instruction:
if pc < len(self.chunk.instructions):
return self.chunk.instructions[pc]
raise Exception("Decompilation failed!")
def __getNextInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc + 1)
def __getCurrInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc)
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].uses.append(reg)
# when we write to a register, call this
def __addSetTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].sets.append(reg)
# walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean:
for _, trace in self.traceback.items():
if reg in trace.sets:
return False
# wasn't set in traceback! needs defined!
return True
def __loadLocals(self):
for i in range(len(self.chunk.locals)):
if not self.chunk.locals[i].name == "":
self.locals[i] = self.chunk.locals[i].name
else:
self.__makeLocalIdentifier(i)
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
self.__addUseTraceback(indx)
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
# if the top indx is a local, set it
if indx in self.locals:
if self.__needsDefined(indx):
self.__newLocal(indx, code)
else:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
self.__addSetTraceback(indx)
self.top[indx] = code
# ========================================[[ Locals ]]=========================================
def __makeLocalIdentifier(self, indx: int) -> str: def __makeLocalIdentifier(self, indx: int) -> str:
# first, check if we have a local name already determined
if indx in self.locals:
return self.locals[indx]
# otherwise, generate a local
self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
self.unknownLocalCount += 1 self.unknownLocalCount += 1
@ -53,34 +141,7 @@ class LuaDecomp:
self.__startStatement() self.__startStatement()
self.__addExpr("local " + self.locals[indx] + " = " + expr) self.__addExpr("local " + self.locals[indx] + " = " + expr)
def __getNextInstr(self) -> Instruction: # ========================================[[ Scopes ]]=========================================
if self.pc + 1 < len(self.chunk.instructions):
return self.chunk.instructions[self.pc + 1]
return None
def __getCurrInstr(self) -> Instruction:
return self.chunk.instructions[self.pc]
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
# if the top indx is a local, set it
if indx in self.locals:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
self.top[indx] = code
def __startScope(self, scopeType: str, size: int) -> None: def __startScope(self, scopeType: str, size: int) -> None:
self.__addExpr(scopeType) self.__addExpr(scopeType)
@ -99,17 +160,43 @@ class LuaDecomp:
self.__startStatement() self.__startStatement()
self.__addExpr("end") self.__addExpr("end")
# =====================================[[ Instructions ]]======================================
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
self.__setReg(a, "(" + b + op + c + ")") self.__setReg(a, "(" + b + op + c + ")")
def __compJmp(self, op: str):
    '''
    Emit the opening of an `if` or `while` block for the comparison
    instruction at the current pc.

    `op` is the comparison operator text (already padded with spaces) that
    is placed between the two RK operands B and C. The instruction after
    the comparison supplies the jump offset (its B field, plus one) that is
    passed to __startScope as the scope size; that instruction is then
    skipped by advancing pc.

    When the current instruction's A field is greater than zero the
    condition is emitted negated. The comparison is wrapped in parentheses
    in that case because Lua's `not` binds tighter than the comparison
    operators, so `not a == b` would mean `(not a) == b`.
    '''
    instr = self.__getCurrInstr()
    jmpType = "if"
    scopeStart = "then"

    # we need to check if the jmp location has a jump back (if so, it's a while loop)
    jmp = self.__getNextInstr().B + 1
    jmpToInstr = self.__getInstrAtPC(self.pc + jmp)
    if jmpToInstr.opcode == Opcodes.JMP:
        # if this jump jumps back to this compJmp, it's a loop!
        if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
            jmpType = "while"
            scopeStart = "do"

    self.__startStatement()
    condition = self.__readRK(instr.B) + op + self.__readRK(instr.C)
    if instr.A > 0:
        # parenthesize so the negation applies to the whole comparison
        self.__addExpr("%s not (%s) " % (jmpType, condition))
    else:
        self.__addExpr("%s %s " % (jmpType, condition))
    self.__startScope("%s " % scopeStart, jmp)

    self.pc += 1 # skip next instr
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str: def __readRK(self, rk: int) -> str:
if (rk & (1 << 8)) > 0: if (whichRK(rk)) > 0:
return self.chunk.constants[(rk & ~(1 << 8))].toCode() return self.chunk.getConstant(readRKasK(rk)).toCode()
else: else:
return self.__getReg(rk) return self.__getReg(rk)
def parseExpr(self): def parseInstr(self):
instr = self.__getCurrInstr() instr = self.__getCurrInstr()
# python, add switch statements *please* # python, add switch statements *please*
@ -117,19 +204,19 @@ class LuaDecomp:
# move registers # move registers
self.__setReg(instr.A, self.__getReg(instr.B)) self.__setReg(instr.A, self.__getReg(instr.B))
elif instr.opcode == Opcodes.LOADK: elif instr.opcode == Opcodes.LOADK:
self.__setReg(instr.A, self.chunk.constants[instr.B].toCode()) self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
elif instr.opcode == Opcodes.LOADBOOL: elif instr.opcode == Opcodes.LOADBOOL:
if instr.B == 0: if instr.B == 0:
self.__setReg(instr.A, "false") self.__setReg(instr.A, "false")
else: else:
self.__setReg(instr.A, "true") self.__setReg(instr.A, "true")
elif instr.opcode == Opcodes.GETGLOBAL: elif instr.opcode == Opcodes.GETGLOBAL:
self.__setReg(instr.A, self.chunk.constants[instr.B].data) self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
elif instr.opcode == Opcodes.GETTABLE: elif instr.opcode == Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL: elif instr.opcode == Opcodes.SETGLOBAL:
self.__startStatement() self.__startStatement()
self.__addExpr(self.chunk.constants[instr.B].data + " = " + self.__getReg(instr.A)) self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
elif instr.opcode == Opcodes.SETTABLE: elif instr.opcode == Opcodes.SETTABLE:
self.__startStatement() self.__startStatement()
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
@ -148,7 +235,7 @@ class LuaDecomp:
elif instr.opcode == Opcodes.UNM: elif instr.opcode == Opcodes.UNM:
self.__setReg(instr.A, "-" + self.__getReg(instr.B)) self.__setReg(instr.A, "-" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.NOT: elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "!" + self.__getReg(instr.B)) self.__setReg(instr.A, "not " + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN: elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B)) self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B))
elif instr.opcode == Opcodes.CONCAT: elif instr.opcode == Opcodes.CONCAT:
@ -163,19 +250,11 @@ class LuaDecomp:
elif instr.opcode == Opcodes.JMP: elif instr.opcode == Opcodes.JMP:
pass pass
elif instr.opcode == Opcodes.EQ: elif instr.opcode == Opcodes.EQ:
self.__startStatement() self.__compJmp(" == ")
if instr.A > 0:
self.__addExpr("if not ")
else:
self.__addExpr("if ")
self.__addExpr(self.__readRK(instr.B) + " == " + self.__readRK(instr.C) + " ")
self.__startScope("then ", self.__getNextInstr().B + 1)
self.pc += 1 # skip next instr
elif instr.opcode == Opcodes.LT: elif instr.opcode == Opcodes.LT:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " < ") self.__compJmp(" < ")
elif instr.opcode == Opcodes.LE: elif instr.opcode == Opcodes.LE:
self.__emitOperand(instr.A, instr.B, instr.C, " <= ") self.__compJmp(" <= ")
elif instr.opcode == Opcodes.CALL: elif instr.opcode == Opcodes.CALL:
preStr = "" preStr = ""
callStr = "" callStr = ""

View File

@ -68,7 +68,15 @@ class ConstType(IntEnum):
_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT] _RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT]
_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF] _RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF]
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL] _KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL]
# is an 'RK' value a K? (result is true for K, false for R)
def whichRK(rk: int):
    '''Return True when bit 8 of `rk` is set (the operand is a K), False otherwise (an R).'''
    kFlag = 1 << 8
    return (rk & kFlag) != 0
# read an RK as a K
def readRKasK(rk: int):
    '''Return `rk` with bit 8 (the K flag) cleared, leaving the constant index.'''
    kFlag = 1 << 8
    return rk & ~kFlag
class Instruction: class Instruction:
def __init__(self, type: InstructionType, name: str) -> None: def __init__(self, type: InstructionType, name: str) -> None:
@ -80,9 +88,9 @@ class Instruction:
self.C: int = None self.C: int = None
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str: def __formatRK(self, rk: int) -> str:
if (rk & (1 << 8)) > 0: if whichRK(rk):
return "K[" + str((rk & ~(1 << 8))) + "]" return "K[" + str(readRKasK(rk)) + "]"
else: else:
return "R[" + str(rk) + "]" return "R[" + str(rk) + "]"
@ -93,28 +101,39 @@ class Instruction:
if self.type == InstructionType.ABC: if self.type == InstructionType.ABC:
# by default, treat them as registers # by default, treat them as registers
A = "R[%d]" % self.A A = "R[%d]" % self.A
B = "R[%d]" % self.B B = "%d" % self.B
C = "R[%d]" % self.C C = "%d" % self.C
# these opcodes have RKs for B & C # these opcodes have RKs for B & C
if self.opcode in _RKBCInstr: if self.opcode in _RKBCInstr:
B = self.__readRK(self.B) B = self.__formatRK(self.B)
C = self.__readRK(self.C) C = self.__formatRK(self.C)
elif self.opcode in _RKCInstr: # just for C elif self.opcode in _RKCInstr: # just for C
C = self.__readRK(self.C) C = self.__formatRK(self.C)
regs = "%s %s %s" % (A, B, C) regs = "%6s %6s %6s" % (A, B, C)
elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
A = "R[%d]" % self.A A = "R[%d]" % self.A
B = "R[%d]" % self.B B = "%d" % self.B
if self.opcode in _KBx: if self.opcode in _KBx:
B = "K[%d]" % self.B B = "K[%d]" % self.B
regs = "%s %s" % (A, B) regs = "%6s %6s" % (A, B)
return "%s : %s" % (instr, regs) return "%s : %s" % (instr, regs)
def getAnnotation(self, chunk):
    '''
    Build a short human-readable note describing this instruction.

    Only MOVE, LOADK and CONCAT are annotated; every other opcode yields an
    empty string. `chunk` is consulted (via getConstant) only for LOADK, to
    render the constant being loaded.
    '''
    opcode = self.opcode

    if opcode == Opcodes.MOVE:
        return "move R[%d] into R[%d]" % (self.B, self.A)

    if opcode == Opcodes.LOADK:
        loaded = chunk.getConstant(self.B).toCode()
        return "load %s into R[%d]" % (loaded, self.A)

    if opcode == Opcodes.CONCAT:
        # B..C is an inclusive register range
        count = self.C - self.B + 1
        return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)

    return ""
class Constant: class Constant:
def __init__(self, type: ConstType, data) -> None: def __init__(self, type: ConstType, data) -> None:
self.type = type self.type = type
@ -180,6 +199,9 @@ class Chunk:
# there's no local information (may have been stripped) # there's no local information (may have been stripped)
return None return None
def getConstant(self, indx: int) -> Constant:
return self.constants[indx]
def print(self): def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n") print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)): for z in range(len(self.constants)):
@ -188,7 +210,7 @@ class Chunk:
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)): for i in range(len(self.instructions)):
print("[%3d] %s" % (i, self.instructions[i].toString())) print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self)))
if len(self.protos) > 0: if len(self.protos) > 0:
print("\n==== [[" + str(self.name) + "'s protos]] ====\n") print("\n==== [[" + str(self.name) + "'s protos]] ====\n")