mirror of
https://github.com/CPunch/LuaDecompy.git
synced 2024-12-04 21:55:08 +00:00
Compare commits
10 Commits
ac0b7039d2
...
368ff62538
Author | SHA1 | Date | |
---|---|---|---|
368ff62538 | |||
a6623c8953 | |||
0f72e71a59 | |||
b8bf02f7d0 | |||
95ca3bb26b | |||
78e137d033 | |||
875e91636b | |||
055af56e27 | |||
eb1d3ffe87 | |||
2258888956 |
56
README.md
56
README.md
@ -2,48 +2,54 @@
|
||||
|
||||
An experimental Lua 5.1 dump decompiler (typically dumped using `luac -o <out.luac> <script.lua>`).
|
||||
|
||||
You will quickly find that only **extremely** simple scripts are decompiled successfully right now. This is an experimental project and not all opcodes are properly handled yet. If you need a real decompiler, I would recommend any of the handful that already exist.
|
||||
|
||||
## Why?
|
||||
|
||||
Lua has a relatively small instruction set (only 38 different opcodes!). This makes it pretty feasible for a weekend decompiler project. Real decompilers are extremely complex pieces of software, so being able to write a simpler one helps show the theory without *much* of the headache.
|
||||
|
||||
## Example usage
|
||||
|
||||
```sh
|
||||
> cat example.lua && luac5.1 -o example.luac example.lua
|
||||
pp = "pri" .. "nt"
|
||||
local i, x = 0, 2
|
||||
|
||||
if 2 + 2 == 4 then
|
||||
_G[pp]("Hello world")
|
||||
while i < 10 do
|
||||
print(i + x)
|
||||
i = i + 1
|
||||
end
|
||||
|
||||
> python main.py example.luac
|
||||
example.luac
|
||||
|
||||
==== [[example.lua's constants]] ====
|
||||
|
||||
0: [STRING] pp
|
||||
1: [STRING] pri
|
||||
2: [STRING] nt
|
||||
3: [NUMBER] 4.0
|
||||
4: [STRING] _G
|
||||
5: [STRING] Hello world
|
||||
0: [NUMBER] 0.0
|
||||
1: [NUMBER] 2.0
|
||||
2: [NUMBER] 10.0
|
||||
3: [STRING] print
|
||||
4: [NUMBER] 1.0
|
||||
|
||||
==== [[example.lua's dissassembly]] ====
|
||||
|
||||
[ 0] LOADK : R[0] K[1]
|
||||
[ 1] LOADK : R[1] K[2]
|
||||
[ 2] CONCAT : R[0] R[0] R[1]
|
||||
[ 3] SETGLOBAL : R[0] R[0]
|
||||
[ 4] EQ : R[0] K[3] K[3]
|
||||
[ 5] JMP : R[0] R[5]
|
||||
[ 6] GETGLOBAL : R[0] K[4]
|
||||
[ 7] GETGLOBAL : R[1] K[0]
|
||||
[ 8] GETTABLE : R[0] R[0] R[1]
|
||||
[ 9] LOADK : R[1] K[5]
|
||||
[ 10] CALL : R[0] R[2] R[1]
|
||||
[ 11] RETURN : R[0] R[1] R[0]
|
||||
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
|
||||
[ 1] LOADK : R[1] K[1] ; load 2.0 into R[1]
|
||||
[ 2] LT : R[0] R[0] K[2] ;
|
||||
[ 3] JMP : R[0] 5 ;
|
||||
[ 4] GETGLOBAL : R[2] K[3] ;
|
||||
[ 5] ADD : R[3] R[0] R[1] ;
|
||||
[ 6] CALL : R[2] 2 1 ;
|
||||
[ 7] ADD : R[0] R[0] K[4] ;
|
||||
[ 8] JMP : R[0] -7 ;
|
||||
[ 9] RETURN : R[0] 1 0 ;
|
||||
|
||||
==== [[example.lua's decompiled source]] ====
|
||||
|
||||
|
||||
pp = "pri" .. "nt"
|
||||
if 4.0 == 4.0 then
|
||||
_G[pp]("Hello world")
|
||||
local i = 0.0
|
||||
local x = 2.0
|
||||
while i < 10.0 do
|
||||
print((i + x))
|
||||
i = (i + 1.0)
|
||||
end
|
||||
|
||||
```
|
179
lparser.py
179
lparser.py
@ -1,20 +1,27 @@
|
||||
'''
|
||||
lparser.py
|
||||
|
||||
Depends on ldump.py for lua dump deserialization.
|
||||
Depends on lundump.py for lua dump deserialization.
|
||||
|
||||
An experimental bytecode decompiler.
|
||||
'''
|
||||
|
||||
from operator import concat
|
||||
from subprocess import call
|
||||
from lundump import Chunk, LuaUndump, Constant, Instruction, InstructionType, Opcodes
|
||||
from xmlrpc.client import Boolean
|
||||
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
|
||||
|
||||
class _Scope:
|
||||
def __init__(self, startPC: int, endPC: int):
|
||||
self.startPC = startPC
|
||||
self.endPC = endPC
|
||||
|
||||
class _Traceback:
|
||||
def __init__(self):
|
||||
self.sets = []
|
||||
self.uses = []
|
||||
self.isConst = False
|
||||
|
||||
class LuaDecomp:
|
||||
def __init__(self, chunk: Chunk):
|
||||
self.chunk = chunk
|
||||
@ -22,16 +29,19 @@ class LuaDecomp:
|
||||
self.scope = []
|
||||
self.top = {}
|
||||
self.locals = {}
|
||||
self.traceback = {}
|
||||
self.unknownLocalCount = 0
|
||||
self.src: str = ""
|
||||
|
||||
# configurations!
|
||||
self.aggressiveLocals = False # should *EVERY* accessed register be considered a local?
|
||||
self.aggressiveLocals = False # should *EVERY* set register be considered a local?
|
||||
self.indexWidth = 4 # how many spaces for indentions?
|
||||
|
||||
self.__loadLocals()
|
||||
|
||||
# parse instructions
|
||||
while self.pc < len(self.chunk.instructions):
|
||||
self.parseExpr()
|
||||
self.parseInstr()
|
||||
self.pc += 1
|
||||
|
||||
# end the scope (if we're supposed to)
|
||||
@ -40,7 +50,85 @@ class LuaDecomp:
|
||||
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
|
||||
print(self.src)
|
||||
|
||||
# =======================================[[ Helpers ]]=========================================
|
||||
|
||||
def __getInstrAtPC(self, pc: int) -> Instruction:
|
||||
if pc < len(self.chunk.instructions):
|
||||
return self.chunk.instructions[pc]
|
||||
|
||||
raise Exception("Decompilation failed!")
|
||||
|
||||
def __getNextInstr(self) -> Instruction:
|
||||
return self.__getInstrAtPC(self.pc + 1)
|
||||
|
||||
def __getCurrInstr(self) -> Instruction:
|
||||
return self.__getInstrAtPC(self.pc)
|
||||
|
||||
# when we read from a register, call this
|
||||
def __addUseTraceback(self, reg: int) -> None:
|
||||
if not self.pc in self.traceback:
|
||||
self.traceback[self.pc] = _Traceback()
|
||||
|
||||
self.traceback[self.pc].uses.append(reg)
|
||||
|
||||
# when we write to a register, call this
|
||||
def __addSetTraceback(self, reg: int) -> None:
|
||||
if not self.pc in self.traceback:
|
||||
self.traceback[self.pc] = _Traceback()
|
||||
|
||||
self.traceback[self.pc].sets.append(reg)
|
||||
|
||||
# walks traceback, if local wasn't set before, the local needs to be defined
|
||||
def __needsDefined(self, reg) -> Boolean:
|
||||
for _, trace in self.traceback.items():
|
||||
if reg in trace.sets:
|
||||
return False
|
||||
|
||||
# wasn't set in traceback! needs defined!
|
||||
return True
|
||||
|
||||
def __loadLocals(self):
|
||||
for i in range(len(self.chunk.locals)):
|
||||
if not self.chunk.locals[i].name == "":
|
||||
self.locals[i] = self.chunk.locals[i].name
|
||||
else:
|
||||
self.__makeLocalIdentifier(i)
|
||||
|
||||
def __addExpr(self, code: str) -> None:
|
||||
self.src += code
|
||||
|
||||
def __startStatement(self):
|
||||
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
|
||||
|
||||
def __getReg(self, indx: int) -> str:
|
||||
self.__addUseTraceback(indx)
|
||||
|
||||
# if the top indx is a local, get it
|
||||
return self.locals[indx] if indx in self.locals else self.top[indx]
|
||||
|
||||
def __setReg(self, indx: int, code: str) -> None:
|
||||
# if the top indx is a local, set it
|
||||
if indx in self.locals:
|
||||
if self.__needsDefined(indx):
|
||||
self.__newLocal(indx, code)
|
||||
else:
|
||||
self.__startStatement()
|
||||
self.__addExpr(self.locals[indx] + " = " + code)
|
||||
elif self.aggressiveLocals: # 'every register is a local!!'
|
||||
self.__newLocal(indx, code)
|
||||
|
||||
|
||||
self.__addSetTraceback(indx)
|
||||
self.top[indx] = code
|
||||
|
||||
# ========================================[[ Locals ]]=========================================
|
||||
|
||||
def __makeLocalIdentifier(self, indx: int) -> str:
|
||||
# first, check if we have a local name already determined
|
||||
if indx in self.locals:
|
||||
return self.locals[indx]
|
||||
|
||||
# otherwise, generate a local
|
||||
self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
|
||||
self.unknownLocalCount += 1
|
||||
|
||||
@ -53,34 +141,7 @@ class LuaDecomp:
|
||||
self.__startStatement()
|
||||
self.__addExpr("local " + self.locals[indx] + " = " + expr)
|
||||
|
||||
def __getNextInstr(self) -> Instruction:
|
||||
if self.pc + 1 < len(self.chunk.instructions):
|
||||
return self.chunk.instructions[self.pc + 1]
|
||||
|
||||
return None
|
||||
|
||||
def __getCurrInstr(self) -> Instruction:
|
||||
return self.chunk.instructions[self.pc]
|
||||
|
||||
def __addExpr(self, code: str) -> None:
|
||||
self.src += code
|
||||
|
||||
def __startStatement(self):
|
||||
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
|
||||
|
||||
def __getReg(self, indx: int) -> str:
|
||||
# if the top indx is a local, get it
|
||||
return self.locals[indx] if indx in self.locals else self.top[indx]
|
||||
|
||||
def __setReg(self, indx: int, code: str) -> None:
|
||||
# if the top indx is a local, set it
|
||||
if indx in self.locals:
|
||||
self.__startStatement()
|
||||
self.__addExpr(self.locals[indx] + " = " + code)
|
||||
elif self.aggressiveLocals: # 'every register is a local!!'
|
||||
self.__newLocal(indx, code)
|
||||
|
||||
self.top[indx] = code
|
||||
# ========================================[[ Scopes ]]=========================================
|
||||
|
||||
def __startScope(self, scopeType: str, size: int) -> None:
|
||||
self.__addExpr(scopeType)
|
||||
@ -99,17 +160,43 @@ class LuaDecomp:
|
||||
self.__startStatement()
|
||||
self.__addExpr("end")
|
||||
|
||||
# =====================================[[ Instructions ]]======================================
|
||||
|
||||
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
|
||||
self.__setReg(a, "(" + b + op + c + ")")
|
||||
|
||||
def __compJmp(self, op: str):
|
||||
instr = self.__getCurrInstr()
|
||||
jmpType = "if"
|
||||
scopeStart = "then"
|
||||
|
||||
# we need to check if the jmp location has a jump back (if so, it's a while loop)
|
||||
jmp = self.__getNextInstr().B + 1
|
||||
jmpToInstr = self.__getInstrAtPC(self.pc + jmp)
|
||||
|
||||
if jmpToInstr.opcode == Opcodes.JMP:
|
||||
# if this jump jumps back to this compJmp, it's a loop!
|
||||
if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
|
||||
jmpType = "while"
|
||||
scopeStart = "do"
|
||||
|
||||
self.__startStatement()
|
||||
if instr.A > 0:
|
||||
self.__addExpr("%s not " % jmpType)
|
||||
else:
|
||||
self.__addExpr("%s " % jmpType)
|
||||
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
|
||||
self.__startScope("%s " % scopeStart, jmp)
|
||||
self.pc += 1 # skip next instr
|
||||
|
||||
# 'RK's are special because they can be either a register or a konstant. a bitflag is read to determine which
|
||||
def __readRK(self, rk: int) -> str:
|
||||
if (rk & (1 << 8)) > 0:
|
||||
return self.chunk.constants[(rk & ~(1 << 8))].toCode()
|
||||
if (whichRK(rk)) > 0:
|
||||
return self.chunk.getConstant(readRKasK(rk)).toCode()
|
||||
else:
|
||||
return self.__getReg(rk)
|
||||
|
||||
def parseExpr(self):
|
||||
def parseInstr(self):
|
||||
instr = self.__getCurrInstr()
|
||||
|
||||
# python, add switch statements *please*
|
||||
@ -117,19 +204,19 @@ class LuaDecomp:
|
||||
# move registers
|
||||
self.__setReg(instr.A, self.__getReg(instr.B))
|
||||
elif instr.opcode == Opcodes.LOADK:
|
||||
self.__setReg(instr.A, self.chunk.constants[instr.B].toCode())
|
||||
self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
|
||||
elif instr.opcode == Opcodes.LOADBOOL:
|
||||
if instr.B == 0:
|
||||
self.__setReg(instr.A, "false")
|
||||
else:
|
||||
self.__setReg(instr.A, "true")
|
||||
elif instr.opcode == Opcodes.GETGLOBAL:
|
||||
self.__setReg(instr.A, self.chunk.constants[instr.B].data)
|
||||
self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
|
||||
elif instr.opcode == Opcodes.GETTABLE:
|
||||
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
|
||||
elif instr.opcode == Opcodes.SETGLOBAL:
|
||||
self.__startStatement()
|
||||
self.__addExpr(self.chunk.constants[instr.B].data + " = " + self.__getReg(instr.A))
|
||||
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
|
||||
elif instr.opcode == Opcodes.SETTABLE:
|
||||
self.__startStatement()
|
||||
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
|
||||
@ -148,7 +235,7 @@ class LuaDecomp:
|
||||
elif instr.opcode == Opcodes.UNM:
|
||||
self.__setReg(instr.A, "-" + self.__getReg(instr.B))
|
||||
elif instr.opcode == Opcodes.NOT:
|
||||
self.__setReg(instr.A, "!" + self.__getReg(instr.B))
|
||||
self.__setReg(instr.A, "not " + self.__getReg(instr.B))
|
||||
elif instr.opcode == Opcodes.LEN:
|
||||
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B))
|
||||
elif instr.opcode == Opcodes.CONCAT:
|
||||
@ -163,19 +250,11 @@ class LuaDecomp:
|
||||
elif instr.opcode == Opcodes.JMP:
|
||||
pass
|
||||
elif instr.opcode == Opcodes.EQ:
|
||||
self.__startStatement()
|
||||
if instr.A > 0:
|
||||
self.__addExpr("if not ")
|
||||
else:
|
||||
self.__addExpr("if ")
|
||||
self.__addExpr(self.__readRK(instr.B) + " == " + self.__readRK(instr.C) + " ")
|
||||
self.__startScope("then ", self.__getNextInstr().B + 1)
|
||||
|
||||
self.pc += 1 # skip next instr
|
||||
self.__compJmp(" == ")
|
||||
elif instr.opcode == Opcodes.LT:
|
||||
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " < ")
|
||||
self.__compJmp(" < ")
|
||||
elif instr.opcode == Opcodes.LE:
|
||||
self.__emitOperand(instr.A, instr.B, instr.C, " <= ")
|
||||
self.__compJmp(" <= ")
|
||||
elif instr.opcode == Opcodes.CALL:
|
||||
preStr = ""
|
||||
callStr = ""
|
||||
|
48
lundump.py
48
lundump.py
@ -68,7 +68,15 @@ class ConstType(IntEnum):
|
||||
|
||||
_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT]
|
||||
_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF]
|
||||
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL]
|
||||
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL]
|
||||
|
||||
# is an 'RK' value a K? (result is true for K, false for R)
|
||||
def whichRK(rk: int):
|
||||
return (rk & (1 << 8)) > 0
|
||||
|
||||
# read an RK as a K
|
||||
def readRKasK(rk: int):
|
||||
return (rk & ~(1 << 8))
|
||||
|
||||
class Instruction:
|
||||
def __init__(self, type: InstructionType, name: str) -> None:
|
||||
@ -80,9 +88,9 @@ class Instruction:
|
||||
self.C: int = None
|
||||
|
||||
# 'RK's are special because they can be either a register or a konstant. a bitflag is read to determine which
|
||||
def __readRK(self, rk: int) -> str:
|
||||
if (rk & (1 << 8)) > 0:
|
||||
return "K[" + str((rk & ~(1 << 8))) + "]"
|
||||
def __formatRK(self, rk: int) -> str:
|
||||
if whichRK(rk):
|
||||
return "K[" + str(readRKasK(rk)) + "]"
|
||||
else:
|
||||
return "R[" + str(rk) + "]"
|
||||
|
||||
@ -93,28 +101,39 @@ class Instruction:
|
||||
if self.type == InstructionType.ABC:
|
||||
# by default, treat them as registers
|
||||
A = "R[%d]" % self.A
|
||||
B = "R[%d]" % self.B
|
||||
C = "R[%d]" % self.C
|
||||
B = "%d" % self.B
|
||||
C = "%d" % self.C
|
||||
|
||||
# these opcodes have RKs for B & C
|
||||
if self.opcode in _RKBCInstr:
|
||||
B = self.__readRK(self.B)
|
||||
C = self.__readRK(self.C)
|
||||
B = self.__formatRK(self.B)
|
||||
C = self.__formatRK(self.C)
|
||||
elif self.opcode in _RKCInstr: # just for C
|
||||
C = self.__readRK(self.C)
|
||||
C = self.__formatRK(self.C)
|
||||
|
||||
regs = "%s %s %s" % (A, B, C)
|
||||
regs = "%6s %6s %6s" % (A, B, C)
|
||||
elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
|
||||
A = "R[%d]" % self.A
|
||||
B = "R[%d]" % self.B
|
||||
B = "%d" % self.B
|
||||
|
||||
if self.opcode in _KBx:
|
||||
B = "K[%d]" % self.B
|
||||
|
||||
regs = "%s %s" % (A, B)
|
||||
regs = "%6s %6s" % (A, B)
|
||||
|
||||
return "%s : %s" % (instr, regs)
|
||||
|
||||
def getAnnotation(self, chunk):
|
||||
if self.opcode == Opcodes.MOVE:
|
||||
return "move R[%d] into R[%d]" % (self.B, self.A)
|
||||
elif self.opcode == Opcodes.LOADK:
|
||||
return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A)
|
||||
elif self.opcode == Opcodes.CONCAT:
|
||||
count = self.C - self.B + 1
|
||||
return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
|
||||
else:
|
||||
return ""
|
||||
|
||||
class Constant:
|
||||
def __init__(self, type: ConstType, data) -> None:
|
||||
self.type = type
|
||||
@ -180,6 +199,9 @@ class Chunk:
|
||||
# there's no local information (may have been stripped)
|
||||
return None
|
||||
|
||||
def getConstant(self, indx: int) -> Constant:
|
||||
return self.constants[indx]
|
||||
|
||||
def print(self):
|
||||
print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
|
||||
for z in range(len(self.constants)):
|
||||
@ -188,7 +210,7 @@ class Chunk:
|
||||
|
||||
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
|
||||
for i in range(len(self.instructions)):
|
||||
print("[%3d] %s" % (i, self.instructions[i].toString()))
|
||||
print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self)))
|
||||
|
||||
if len(self.protos) > 0:
|
||||
print("\n==== [[" + str(self.name) + "'s protos]] ====\n")
|
||||
|
Loading…
Reference in New Issue
Block a user