Compare commits

...

2 Commits

Author SHA1 Message Date
CPunch 5d91dbbc64 lundump.py: better instruction annotations 2022-08-12 17:31:15 -05:00
CPunch 0d947f4f3d Added 'repeat .. until' support
- lines are now tracked by start & end PC
- new config option: annotatedLines. if true line annotations show start & end PC will be emitted
- lundump.py now shows locals for each proto
2022-08-12 17:08:31 -05:00
3 changed files with 117 additions and 51 deletions

View File

@ -12,44 +12,47 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma
```sh
> cat example.lua && luac5.1 -o example.luac example.lua
local i, x = 0, 2
local i, x = 10, 2
while i < 10 do
repeat
print(i + x)
i = i + 1
end
i = i - 1
until i < 0
> python main.py example.luac
example.luac
==== [[example.lua's constants]] ====
0: [NUMBER] 0.0
0: [NUMBER] 10.0
1: [NUMBER] 2.0
2: [NUMBER] 10.0
3: [STRING] print
4: [NUMBER] 1.0
2: [STRING] print
3: [NUMBER] 1.0
4: [NUMBER] 0.0
==== [[example.lua's locals]] ====
R[0]: i
R[1]: x
==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
[ 0] LOADK : R[0] K[0] ; load 10.0 into R[0]
[ 1] LOADK : R[1] K[1] ; load 2.0 into R[1]
[ 2] LT : R[0] R[0] K[2] ;
[ 3] JMP : R[0] 5 ;
[ 4] GETGLOBAL : R[2] K[3] ;
[ 5] ADD : R[3] R[0] R[1] ;
[ 6] CALL : R[2] 2 1 ;
[ 7] ADD : R[0] R[0] K[4] ;
[ 8] JMP : R[0] -7 ;
[ 9] RETURN : R[0] 1 0 ;
[ 2] GETGLOBAL : R[2] K[2] ; move _G["print"] into R[2]
[ 3] ADD : R[3] R[0] R[1] ; add R[1] to R[0], place into R[3]
[ 4] CALL : 2 2 1 ;
[ 5] SUB : R[0] R[0] K[3] ; sub K[3] from R[0], place into R[0]
[ 6] LT : R[0] R[0] K[4] ;
[ 7] JMP : R[0] -6 ;
[ 8] RETURN : 0 1 0 ;
==== [[example.lua's decompiled source]] ====
local i = 0.0
local i = 10.0
local x = 2.0
while i < 10.0 do
repeat
print((i + x))
i = (i + 1.0)
end
i = (i - 1.0)
until i < 0.0
```

View File

@ -22,11 +22,19 @@ class _Traceback:
self.uses = []
self.isConst = False
class _Line:
def __init__(self, startPC: int, endPC: int, src: str, scope: int):
self.startPC = startPC
self.endPC = endPC
self.src = src
self.scope = scope
class LuaDecomp:
def __init__(self, chunk: Chunk):
self.chunk = chunk
self.pc = 0
self.scope = []
self.scope: list[_Scope] = []
self.lines: list[_Line] = []
self.top = {}
self.locals = {}
self.traceback = {}
@ -35,6 +43,7 @@ class LuaDecomp:
# configurations!
self.aggressiveLocals = False # should *EVERY* set register be considered a local?
self.annotateLines = False
self.indexWidth = 4 # how many spaces for indentions?
self.__loadLocals()
@ -48,7 +57,11 @@ class LuaDecomp:
self.__checkScope()
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print(self.src)
for line in self.lines:
if self.annotateLines:
print("-- PC: %d to PC: %d" % (line.startPC, line.endPC))
print(((' ' * self.indexWidth) * line.scope) + line.src)
# =======================================[[ Helpers ]]=========================================
@ -64,20 +77,42 @@ class LuaDecomp:
def __getCurrInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc)
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
def __makeTracIfNotExist(self) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
self.__makeTracIfNotExist()
self.traceback[self.pc].uses.append(reg)
# when we write from a register, call this
def __addSetTraceback(self, reg: int) -> None:
if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback()
self.__makeTracIfNotExist()
self.traceback[self.pc].sets.append(reg)
def __addExpr(self, code: str) -> None:
self.src += code
def __endStatement(self):
startPC = self.lines[len(self.lines) - 1].endPC + 1 if len(self.lines) > 0 else 0
endPC = self.pc
# make sure we don't write an empty line
if not self.src == "":
self.lines.append(_Line(startPC, endPC, self.src, len(self.scope)))
self.src = ""
def __insertStatement(self, pc: int) -> None:
# insert current statement into lines at pc location
for i in range(len(self.lines)):
if self.lines[i].startPC <= pc and self.lines[i].endPC >= pc:
self.lines.insert(i, _Line(pc, pc, self.src, self.lines[i-1].scope if i > 0 else 0))
self.src = ""
return i
self.src = ""
# walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean:
for _, trace in self.traceback.items():
@ -94,12 +129,6 @@ class LuaDecomp:
else:
self.__makeLocalIdentifier(i)
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str:
self.__addUseTraceback(indx)
@ -112,8 +141,8 @@ class LuaDecomp:
if self.__needsDefined(indx):
self.__newLocal(indx, code)
else:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code)
self.__endStatement()
elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code)
@ -138,14 +167,15 @@ class LuaDecomp:
# TODO: grab identifier from chunk(?)
self.__makeLocalIdentifier(indx)
self.__startStatement()
self.__addExpr("local " + self.locals[indx] + " = " + expr)
self.__endStatement()
# ========================================[[ Scopes ]]=========================================
def __startScope(self, scopeType: str, size: int) -> None:
def __startScope(self, scopeType: str, start: int, size: int) -> None:
self.__addExpr(scopeType)
self.scope.append(_Scope(self.pc, self.pc + size))
self.__endStatement()
self.scope.append(_Scope(start, start + size))
# checks if we need to end a scope
def __checkScope(self) -> None:
@ -156,9 +186,9 @@ class LuaDecomp:
self.__endScope()
def __endScope(self) -> None:
self.scope.pop()
self.__startStatement()
self.__endStatement()
self.__addExpr("end")
self.scope.pop()
# =====================================[[ Instructions ]]======================================
@ -179,15 +209,33 @@ class LuaDecomp:
if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
jmpType = "while"
scopeStart = "do"
elif jmp < 0:
# 'repeat until' loop (probably)
jmpType = "until"
scopeStart = None
self.__startStatement()
if instr.A > 0:
self.__addExpr("%s not " % jmpType)
else:
self.__addExpr("%s " % jmpType)
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
self.__startScope("%s " % scopeStart, jmp)
self.pc += 1 # skip next instr
if scopeStart:
self.__startScope("%s " % scopeStart, self.pc - 1, jmp)
# we end the statement *after* scopeStart
self.__endStatement()
else:
# end the statement prior to repeat
self.__endStatement()
# it's a repeat until loop, insert 'repeat' at the jumpTo location
self.__addExpr("repeat")
insertedLine = self.__insertStatement(self.pc + jmp)
# add scope to every line in-between
for i in range(insertedLine+1, len(self.lines)-1):
self.lines[i].scope += 1
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str:
@ -215,11 +263,11 @@ class LuaDecomp:
elif instr.opcode == Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL:
self.__startStatement()
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
self.__endStatement()
elif instr.opcode == Opcodes.SETTABLE:
self.__startStatement()
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__endStatement()
elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB:
@ -283,10 +331,10 @@ class LuaDecomp:
preStr += ", " if not indx == instr.A + instr.C - 2 else ""
preStr += " = "
self.__startStatement()
self.__addExpr(preStr + callStr)
self.__endStatement()
elif instr.opcode == Opcodes.RETURN:
self.__startStatement()
self.__endStatement()
pass # no-op for now
else:
raise Exception("unsupported instruction: %s" % instr.toString())

View File

@ -100,15 +100,17 @@ class Instruction:
if self.type == InstructionType.ABC:
# by default, treat them as registers
A = "R[%d]" % self.A
A = "%d" % self.A
B = "%d" % self.B
C = "%d" % self.C
# these opcodes have RKs for B & C
if self.opcode in _RKBCInstr:
A = "R[%d]" % self.A
B = self.__formatRK(self.B)
C = self.__formatRK(self.C)
elif self.opcode in _RKCInstr: # just for C
A = "R[%d]" % self.A
C = self.__formatRK(self.C)
regs = "%6s %6s %6s" % (A, B, C)
@ -128,6 +130,16 @@ class Instruction:
return "move R[%d] into R[%d]" % (self.B, self.A)
elif self.opcode == Opcodes.LOADK:
return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A)
elif self.opcode == Opcodes.GETGLOBAL:
return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A)
elif self.opcode == Opcodes.ADD:
return 'add %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
elif self.opcode == Opcodes.SUB:
return 'sub %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
elif self.opcode == Opcodes.MUL:
return 'mul %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
elif self.opcode == Opcodes.DIV:
return 'div %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
elif self.opcode == Opcodes.CONCAT:
count = self.C - self.B + 1
return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
@ -204,9 +216,12 @@ class Chunk:
def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)):
i = self.constants[z]
print(str(z) + ": " + i.toString())
for i in range(len(self.constants)):
print("%d: %s" % (i, self.constants[i].toString()))
print("\n==== [[" + str(self.name) + "'s locals]] ====\n")
for i in range(len(self.locals)):
print("R[%d]: %s" % (i, self.locals[i].name))
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)):