Added 'repeat .. until' support

- lines are now tracked by start & end PC
- new config option: annotateLines. If true, line annotations showing each line's start & end PC will be emitted
- lundump.py now shows locals for each proto
This commit is contained in:
CPunch 2022-08-12 17:08:31 -05:00
parent 368ff62538
commit 0d947f4f3d
3 changed files with 99 additions and 44 deletions

View File

@ -14,10 +14,11 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma
> cat example.lua && luac5.1 -o example.luac example.lua > cat example.lua && luac5.1 -o example.luac example.lua
local i, x = 0, 2 local i, x = 0, 2
while i < 10 do repeat
print(i + x) print(i + x)
i = i + 1 i = i + 1
end until i < 10
> python main.py example.luac > python main.py example.luac
example.luac example.luac
@ -25,31 +26,34 @@ example.luac
0: [NUMBER] 0.0 0: [NUMBER] 0.0
1: [NUMBER] 2.0 1: [NUMBER] 2.0
2: [NUMBER] 10.0 2: [STRING] print
3: [STRING] print 3: [NUMBER] 1.0
4: [NUMBER] 1.0 4: [NUMBER] 10.0
==== [[example.lua's locals]] ====
R[0]: i
R[1]: x
==== [[example.lua's dissassembly]] ==== ==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0] [ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
[ 1] LOADK : R[1] K[1] ; load 2.0 into R[1] [ 1] LOADK : R[1] K[1] ; load 2.0 into R[1]
[ 2] LT : R[0] R[0] K[2] ; [ 2] GETGLOBAL : R[2] K[2] ;
[ 3] JMP : R[0] 5 ; [ 3] ADD : R[3] R[0] R[1] ;
[ 4] GETGLOBAL : R[2] K[3] ; [ 4] CALL : R[2] 2 1 ;
[ 5] ADD : R[3] R[0] R[1] ; [ 5] ADD : R[0] R[0] K[3] ;
[ 6] CALL : R[2] 2 1 ; [ 6] LT : R[0] R[0] K[4] ;
[ 7] ADD : R[0] R[0] K[4] ; [ 7] JMP : R[0] -6 ;
[ 8] JMP : R[0] -7 ; [ 8] RETURN : R[0] 1 0 ;
[ 9] RETURN : R[0] 1 0 ;
==== [[example.lua's decompiled source]] ==== ==== [[example.lua's decompiled source]] ====
local i = 0.0 local i = 0.0
local x = 2.0 local x = 2.0
while i < 10.0 do repeat
print((i + x)) print((i + x))
i = (i + 1.0) i = (i + 1.0)
end until i < 10.0
``` ```

View File

@ -22,11 +22,19 @@ class _Traceback:
self.uses = [] self.uses = []
self.isConst = False self.isConst = False
class _Line:
def __init__(self, startPC: int, endPC: int, src: str, scope: int):
self.startPC = startPC
self.endPC = endPC
self.src = src
self.scope = scope
class LuaDecomp: class LuaDecomp:
def __init__(self, chunk: Chunk): def __init__(self, chunk: Chunk):
self.chunk = chunk self.chunk = chunk
self.pc = 0 self.pc = 0
self.scope = [] self.scope: list[_Scope] = []
self.lines: list[_Line] = []
self.top = {} self.top = {}
self.locals = {} self.locals = {}
self.traceback = {} self.traceback = {}
@ -35,6 +43,7 @@ class LuaDecomp:
# configurations! # configurations!
self.aggressiveLocals = False # should *EVERY* set register be considered a local? self.aggressiveLocals = False # should *EVERY* set register be considered a local?
self.annotateLines = False
self.indexWidth = 4 # how many spaces for indentions? self.indexWidth = 4 # how many spaces for indentions?
self.__loadLocals() self.__loadLocals()
@ -48,7 +57,11 @@ class LuaDecomp:
self.__checkScope() self.__checkScope()
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print(self.src)
for line in self.lines:
if self.annotateLines:
print("-- PC: %d to PC: %d" % (line.startPC, line.endPC))
print(((' ' * self.indexWidth) * line.scope) + line.src)
# =======================================[[ Helpers ]]========================================= # =======================================[[ Helpers ]]=========================================
@ -64,20 +77,42 @@ class LuaDecomp:
def __getCurrInstr(self) -> Instruction: def __getCurrInstr(self) -> Instruction:
return self.__getInstrAtPC(self.pc) return self.__getInstrAtPC(self.pc)
# when we read from a register, call this def __makeTracIfNotExist(self) -> None:
def __addUseTraceback(self, reg: int) -> None:
if not self.pc in self.traceback: if not self.pc in self.traceback:
self.traceback[self.pc] = _Traceback() self.traceback[self.pc] = _Traceback()
# when we read from a register, call this
def __addUseTraceback(self, reg: int) -> None:
self.__makeTracIfNotExist()
self.traceback[self.pc].uses.append(reg) self.traceback[self.pc].uses.append(reg)
# when we write from a register, call this # when we write from a register, call this
def __addSetTraceback(self, reg: int) -> None: def __addSetTraceback(self, reg: int) -> None:
if not self.pc in self.traceback: self.__makeTracIfNotExist()
self.traceback[self.pc] = _Traceback()
self.traceback[self.pc].sets.append(reg) self.traceback[self.pc].sets.append(reg)
def __addExpr(self, code: str) -> None:
self.src += code
def __endStatement(self):
startPC = self.lines[len(self.lines) - 1].endPC + 1 if len(self.lines) > 0 else 0
endPC = self.pc
# make sure we don't write an empty line
if not self.src == "":
self.lines.append(_Line(startPC, endPC, self.src, len(self.scope)))
self.src = ""
def __insertStatement(self, pc: int) -> None:
# insert current statement into lines at pc location
for i in range(len(self.lines)):
if self.lines[i].startPC <= pc and self.lines[i].endPC >= pc:
self.lines.insert(i, _Line(pc, pc, self.src, self.lines[i-1].scope if i > 0 else 0))
self.src = ""
return i
self.src = ""
# walks traceback, if local wasn't set before, the local needs to be defined # walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean: def __needsDefined(self, reg) -> Boolean:
for _, trace in self.traceback.items(): for _, trace in self.traceback.items():
@ -94,12 +129,6 @@ class LuaDecomp:
else: else:
self.__makeLocalIdentifier(i) self.__makeLocalIdentifier(i)
def __addExpr(self, code: str) -> None:
self.src += code
def __startStatement(self):
self.src += '\n' + (' ' * self.indexWidth * len(self.scope))
def __getReg(self, indx: int) -> str: def __getReg(self, indx: int) -> str:
self.__addUseTraceback(indx) self.__addUseTraceback(indx)
@ -112,8 +141,8 @@ class LuaDecomp:
if self.__needsDefined(indx): if self.__needsDefined(indx):
self.__newLocal(indx, code) self.__newLocal(indx, code)
else: else:
self.__startStatement()
self.__addExpr(self.locals[indx] + " = " + code) self.__addExpr(self.locals[indx] + " = " + code)
self.__endStatement()
elif self.aggressiveLocals: # 'every register is a local!!' elif self.aggressiveLocals: # 'every register is a local!!'
self.__newLocal(indx, code) self.__newLocal(indx, code)
@ -138,14 +167,15 @@ class LuaDecomp:
# TODO: grab identifier from chunk(?) # TODO: grab identifier from chunk(?)
self.__makeLocalIdentifier(indx) self.__makeLocalIdentifier(indx)
self.__startStatement()
self.__addExpr("local " + self.locals[indx] + " = " + expr) self.__addExpr("local " + self.locals[indx] + " = " + expr)
self.__endStatement()
# ========================================[[ Scopes ]]========================================= # ========================================[[ Scopes ]]=========================================
def __startScope(self, scopeType: str, size: int) -> None: def __startScope(self, scopeType: str, start: int, size: int) -> None:
self.__addExpr(scopeType) self.__addExpr(scopeType)
self.scope.append(_Scope(self.pc, self.pc + size)) self.__endStatement()
self.scope.append(_Scope(start, start + size))
# checks if we need to end a scope # checks if we need to end a scope
def __checkScope(self) -> None: def __checkScope(self) -> None:
@ -156,9 +186,9 @@ class LuaDecomp:
self.__endScope() self.__endScope()
def __endScope(self) -> None: def __endScope(self) -> None:
self.scope.pop() self.__endStatement()
self.__startStatement()
self.__addExpr("end") self.__addExpr("end")
self.scope.pop()
# =====================================[[ Instructions ]]====================================== # =====================================[[ Instructions ]]======================================
@ -179,15 +209,33 @@ class LuaDecomp:
if self.pc + jmp + jmpToInstr.B <= self.pc + 1: if self.pc + jmp + jmpToInstr.B <= self.pc + 1:
jmpType = "while" jmpType = "while"
scopeStart = "do" scopeStart = "do"
elif jmp < 0:
# 'repeat until' loop (probably)
jmpType = "until"
scopeStart = None
self.__startStatement()
if instr.A > 0: if instr.A > 0:
self.__addExpr("%s not " % jmpType) self.__addExpr("%s not " % jmpType)
else: else:
self.__addExpr("%s " % jmpType) self.__addExpr("%s " % jmpType)
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ") self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
self.__startScope("%s " % scopeStart, jmp)
self.pc += 1 # skip next instr self.pc += 1 # skip next instr
if scopeStart:
self.__startScope("%s " % scopeStart, self.pc - 1, jmp)
# we end the statement *after* scopeStart
self.__endStatement()
else:
# end the statement prior to repeat
self.__endStatement()
# it's a repeat until loop, insert 'repeat' at the jumpTo location
self.__addExpr("repeat")
insertedLine = self.__insertStatement(self.pc + jmp)
# add scope to every line in-between
for i in range(insertedLine+1, len(self.lines)-1):
self.lines[i].scope += 1
# 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which
def __readRK(self, rk: int) -> str: def __readRK(self, rk: int) -> str:
@ -215,11 +263,11 @@ class LuaDecomp:
elif instr.opcode == Opcodes.GETTABLE: elif instr.opcode == Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL: elif instr.opcode == Opcodes.SETGLOBAL:
self.__startStatement()
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
self.__endStatement()
elif instr.opcode == Opcodes.SETTABLE: elif instr.opcode == Opcodes.SETTABLE:
self.__startStatement()
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__endStatement()
elif instr.opcode == Opcodes.ADD: elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB: elif instr.opcode == Opcodes.SUB:
@ -283,10 +331,10 @@ class LuaDecomp:
preStr += ", " if not indx == instr.A + instr.C - 2 else "" preStr += ", " if not indx == instr.A + instr.C - 2 else ""
preStr += " = " preStr += " = "
self.__startStatement()
self.__addExpr(preStr + callStr) self.__addExpr(preStr + callStr)
self.__endStatement()
elif instr.opcode == Opcodes.RETURN: elif instr.opcode == Opcodes.RETURN:
self.__startStatement() self.__endStatement()
pass # no-op for now pass # no-op for now
else: else:
raise Exception("unsupported instruction: %s" % instr.toString()) raise Exception("unsupported instruction: %s" % instr.toString())

View File

@ -204,9 +204,12 @@ class Chunk:
def print(self): def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n") print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)): for i in range(len(self.constants)):
i = self.constants[z] print("%d: %s" % (i, self.constants[i].toString()))
print(str(z) + ": " + i.toString())
print("\n==== [[" + str(self.name) + "'s locals]] ====\n")
for i in range(len(self.locals)):
print("R[%d]: %s" % (i, self.locals[i].name))
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)): for i in range(len(self.instructions)):