Compare commits

...

3 Commits

Author SHA1 Message Date
CPunch bc4e762e26 lp: bug fix (forgot to transfer registers) 2022-08-16 00:26:50 -05:00
CPunch 19bed999ee lp: added __parseNewTable(), better table pseudo-code 2022-08-16 00:12:26 -05:00
CPunch a248cc4807 lp: added NEWTABLE && SETLIST
- tables can now be (mostly) decompiled
- changed 'decompiled source' to 'pseudo-code' since the output doesn't typically match the compiled script source.
- misc. refactoring
2022-08-15 23:30:32 -05:00
3 changed files with 113 additions and 39 deletions

View File

@ -12,51 +12,70 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma
```sh ```sh
> cat example.lua && luac5.1 -o example.luac example.lua > cat example.lua && luac5.1 -o example.luac example.lua
local total = 0 local tbl = {"He", "llo", " ", "Wo", "rld", "!"}
local str = ""
for i = 0, 9, 1 do for i = 1, #tbl do
total = total + i str = str .. tbl[i]
print(total)
end end
print(str)
> python main.py example.luac > python main.py example.luac
example.luac example.luac
==== [[example.lua's constants]] ==== ==== [[example.lua's constants]] ====
0: [NUMBER] 0.0 0: [STRING] He
1: [NUMBER] 9.0 1: [STRING] llo
2: [NUMBER] 1.0 2: [STRING]
3: [STRING] print 3: [STRING] Wo
4: [STRING] rld
5: [STRING] !
6: [STRING]
7: [NUMBER] 1.0
8: [STRING] print
==== [[example.lua's locals]] ==== ==== [[example.lua's locals]] ====
R[0]: total R[0]: tbl
R[1]: (for index) R[1]: str
R[2]: (for limit) R[2]: (for index)
R[3]: (for step) R[3]: (for limit)
R[4]: i R[4]: (for step)
R[5]: i
==== [[example.lua's dissassembly]] ==== ==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0] [ 0] NEWTABLE : 0 6 0 ;
[ 1] LOADK : R[1] K[0] ; load 0.0 into R[1] [ 1] LOADK : R[1] K[0] ; load "He" into R[1]
[ 2] LOADK : R[2] K[1] ; load 9.0 into R[2] [ 2] LOADK : R[2] K[1] ; load "llo" into R[2]
[ 3] LOADK : R[3] K[2] ; load 1.0 into R[3] [ 3] LOADK : R[3] K[2] ; load " " into R[3]
[ 4] FORPREP : R[1] 4 ; [ 4] LOADK : R[4] K[3] ; load "Wo" into R[4]
[ 5] ADD : R[0] R[0] R[4] ; add R[4] to R[0], place into R[0] [ 5] LOADK : R[5] K[4] ; load "rld" into R[5]
[ 6] GETGLOBAL : R[5] K[3] ; move _G["print"] into R[5] [ 6] LOADK : R[6] K[5] ; load "!" into R[6]
[ 7] MOVE : 6 0 0 ; move R[0] into R[6] [ 7] SETLIST : 0 6 1 ;
[ 8] CALL : 5 2 1 ; [ 8] LOADK : R[1] K[6] ; load "" into R[1]
[ 9] FORLOOP : R[1] -5 ; [ 9] LOADK : R[2] K[7] ; load 1 into R[2]
[ 10] RETURN : 0 1 0 ; [ 10] LEN : 3 0 0 ;
[ 11] LOADK : R[4] K[7] ; load 1 into R[4]
[ 12] FORPREP : R[2] 3 ;
[ 13] MOVE : 6 1 0 ; move R[1] into R[6]
[ 14] GETTABLE : R[7] 0 R[5] ;
[ 15] CONCAT : 1 6 7 ; concat 2 values from R[6] to R[7], store into R[1]
[ 16] FORLOOP : R[2] -4 ;
[ 17] GETGLOBAL : R[2] K[8] ; move _G["print"] into R[2]
[ 18] MOVE : 3 1 0 ; move R[1] into R[3]
[ 19] CALL : 2 2 1 ;
[ 20] RETURN : 0 1 0 ;
==== [[example.lua's decompiled source]] ==== ==== [[example.lua's pseudo-code]] ====
local total = 0.0 local tbl = {"He", "llo", " ", "Wo", "rld", "!", }
for i = 0.0, 9.0, 1.0 do local str = ""
total = (total + i) for i = 1, #tbl, 1 do
print(total) str = str .. tbl[i]
end end
print(str)
``` ```

View File

@ -8,7 +8,6 @@
from operator import concat from operator import concat
from subprocess import call from subprocess import call
from xmlrpc.client import Boolean
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
class _Scope: class _Scope:
@ -30,7 +29,12 @@ class _Line:
self.scope = scope self.scope = scope
def isValidLocal(ident: str) -> bool: def isValidLocal(ident: str) -> bool:
for c in ident: # has to start with an alpha or _
if ident[0] not in "abcdefghijklmnopqrstuvwxyz_":
return False
# then it can be alphanum or _
for c in ident[1:]:
if c not in "abcdefghijklmnopqrstuvwxyz1234567890_": if c not in "abcdefghijklmnopqrstuvwxyz1234567890_":
return False return False
@ -63,7 +67,7 @@ class LuaDecomp:
# end the scope (if we're supposed to) # end the scope (if we're supposed to)
self.__checkScope() self.__checkScope()
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n") print("\n==== [[" + str(self.chunk.name) + "'s pseudo-code]] ====\n")
for line in self.lines: for line in self.lines:
if self.annotateLines: if self.annotateLines:
@ -121,7 +125,7 @@ class LuaDecomp:
self.src = "" self.src = ""
# walks traceback, if local wasn't set before, the local needs to be defined # walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean: def __needsDefined(self, reg) -> bool:
for _, trace in self.traceback.items(): for _, trace in self.traceback.items():
if reg in trace.sets: if reg in trace.sets:
return False return False
@ -147,7 +151,7 @@ class LuaDecomp:
# if the top indx is a local, get it # if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx] return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None: def __setReg(self, indx: int, code: str, forceLocal: bool = False) -> None:
# if the top indx is a local, set it # if the top indx is a local, set it
if indx in self.locals: if indx in self.locals:
if self.__needsDefined(indx): if self.__needsDefined(indx):
@ -155,10 +159,9 @@ class LuaDecomp:
else: else:
self.__addExpr(self.locals[indx] + " = " + code) self.__addExpr(self.locals[indx] + " = " + code)
self.__endStatement() self.__endStatement()
elif self.aggressiveLocals: # 'every register is a local!!' elif self.aggressiveLocals or forceLocal: # 'every register is a local!!'
self.__newLocal(indx, code) self.__newLocal(indx, code)
self.__addSetTraceback(indx) self.__addSetTraceback(indx)
self.top[indx] = code self.top[indx] = code
@ -202,6 +205,8 @@ class LuaDecomp:
self.__addExpr("end") self.__addExpr("end")
self.scope.pop() self.scope.pop()
self.__endStatement()
# =====================================[[ Instructions ]]====================================== # =====================================[[ Instructions ]]======================================
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
@ -256,6 +261,38 @@ class LuaDecomp:
else: else:
return self.__getReg(rk) return self.__getReg(rk)
# walk & peek ahead NEWTABLE
def __parseNewTable(self, indx: int):
# TODO: parse SETTABLE too?
tblOps = [Opcodes.LOADK, Opcodes.SETLIST]
instr = self.__getNextInstr()
cachedRegs = {}
tbl = "{"
while instr.opcode in tblOps:
if instr.opcode == Opcodes.LOADK: # operate on registers
cachedRegs[instr.A] = self.chunk.getConstant(instr.B).toCode()
elif instr.opcode == Opcodes.SETLIST:
numElems = instr.B
for i in range(numElems):
tbl += "%s, " % cachedRegs[instr.A + i + 1]
del cachedRegs[instr.A + i + 1]
self.pc += 1
instr = self.__getNextInstr()
tbl += "}"
# I use forceLocal here even though I don't know *for sure* that the register is a local.
# this does help later though if the table is reused (which is 99% of the time). the other 1%
# only affects syntax and may look a little weird but is fine and equivalent nonetheless
self.__setReg(indx, tbl, forceLocal=True)
self.__endStatement()
# if we have leftovers... oops, set those
for i, v in cachedRegs.items():
self.__setReg(i, v)
def parseInstr(self): def parseInstr(self):
instr = self.__getCurrInstr() instr = self.__getCurrInstr()
@ -280,6 +317,8 @@ class LuaDecomp:
elif instr.opcode == Opcodes.SETTABLE: elif instr.opcode == Opcodes.SETTABLE:
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C)) self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__endStatement() self.__endStatement()
elif instr.opcode == Opcodes.NEWTABLE:
self.__parseNewTable(instr.A)
elif instr.opcode == Opcodes.ADD: elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ") self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB: elif instr.opcode == Opcodes.SUB:
@ -297,7 +336,7 @@ class LuaDecomp:
elif instr.opcode == Opcodes.NOT: elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "not " + self.__getReg(instr.B)) self.__setReg(instr.A, "not " + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN: elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B)) self.__setReg(instr.A, "#" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.CONCAT: elif instr.opcode == Opcodes.CONCAT:
count = instr.C-instr.B+1 count = instr.C-instr.B+1
concatStr = "" concatStr = ""
@ -353,5 +392,21 @@ class LuaDecomp:
elif instr.opcode == Opcodes.FORPREP: elif instr.opcode == Opcodes.FORPREP:
self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2))) self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2)))
self.__startScope("do", self.pc, instr.B) self.__startScope("do", self.pc, instr.B)
elif instr.opcode == Opcodes.SETLIST:
# LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST
# eg.
# [ 49] LOADK : R[49] K[1] ; load 0.0 into R[49]
# [ 50] LOADK : R[50] K[1] ; load 0.0 into R[50]
# [ 51] SETLIST : 0 50 1 ; sets list[1..50]
# [ 52] LOADK : R[1] K[1] ; load 0.0 into R[1]
# [ 53] SETLIST : 0 1 2 ; sets list[51..51]
numElems = instr.B
startAt = ((instr.C - 1) * 50)
ident = self.__getLocal(instr.A)
# set each index (TODO: make tables less verbose)
for i in range(numElems):
self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1)))
self.__endStatement()
else: else:
raise Exception("unsupported instruction: %s" % instr.toString()) raise Exception("unsupported instruction: %s" % instr.toString())

View File

@ -152,7 +152,7 @@ class Constant:
self.data = data self.data = data
def toString(self): def toString(self):
return "[" + self.type.name + "] " + str(self.data) return "[%s] %s" % (self.type.name, str(self.data))
# format the constant so that it is parsable by lua # format the constant so that it is parsable by lua
def toCode(self): def toCode(self):
@ -164,7 +164,7 @@ class Constant:
else: else:
return "false" return "false"
elif self.type == ConstType.NUMBER: elif self.type == ConstType.NUMBER:
return str(self.data) return "%g" % self.data
else: else:
return "nil" return "nil"