Compare commits

...

3 Commits

Author SHA1 Message Date
CPunch bc4e762e26 lp: bug fix (forgot to transfer registers) 2022-08-16 00:26:50 -05:00
CPunch 19bed999ee lp: added __parseNewTable(), better table pseudo-code 2022-08-16 00:12:26 -05:00
CPunch a248cc4807 lp: added NEWTABLE && SETLIST
- tables can now be (mostly) decompiled
- changed 'decompiled source' to 'pseudo-code' since the output doesn't typically match the compiled script source.
- misc. refactoring
2022-08-15 23:30:32 -05:00
3 changed files with 113 additions and 39 deletions

View File

@ -12,51 +12,70 @@ Lua has a relatively small instruction set (only 38 different opcodes!). This ma
```sh
> cat example.lua && luac5.1 -o example.luac example.lua
local total = 0
local tbl = {"He", "llo", " ", "Wo", "rld", "!"}
local str = ""
for i = 0, 9, 1 do
total = total + i
print(total)
for i = 1, #tbl do
str = str .. tbl[i]
end
print(str)
> python main.py example.luac
example.luac
==== [[example.lua's constants]] ====
0: [NUMBER] 0.0
1: [NUMBER] 9.0
2: [NUMBER] 1.0
3: [STRING] print
0: [STRING] He
1: [STRING] llo
2: [STRING]
3: [STRING] Wo
4: [STRING] rld
5: [STRING] !
6: [STRING]
7: [NUMBER] 1.0
8: [STRING] print
==== [[example.lua's locals]] ====
R[0]: total
R[1]: (for index)
R[2]: (for limit)
R[3]: (for step)
R[4]: i
R[0]: tbl
R[1]: str
R[2]: (for index)
R[3]: (for limit)
R[4]: (for step)
R[5]: i
==== [[example.lua's dissassembly]] ====
[ 0] LOADK : R[0] K[0] ; load 0.0 into R[0]
[ 1] LOADK : R[1] K[0] ; load 0.0 into R[1]
[ 2] LOADK : R[2] K[1] ; load 9.0 into R[2]
[ 3] LOADK : R[3] K[2] ; load 1.0 into R[3]
[ 4] FORPREP : R[1] 4 ;
[ 5] ADD : R[0] R[0] R[4] ; add R[4] to R[0], place into R[0]
[ 6] GETGLOBAL : R[5] K[3] ; move _G["print"] into R[5]
[ 7] MOVE : 6 0 0 ; move R[0] into R[6]
[ 8] CALL : 5 2 1 ;
[ 9] FORLOOP : R[1] -5 ;
[ 10] RETURN : 0 1 0 ;
[ 0] NEWTABLE : 0 6 0 ;
[ 1] LOADK : R[1] K[0] ; load "He" into R[1]
[ 2] LOADK : R[2] K[1] ; load "llo" into R[2]
[ 3] LOADK : R[3] K[2] ; load " " into R[3]
[ 4] LOADK : R[4] K[3] ; load "Wo" into R[4]
[ 5] LOADK : R[5] K[4] ; load "rld" into R[5]
[ 6] LOADK : R[6] K[5] ; load "!" into R[6]
[ 7] SETLIST : 0 6 1 ;
[ 8] LOADK : R[1] K[6] ; load "" into R[1]
[ 9] LOADK : R[2] K[7] ; load 1 into R[2]
[ 10] LEN : 3 0 0 ;
[ 11] LOADK : R[4] K[7] ; load 1 into R[4]
[ 12] FORPREP : R[2] 3 ;
[ 13] MOVE : 6 1 0 ; move R[1] into R[6]
[ 14] GETTABLE : R[7] 0 R[5] ;
[ 15] CONCAT : 1 6 7 ; concat 2 values from R[6] to R[7], store into R[1]
[ 16] FORLOOP : R[2] -4 ;
[ 17] GETGLOBAL : R[2] K[8] ; move _G["print"] into R[2]
[ 18] MOVE : 3 1 0 ; move R[1] into R[3]
[ 19] CALL : 2 2 1 ;
[ 20] RETURN : 0 1 0 ;
==== [[example.lua's decompiled source]] ====
==== [[example.lua's pseudo-code]] ====
local total = 0.0
for i = 0.0, 9.0, 1.0 do
total = (total + i)
print(total)
local tbl = {"He", "llo", " ", "Wo", "rld", "!", }
local str = ""
for i = 1, #tbl, 1 do
str = str .. tbl[i]
end
print(str)
```

View File

@ -8,7 +8,6 @@
from operator import concat
from subprocess import call
from xmlrpc.client import Boolean
from lundump import Chunk, Constant, Instruction, Opcodes, whichRK, readRKasK
class _Scope:
@ -30,7 +29,12 @@ class _Line:
self.scope = scope
def isValidLocal(ident: str) -> bool:
for c in ident:
# has to start with an alpha or _
if ident[0] not in "abcdefghijklmnopqrstuvwxyz_":
return False
# then it can be alphanum or _
for c in ident[1:]:
if c not in "abcdefghijklmnopqrstuvwxyz1234567890_":
return False
@ -63,7 +67,7 @@ class LuaDecomp:
# end the scope (if we're supposed to)
self.__checkScope()
print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
print("\n==== [[" + str(self.chunk.name) + "'s pseudo-code]] ====\n")
for line in self.lines:
if self.annotateLines:
@ -121,7 +125,7 @@ class LuaDecomp:
self.src = ""
# walks traceback, if local wasn't set before, the local needs to be defined
def __needsDefined(self, reg) -> Boolean:
def __needsDefined(self, reg) -> bool:
for _, trace in self.traceback.items():
if reg in trace.sets:
return False
@ -147,7 +151,7 @@ class LuaDecomp:
# if the top indx is a local, get it
return self.locals[indx] if indx in self.locals else self.top[indx]
def __setReg(self, indx: int, code: str) -> None:
def __setReg(self, indx: int, code: str, forceLocal: bool = False) -> None:
# if the top indx is a local, set it
if indx in self.locals:
if self.__needsDefined(indx):
@ -155,10 +159,9 @@ class LuaDecomp:
else:
self.__addExpr(self.locals[indx] + " = " + code)
self.__endStatement()
elif self.aggressiveLocals: # 'every register is a local!!'
elif self.aggressiveLocals or forceLocal: # 'every register is a local!!'
self.__newLocal(indx, code)
self.__addSetTraceback(indx)
self.top[indx] = code
@ -202,6 +205,8 @@ class LuaDecomp:
self.__addExpr("end")
self.scope.pop()
self.__endStatement()
# =====================================[[ Instructions ]]======================================
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
@ -256,6 +261,38 @@ class LuaDecomp:
else:
return self.__getReg(rk)
# walk & peek ahead NEWTABLE
def __parseNewTable(self, indx: int):
    """Fold the LOADK/SETLIST run that follows a NEWTABLE into one table literal.

    Peeks at the upcoming instructions; for as long as they are LOADK or
    SETLIST they are consumed here (self.pc is advanced past each one).
    LOADK constants are held per register until a SETLIST moves them into
    the literal. The finished literal is then assigned to register `indx`.
    """
    # TODO: parse SETTABLE too?
    foldable = (Opcodes.LOADK, Opcodes.SETLIST)
    pending = {}  # register index -> constant rendered as code, not yet placed in the table
    fields = []   # rendered "value, " pieces, in the order SETLIST consumed them

    upcoming = self.__getNextInstr()
    while upcoming.opcode in foldable:
        if upcoming.opcode == Opcodes.LOADK:
            # remember the constant instead of emitting an assignment for it
            pending[upcoming.A] = self.chunk.getConstant(upcoming.B).toCode()
        elif upcoming.opcode == Opcodes.SETLIST:
            # takes B values from the registers directly after register A
            firstReg = upcoming.A + 1
            for reg in range(firstReg, firstReg + upcoming.B):
                fields.append("%s, " % pending.pop(reg))

        self.pc += 1
        upcoming = self.__getNextInstr()

    literal = "{" + "".join(fields) + "}"

    # forceLocal is used even though it isn't known *for sure* that the register is a local.
    # it helps later if the table is reused (the common case); otherwise it only affects
    # syntax, which may look a little odd but is equivalent nonetheless
    self.__setReg(indx, literal, forceLocal=True)
    self.__endStatement()

    # constants that were loaded but never moved into the table still have to be set
    for reg, code in pending.items():
        self.__setReg(reg, code)
def parseInstr(self):
instr = self.__getCurrInstr()
@ -280,6 +317,8 @@ class LuaDecomp:
elif instr.opcode == Opcodes.SETTABLE:
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__endStatement()
elif instr.opcode == Opcodes.NEWTABLE:
self.__parseNewTable(instr.A)
elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB:
@ -297,7 +336,7 @@ class LuaDecomp:
elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "not " + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getCurrInstr(instr.B))
self.__setReg(instr.A, "#" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.CONCAT:
count = instr.C-instr.B+1
concatStr = ""
@ -353,5 +392,21 @@ class LuaDecomp:
elif instr.opcode == Opcodes.FORPREP:
self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2)))
self.__startScope("do", self.pc, instr.B)
elif instr.opcode == Opcodes.SETLIST:
# LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST
# eg.
# [ 49] LOADK : R[49] K[1] ; load 0.0 into R[49]
# [ 50] LOADK : R[50] K[1] ; load 0.0 into R[50]
# [ 51] SETLIST : 0 50 1 ; sets list[1..50]
# [ 52] LOADK : R[1] K[1] ; load 0.0 into R[1]
# [ 53] SETLIST : 0 1 2 ; sets list[51..51]
numElems = instr.B
startAt = ((instr.C - 1) * 50)
ident = self.__getLocal(instr.A)
# set each index (TODO: make tables less verbose)
for i in range(numElems):
self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1)))
self.__endStatement()
else:
raise Exception("unsupported instruction: %s" % instr.toString())

View File

@ -152,7 +152,7 @@ class Constant:
self.data = data
def toString(self):
return "[" + self.type.name + "] " + str(self.data)
return "[%s] %s" % (self.type.name, str(self.data))
# format the constant so that it is parsable by lua
def toCode(self):
@ -164,7 +164,7 @@ class Constant:
else:
return "false"
elif self.type == ConstType.NUMBER:
return str(self.data)
return "%g" % self.data
else:
return "nil"