Compare commits

...

4 Commits

Author SHA1 Message Date
6c98c3c5a0 lundump: prettier instruction annotations 2023-12-25 17:54:46 -06:00
df8e9f7e83 refactoring: switched to match/case
wow! python actually added switch cases! too bad this is just syntax sugar...
2023-12-09 12:01:04 -06:00
a22aa808e0 lp: added support for OP_TEST 2022-08-26 01:18:24 -05:00
935844f274 more minor refactoring 2022-08-22 00:59:21 -05:00
3 changed files with 141 additions and 131 deletions

View File

@@ -231,7 +231,8 @@ class LuaDecomp:
def __emitOperand(self, a: int, b: str, c: str, op: str) -> None: def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
self.__setReg(a, "(" + b + op + c + ")") self.__setReg(a, "(" + b + op + c + ")")
def __compJmp(self, op: str): # handles conditional jumps
def __condJmp(self, op: str, rkBC: bool = True):
instr = self.__getCurrInstr() instr = self.__getCurrInstr()
jmpType = "if" jmpType = "if"
scopeStart = "then" scopeStart = "then"
@@ -254,7 +255,13 @@ class LuaDecomp:
self.__addExpr("%s not " % jmpType) self.__addExpr("%s not " % jmpType)
else: else:
self.__addExpr("%s " % jmpType) self.__addExpr("%s " % jmpType)
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
# write actual comparison
if rkBC:
self.__addExpr(self.__readRK(instr.B) + op + self.__readRK(instr.C) + " ")
else: # just testing rkB
self.__addExpr(op + self.__readRK(instr.B))
self.pc += 1 # skip next instr self.pc += 1 # skip next instr
if scopeStart: if scopeStart:
self.__startScope("%s " % scopeStart, self.pc - 1, jmp) self.__startScope("%s " % scopeStart, self.pc - 1, jmp)
@@ -315,120 +322,125 @@ class LuaDecomp:
def parseInstr(self): def parseInstr(self):
instr = self.__getCurrInstr() instr = self.__getCurrInstr()
# python, add switch statements *please* match instr.opcode:
if instr.opcode == Opcodes.MOVE: # move is a fake ABC instr, C is ignored case Opcodes.MOVE: # move is a fake ABC instr, C is ignored
# move registers # move registers
self.__setReg(instr.A, self.__getReg(instr.B)) self.__setReg(instr.A, self.__getReg(instr.B))
elif instr.opcode == Opcodes.LOADK: case Opcodes.LOADK:
self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode()) self.__setReg(instr.A, self.chunk.getConstant(instr.B).toCode())
elif instr.opcode == Opcodes.LOADBOOL: case Opcodes.LOADBOOL:
if instr.B == 0: if instr.B == 0:
self.__setReg(instr.A, "false") self.__setReg(instr.A, "false")
else: else:
self.__setReg(instr.A, "true") self.__setReg(instr.A, "true")
elif instr.opcode == Opcodes.GETGLOBAL: case Opcodes.GETGLOBAL:
self.__setReg(instr.A, self.chunk.getConstant(instr.B).data) self.__setReg(instr.A, self.chunk.getConstant(instr.B).data)
elif instr.opcode == Opcodes.GETTABLE: case Opcodes.GETTABLE:
self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]") self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
elif instr.opcode == Opcodes.SETGLOBAL: case Opcodes.SETGLOBAL:
self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A)) self.__addExpr(self.chunk.getConstant(instr.B).data + " = " + self.__getReg(instr.A))
self.__endStatement()
elif instr.opcode == Opcodes.SETTABLE:
self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__endStatement()
elif instr.opcode == Opcodes.NEWTABLE:
self.__parseNewTable(instr.A)
elif instr.opcode == Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
elif instr.opcode == Opcodes.SUB:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ")
elif instr.opcode == Opcodes.MUL:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ")
elif instr.opcode == Opcodes.DIV:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ")
elif instr.opcode == Opcodes.MOD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ")
elif instr.opcode == Opcodes.POW:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ")
elif instr.opcode == Opcodes.UNM:
self.__setReg(instr.A, "-" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.NOT:
self.__setReg(instr.A, "not " + self.__getReg(instr.B))
elif instr.opcode == Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getReg(instr.B))
elif instr.opcode == Opcodes.CONCAT:
count = instr.C-instr.B+1
concatStr = ""
# concat all items on stack from RC to RB
for i in range(count):
concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "")
self.__setReg(instr.A, concatStr)
elif instr.opcode == Opcodes.JMP:
pass
elif instr.opcode == Opcodes.EQ:
self.__compJmp(" == ")
elif instr.opcode == Opcodes.LT:
self.__compJmp(" < ")
elif instr.opcode == Opcodes.LE:
self.__compJmp(" <= ")
elif instr.opcode == Opcodes.CALL:
preStr = ""
callStr = ""
ident = ""
# parse arguments
callStr += self.__getReg(instr.A) + "("
for i in range(instr.A + 1, instr.A + instr.B):
callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "")
callStr += ")"
# parse return values
if instr.C > 1:
preStr = "local "
for indx in range(instr.A, instr.A + instr.C - 1):
if indx in self.locals:
ident = self.locals[indx]
else:
ident = self.__makeLocalIdentifier(indx)
preStr += ident
# normally setReg() does this
self.top[indx] = ident
# just so we don't have a trailing ', '
preStr += ", " if not indx == instr.A + instr.C - 2 else ""
preStr += " = "
self.__addExpr(preStr + callStr)
self.__endStatement()
elif instr.opcode == Opcodes.RETURN:
self.__endStatement()
pass # no-op for now
elif instr.opcode == Opcodes.FORLOOP:
pass # no-op for now
elif instr.opcode == Opcodes.FORPREP:
self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2)))
self.__startScope("do", self.pc, instr.B)
elif instr.opcode == Opcodes.SETLIST:
# LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST
# eg.
# [ 49] LOADK : R[49] K[1] ; load 0.0 into R[49]
# [ 50] LOADK : R[50] K[1] ; load 0.0 into R[50]
# [ 51] SETLIST : 0 50 1 ; sets list[1..50]
# [ 52] LOADK : R[1] K[1] ; load 0.0 into R[1]
# [ 53] SETLIST : 0 1 2 ; sets list[51..51]
numElems = instr.B
startAt = ((instr.C - 1) * 50)
ident = self.__getLocal(instr.A)
# set each index (TODO: make tables less verbose)
for i in range(numElems):
self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1)))
self.__endStatement() self.__endStatement()
elif instr.opcode == Opcodes.CLOSURE: case Opcodes.SETTABLE:
proto = LuaDecomp(self.chunk.protos[instr.B], headChunk=False, scopeOffset=len(self.scope)) self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
self.__setReg(instr.A, proto.getPseudoCode()) self.__endStatement()
else: case Opcodes.NEWTABLE:
raise Exception("unsupported instruction: %s" % instr.toString()) self.__parseNewTable(instr.A)
case Opcodes.ADD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " + ")
case Opcodes.SUB:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " - ")
case Opcodes.MUL:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " * ")
case Opcodes.DIV:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " / ")
case Opcodes.MOD:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " % ")
case Opcodes.POW:
self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), " ^ ")
case Opcodes.UNM:
self.__setReg(instr.A, "-" + self.__getReg(instr.B))
case Opcodes.NOT:
self.__setReg(instr.A, "not " + self.__getReg(instr.B))
case Opcodes.LEN:
self.__setReg(instr.A, "#" + self.__getReg(instr.B))
case Opcodes.CONCAT:
count = instr.C-instr.B+1
concatStr = ""
# concat all items on stack from RC to RB
for i in range(count):
concatStr += self.__getReg(instr.B + i) + (" .. " if not i == count - 1 else "")
self.__setReg(instr.A, concatStr)
case Opcodes.JMP:
pass
case Opcodes.EQ:
self.__condJmp(" == ")
case Opcodes.LT:
self.__condJmp(" < ")
case Opcodes.LE:
self.__condJmp(" <= ")
case Opcodes.TEST:
if instr.C == 0:
self.__condJmp("", False)
else:
self.__condJmp("not ", False)
case Opcodes.CALL:
preStr = ""
callStr = ""
ident = ""
# parse arguments
callStr += self.__getReg(instr.A) + "("
for i in range(instr.A + 1, instr.A + instr.B):
callStr += self.__getReg(i) + (", " if not i + 1 == instr.A + instr.B else "")
callStr += ")"
# parse return values
if instr.C > 1:
preStr = "local "
for indx in range(instr.A, instr.A + instr.C - 1):
if indx in self.locals:
ident = self.locals[indx]
else:
ident = self.__makeLocalIdentifier(indx)
preStr += ident
# normally setReg() does this
self.top[indx] = ident
# just so we don't have a trailing ', '
preStr += ", " if not indx == instr.A + instr.C - 2 else ""
preStr += " = "
self.__addExpr(preStr + callStr)
self.__endStatement()
case Opcodes.RETURN:
self.__endStatement()
pass # no-op for now
case Opcodes.FORLOOP:
pass # no-op for now
case Opcodes.FORPREP:
self.__addExpr("for %s = %s, %s, %s " % (self.__getLocal(instr.A+3), self.__getReg(instr.A), self.__getReg(instr.A + 1), self.__getReg(instr.A + 2)))
self.__startScope("do", self.pc, instr.B)
case Opcodes.SETLIST:
# LFIELDS_PER_FLUSH (50) is the number of elements that *should* have been set in the list in the *last* SETLIST
# eg.
# [ 49] LOADK : R[49] K[1] ; load 0.0 into R[49]
# [ 50] LOADK : R[50] K[1] ; load 0.0 into R[50]
# [ 51] SETLIST : 0 50 1 ; sets list[1..50]
# [ 52] LOADK : R[1] K[1] ; load 0.0 into R[1]
# [ 53] SETLIST : 0 1 2 ; sets list[51..51]
numElems = instr.B
startAt = ((instr.C - 1) * 50)
ident = self.__getLocal(instr.A)
# set each index (TODO: make tables less verbose)
for i in range(numElems):
self.__addExpr("%s[%d] = %s" % (ident, (startAt + i + 1), self.__getReg(instr.A + i + 1)))
self.__endStatement()
case Opcodes.CLOSURE:
proto = LuaDecomp(self.chunk.protos[instr.B], headChunk=False, scopeOffset=len(self.scope))
self.__setReg(instr.A, proto.getPseudoCode())
case _:
raise Exception("unsupported instruction: %s" % instr.toString())

View File

@@ -1,7 +1,7 @@
''' '''
l(un)dump.py l(un)dump.py
A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the A Lua5.1 cross-platform bytecode deserializer and serializer. This module pulls int and size_t sizes from the
chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms,
regardless of the host machine. regardless of the host machine.
@@ -9,11 +9,9 @@
as well as read the lundump.c source file from the Lua5.1 source. as well as read the lundump.c source file from the Lua5.1 source.
''' '''
from multiprocessing.spawn import get_executable
import struct import struct
import array import array
from enum import IntEnum, Enum, auto from enum import IntEnum, Enum, auto
from typing_extensions import Self
class InstructionType(Enum): class InstructionType(Enum):
ABC = auto(), ABC = auto(),
@@ -129,22 +127,21 @@ class Instruction:
def getAnnotation(self, chunk): def getAnnotation(self, chunk):
if self.opcode == Opcodes.MOVE: if self.opcode == Opcodes.MOVE:
return "move R[%d] into R[%d]" % (self.B, self.A) return "R[%d] := R[%d]" % (self.A, self.B)
elif self.opcode == Opcodes.LOADK: elif self.opcode == Opcodes.LOADK:
return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A) return "R[%d] := K[%d] (%s)" % (self.A, self.B, chunk.getConstant(self.B).toCode())
elif self.opcode == Opcodes.GETGLOBAL: elif self.opcode == Opcodes.GETGLOBAL:
return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A) return 'R[%d] := _G[%s]' % (self.A, chunk.getConstant(self.B).toCode())
elif self.opcode == Opcodes.ADD: elif self.opcode == Opcodes.ADD:
return 'add %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) return 'R[%d] := %s + %s' % (self.A, self.__formatRK(self.B), self.__formatRK(self.C))
elif self.opcode == Opcodes.SUB: elif self.opcode == Opcodes.SUB:
return 'sub %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) return 'R[%d] := %s - %s' % (self.A, self.__formatRK(self.B), self.__formatRK(self.C))
elif self.opcode == Opcodes.MUL: elif self.opcode == Opcodes.MUL:
return 'mul %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) return 'R[%d] := %s * %s' % (self.A, self.__formatRK(self.B), self.__formatRK(self.C))
elif self.opcode == Opcodes.DIV: elif self.opcode == Opcodes.DIV:
return 'div %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) return 'R[%d] := %s / %s' % (self.A, self.__formatRK(self.B), self.__formatRK(self.C))
elif self.opcode == Opcodes.CONCAT: elif self.opcode == Opcodes.CONCAT:
count = self.C - self.B + 1 return "R[%d] := R[%d] .. R[%d]" % (self.A, self.B, self.C)
return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
else: else:
return "" return ""
@@ -478,7 +475,7 @@ class LuaDump:
def _set_double(self, f: float): def _set_double(self, f: float):
order = '>d' if self.big_endian else '<d' order = '>d' if self.big_endian else '<d'
self._writeBlock(struct.pack(order, f)) self._writeBlock(struct.pack(order, f))
def _set_string(self, string: str): def _set_string(self, string: str):
self._set_size_t(len(string)+1) self._set_size_t(len(string)+1)
self._writeBlock(string.encode('utf-8')) self._writeBlock(string.encode('utf-8'))

1
main.py Normal file → Executable file
View File

@@ -1,3 +1,4 @@
#!/usr/bin/env python3
import sys import sys
import lundump import lundump
import lparser import lparser