ported refactoring from LuaDecompy

This commit is contained in:
CPunch 2022-09-01 15:12:21 -05:00
parent 379efb0f82
commit 63756b3bcd

462
luac.py
View File

@ -1,7 +1,7 @@
''' '''
Luac.py l(un)dump.py
A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the A Lua5.1 cross-platform bytecode deserializer && serializer. This module pulls int and size_t sizes from the
chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms,
regardless of the host machine. regardless of the host machine.
@ -18,12 +18,66 @@ class InstructionType(Enum):
ABx = auto(), ABx = auto(),
AsBx = auto() AsBx = auto()
class Opcodes(IntEnum):
    """Lua 5.1 VM opcodes.

    Each member's value is the opcode number that instructions carry in
    their `opcode` field. Values are plain ints (no trailing commas), so no
    member is accidentally declared with a 1-tuple value.
    """
    MOVE = 0
    LOADK = 1
    LOADBOOL = 2
    LOADNIL = 3
    GETUPVAL = 4
    GETGLOBAL = 5
    GETTABLE = 6
    SETGLOBAL = 7
    SETUPVAL = 8
    SETTABLE = 9
    NEWTABLE = 10
    SELF = 11
    ADD = 12
    SUB = 13
    MUL = 14
    DIV = 15
    MOD = 16
    POW = 17
    UNM = 18
    NOT = 19
    LEN = 20
    CONCAT = 21
    JMP = 22
    EQ = 23
    LT = 24
    LE = 25
    TEST = 26
    TESTSET = 27
    CALL = 28
    TAILCALL = 29
    RETURN = 30
    FORLOOP = 31
    FORPREP = 32
    TFORLOOP = 33
    SETLIST = 34
    CLOSE = 35
    CLOSURE = 36
    VARARG = 37
class ConstType(IntEnum):
    """Type tags of entries in a chunk's constant table.

    The values are the tag bytes read and written in front of each constant.
    They are declared as plain ints (no trailing commas) so that no member
    is accidentally given a 1-tuple value.
    """
    NIL = 0
    BOOL = 1
    NUMBER = 3
    STRING = 4
# opcodes whose B and C operands are both 'RK' values (register or konstant).
# LE is listed next to EQ and LT: all three compare RK(B) with RK(C).
_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT, Opcodes.LE]
# opcodes where only the C operand is an 'RK' value
_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF]
# opcodes whose Bx operand is an index into the constant table
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL]
# first four bytes of every Lua bytecode dump
_LUAMAGIC = b'\x1bLua'
# bit 8 of an 'RK' operand is the flag: set means K (konstant), clear means R (register)
def whichRK(rk: int):
    """Return True when the 'RK' operand refers to a constant, False for a register."""
    is_konstant = (rk & 0x100) != 0
    return is_konstant
# strip the K flag (bit 8) from an 'RK' operand
def readRKasK(rk: int):
    """Return `rk` with bit 8 cleared, i.e. the constant index it carries."""
    k_flag = 0x100
    return rk & ~k_flag
class Instruction: class Instruction:
def __init__(self, type: InstructionType, name: str) -> None: def __init__(self, type: InstructionType, name: str) -> None:
self.type = type self.type = type
@ -33,27 +87,89 @@ class Instruction:
self.B: int = None self.B: int = None
self.C: int = None self.C: int = None
# an 'RK' operand is either a register or a konstant; whichRK() reads the flag bit that tells them apart
def __formatRK(self, rk: int) -> str:
    """Render an RK operand as "K[n]" for a constant or "R[n]" for a register."""
    if not whichRK(rk):
        return "R[%d]" % rk
    return "K[%d]" % readRKasK(rk)
def toString(self):
    """Return the padded mnemonic and the formatted operands as "<name> : <operands>"."""
    mnemonic = "%10s" % self.name
    if self.type == InstructionType.ABC:
        if self.opcode in _RKBCInstr:
            # B and C are both RK operands
            fields = ("R[%d]" % self.A, self.__formatRK(self.B), self.__formatRK(self.C))
        elif self.opcode in _RKCInstr:
            # only C is an RK operand
            fields = ("R[%d]" % self.A, "%d" % self.B, self.__formatRK(self.C))
        else:
            # no operand information for this opcode: show the raw numbers
            fields = ("%d" % self.A, "%d" % self.B, "%d" % self.C)
        operands = "%6s %6s %6s" % fields
    elif self.type in (InstructionType.ABx, InstructionType.AsBx):
        # B is a constant index for the opcodes listed in _KBx, a plain number otherwise
        b_format = "K[%d]" if self.opcode in _KBx else "%d"
        operands = "%6s %6s" % ("R[%d]" % self.A, b_format % self.B)
    else:
        operands = ""
    return "%s : %s" % (mnemonic, operands)
def getAnnotation(self, chunk):
    """Return a short description of what the instruction does.

    `chunk` supplies the constant table used to show LOADK / GETGLOBAL
    operands. Opcodes without a description yield an empty string.
    """
    op = self.opcode
    if op == Opcodes.MOVE:
        return "move R[%d] into R[%d]" % (self.B, self.A)
    if op == Opcodes.LOADK:
        return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A)
    if op == Opcodes.GETGLOBAL:
        return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A)

    # the arithmetic opcodes share operands and differ only in wording
    arithmetic = (
        (Opcodes.ADD, 'add %s to %s, place into R[%d]'),
        (Opcodes.SUB, 'sub %s from %s, place into R[%d]'),
        (Opcodes.MUL, 'mul %s to %s, place into R[%d]'),
        (Opcodes.DIV, 'div %s from %s, place into R[%d]'),
    )
    for candidate, template in arithmetic:
        if op == candidate:
            return template % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)

    if op == Opcodes.CONCAT:
        count = self.C - self.B + 1
        return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
    return ""
class Constant:
    """A single entry of a chunk's constant table.

    `type` is the ConstType tag and `data` the Python value that goes with
    it (None for NIL, a bool, a number or a str).
    """

    # characters with a dedicated escape inside a double-quoted Lua string literal
    _LUA_ESCAPES = {
        "\\": "\\\\",
        "\"": "\\\"",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }

    def __init__(self, type: ConstType, data) -> None:
        self.type = type
        self.data = data

    def toString(self):
        """Return a debug representation of the form "[TYPE] value"."""
        return "[%s] %s" % (self.type.name, str(self.data))

    @staticmethod
    def _escapeChar(ch: str) -> str:
        """Return `ch` as it must appear inside a double-quoted Lua string."""
        escaped = Constant._LUA_ESCAPES.get(ch)
        if escaped is not None:
            return escaped
        code = ord(ch)
        if code < 0x20 or code == 0x7f:
            # three-digit decimal escape stays unambiguous even when a digit follows
            return "\\%03d" % code
        return ch

    # format the constant so that it is parsable by lua
    def toCode(self):
        """Return the constant as Lua source text.

        Strings are double-quoted with quotes, backslashes and control
        characters escaped. Numbers use 14 significant digits so that
        values such as 1234567 are not truncated to 6 digits.
        Any type other than STRING, BOOL or NUMBER is rendered as "nil".
        """
        if self.type == ConstType.STRING:
            return "\"" + "".join(self._escapeChar(ch) for ch in self.data) + "\""
        elif self.type == ConstType.BOOL:
            return "true" if self.data else "false"
        elif self.type == ConstType.NUMBER:
            return "%.14g" % self.data
        else:
            return "nil"
class Local: class Local:
def __init(self, name: str, start: int, end: int): def __init__(self, name: str, start: int, end: int):
self.name = name self.name = name
self.start = start self.start = start
self.end = end self.end = end
@ -73,6 +189,7 @@ class Chunk:
self.maxStack: int = 0 self.maxStack: int = 0
self.upvalues: list[str] = [] self.upvalues: list[str] = []
self.lineNums: list[int] = []
self.locals: list[Local] = [] self.locals: list[Local] = []
def appendInstruction(self, instr: Instruction): def appendInstruction(self, instr: Instruction):
@ -84,16 +201,40 @@ class Chunk:
def appendProto(self, proto):
    """Add `proto` to the end of this chunk's list of nested prototypes."""
    children = self.protos
    children.append(proto)
def appendLine(self, line: int):
    """Record `line` as the next entry of the chunk's line-number table."""
    table = self.lineNums
    table.append(line)
def appendLocal(self, local: Local):
    """Add `local` to the end of the chunk's list of local variables."""
    known_locals = self.locals
    known_locals.append(local)
def appendUpval(self, upval: str):
    """Add the upvalue name `upval` to the end of the chunk's upvalue list."""
    names = self.upvalues
    names.append(upval)
def findLocal(self, pc: int) -> Local:
    """Return the first local whose [start, end] range contains `pc`.

    Returns None when no local matches, e.g. because the local
    information may have been stripped.
    """
    matches = (entry for entry in self.locals if entry.start <= pc <= entry.end)
    return next(matches, None)
def getConstant(self, indx: int) -> Constant:
    """Return the entry at position `indx` of the chunk's constant table."""
    table = self.constants
    return table[indx]
def print(self):
    """Print the chunk's constants, locals and disassembly, then recurse into nested protos."""
    owner = str(self.name)

    def banner(section: str) -> str:
        # section header shared by every listing below
        return "\n==== [[" + owner + "'s " + section + "]] ====\n"

    print(banner("constants"))
    for index, constant in enumerate(self.constants):
        print("%d: %s" % (index, constant.toString()))

    print(banner("locals"))
    for index, local in enumerate(self.locals):
        print("R[%d]: %s" % (index, local.name))

    print(banner("dissassembly"))
    for index, instr in enumerate(self.instructions):
        print("[%3d] %-40s ; %s" % (index, instr.toString(), instr.getAnnotation(self)))

    if self.protos:
        print(banner("protos"))
        for child in self.protos:
            child.print()
@ -115,19 +256,56 @@ instr_lookup_tbl = [
] ]
# extract [s] bits of num, starting at bit [p]
def get_bits(num: int, p: int, s: int):
    """Return the `s`-bit wide field of `num` whose lowest bit is at position `p`."""
    field_mask = (1 << s) - 1
    return (num >> p) & field_mask
# overwrite [s] bits of num, starting at bit [p], with the low bits of data
def set_bits(num, data, p, s) -> int:
    """Return `num` with the `s`-bit field at position `p` replaced by `data`.

    Bits of `data` that do not fit into `s` bits are discarded.
    """
    field_mask = ((1 << s) - 1) << p
    untouched = num & ~field_mask
    inserted = (data << p) & field_mask
    return untouched | inserted
def _decode_instr(data: int) -> Instruction:
    """Unpack one encoded instruction word into an Instruction.

    The opcode selects a template from instr_lookup_tbl, which supplies
    the instruction's type and name; the operands are then read according
    to that type.
    """
    opcode = get_bits(data, 0, 6)
    template = instr_lookup_tbl[opcode]
    decoded = Instruction(template.type, template.name)
    decoded.opcode = opcode

    # bit positions and sizes come from lopcodes.h
    decoded.A = get_bits(data, 6, 8)  # A follows the 6-bit opcode and is 8 bits wide
    layout = decoded.type
    if layout == InstructionType.ABC:
        decoded.C = get_bits(data, 14, 9)  # C follows A (bit 14), 9 bits
        decoded.B = get_bits(data, 23, 9)  # B follows C (bit 23), 9 bits
    elif layout == InstructionType.ABx:
        decoded.B = get_bits(data, 14, 18)  # Bx follows A (bit 14), 18 bits
    elif layout == InstructionType.AsBx:
        # sBx is stored with a bias of 131071 (half of the 18-bit range)
        unsigned_bx = get_bits(data, 14, 18)
        decoded.B = unsigned_bx - 131071
    return decoded
# packs an Instruction back into a u32 instruction word
def _encode_instr(instr: Instruction) -> int:
    """Encode `instr` into its integer form (the inverse of _decode_instr)."""
    # (value, bit position, bit size) for every field of the instruction
    fields = [(instr.opcode, 0, 6), (instr.A, 6, 8)]
    if instr.type == InstructionType.ABC:
        fields.append((instr.B, 23, 9))
        fields.append((instr.C, 14, 9))
    elif instr.type == InstructionType.ABx:
        fields.append((instr.B, 14, 18))
    elif instr.type == InstructionType.AsBx:
        # re-apply the bias that _decode_instr subtracts
        fields.append((instr.B + 131071, 14, 18))

    word = 0
    for value, position, size in fields:
        word = set_bits(word, value, position, size)
    return word
class LuaUndump: class LuaUndump:
def __init__(self): def __init__(self):
self.rootChunk: Chunk = None self.rootChunk: Chunk = None
self.index = 0 self.index = 0
@staticmethod def _loadBlock(self, sz) -> bytearray:
def dis_chunk(chunk: Chunk):
chunk.print()
def loadBlock(self, sz) -> bytearray:
if self.index + sz > len(self.bytecode): if self.index + sz > len(self.bytecode):
raise Exception("Malformed bytecode!") raise Exception("Malformed bytecode!")
@ -135,99 +313,71 @@ class LuaUndump:
self.index = self.index + sz self.index = self.index + sz
return temp return temp
def get_byte(self) -> int: def _get_byte(self) -> int:
return self.loadBlock(1)[0] return self._loadBlock(1)[0]
def get_int32(self) -> int: def _get_uint32(self) -> int:
if (self.big_endian): order = 'big' if self.big_endian else 'little'
return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False) return int.from_bytes(self._loadBlock(4), byteorder=order, signed=False)
else:
return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False)
def get_int(self) -> int: def _get_uint(self) -> int:
if (self.big_endian): order = 'big' if self.big_endian else 'little'
return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False) return int.from_bytes(self._loadBlock(self.int_size), byteorder=order, signed=False)
else:
return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False)
def get_size_t(self) -> int: def _get_size_t(self) -> int:
if (self.big_endian): order = 'big' if self.big_endian else 'little'
return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False) return int.from_bytes(self._loadBlock(self.size_t), byteorder=order, signed=False)
else:
return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False)
def get_double(self) -> int: def _get_double(self) -> int:
if self.big_endian: order = '>d' if self.big_endian else '<d'
return struct.unpack('>d', self.loadBlock(8))[0] return struct.unpack(order, self._loadBlock(self.l_number_size))[0]
else:
return struct.unpack('<d', self.loadBlock(8))[0]
def get_string(self, size) -> str: def _get_string(self) -> str:
if (size == None): size = self._get_size_t()
size = self.get_size_t()
if (size == 0): if (size == 0):
return "" return ""
return "".join(chr(x) for x in self.loadBlock(size)) # [:-1] to remove the NULL terminator
return ("".join(chr(x) for x in self._loadBlock(size)))[:-1]
def decode_chunk(self): def decode_chunk(self) -> Chunk:
chunk = Chunk() chunk = Chunk()
chunk.name = self.get_string(None) # chunk meta info
chunk.frst_line = self.get_int() chunk.name = self._get_string()
chunk.last_line = self.get_int() chunk.frst_line = self._get_uint()
chunk.last_line = self._get_uint()
chunk.numUpvals = self.get_byte() chunk.numUpvals = self._get_byte()
chunk.numParams = self.get_byte() chunk.numParams = self._get_byte()
chunk.isVarg = (self.get_byte() != 0) chunk.isVarg = (self._get_byte() != 0)
chunk.maxStack = self.get_byte() chunk.maxStack = self._get_byte()
if (not chunk.name == ""):
chunk.name = chunk.name[1:-1]
# parse instructions # parse instructions
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
data = self.get_int32() chunk.appendInstruction(_decode_instr(self._get_uint32()))
opcode = _get_bits(data, 0, 6)
template = instr_lookup_tbl[opcode]
instruction = Instruction(template.type, template.name)
# i read the lopcodes.h file to get these bit position and sizes.
instruction.opcode = opcode
instruction.A = _get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8
if instruction.type == InstructionType.ABC:
instruction.B = _get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9
instruction.C = _get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9
elif instruction.type == InstructionType.ABx:
instruction.B = _get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18
elif instruction.type == InstructionType.AsBx:
instruction.B = _get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits
chunk.appendInstruction(instruction)
# get constants # get constants
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
constant: Constant = None constant: Constant = None
type = self.get_byte() type = self._get_byte()
if type == 0: # nil if type == 0: # nil
constant = Constant(ConstType.NIL, None) constant = Constant(ConstType.NIL, None)
elif type == 1: # bool elif type == 1: # bool
constant = Constant(ConstType.BOOL, (self.get_byte() != 0)) constant = Constant(ConstType.BOOL, (self._get_byte() != 0))
elif type == 3: # number elif type == 3: # number
constant = Constant(ConstType.NUMBER, self.get_double()) constant = Constant(ConstType.NUMBER, self._get_double())
elif type == 4: # string elif type == 4: # string
constant = Constant(ConstType.STRING, self.get_string(None)[:-1]) constant = Constant(ConstType.STRING, self._get_string())
else: else:
raise Exception("Unknown Datatype! [%d]" % type) raise Exception("Unknown Datatype! [%d]" % type)
chunk.appendConstant(constant) chunk.appendConstant(constant)
# parse protos # parse protos
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
chunk.appendProto(self.decode_chunk()) chunk.appendProto(self.decode_chunk())
@ -235,27 +385,28 @@ class LuaUndump:
# eh, for now just consume the bytes. # eh, for now just consume the bytes.
# line numbers # line numbers
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
self.get_int() self._get_uint()
# locals # locals
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
self.get_string(None)[:-1] # local name name = self._get_string() # local name
self.get_int() # local start PC start = self._get_uint() # local start PC
self.get_int() # local end PC end = self._get_uint() # local end PC
chunk.appendLocal(Local(name, start, end))
# upvalues # upvalues
num = self.get_int() num = self._get_uint()
for i in range(num): for i in range(num):
self.get_string(None) # upvalue name chunk.appendUpval(self._get_string()) # upvalue name
return chunk return chunk
def decode_rawbytecode(self, rawbytecode): def decode_rawbytecode(self, rawbytecode):
# bytecode sanity checks # bytecode sanity checks
if not rawbytecode[0:4] == b'\x1bLua': if not rawbytecode[0:4] == _LUAMAGIC:
raise Exception("Lua Bytecode expected!") raise Exception("Lua Bytecode expected!")
bytecode = array.array('b', rawbytecode) bytecode = array.array('b', rawbytecode)
@ -267,14 +418,14 @@ class LuaUndump:
# aligns index, skips header # aligns index, skips header
self.index = 4 self.index = 4
self.vm_version = self.get_byte() self.vm_version = self._get_byte()
self.bytecode_format = self.get_byte() self.bytecode_format = self._get_byte()
self.big_endian = (self.get_byte() == 0) self.big_endian = (self._get_byte() == 0)
self.int_size = self.get_byte() self.int_size = self._get_byte()
self.size_t = self.get_byte() self.size_t = self._get_byte()
self.instr_size = self.get_byte() # gets size of instructions self.instr_size = self._get_byte() # gets size of instructions
self.l_number_size = self.get_byte() # size of lua_Number self.l_number_size = self._get_byte() # size of lua_Number
self.integral_flag = self.get_byte() self.integral_flag = self._get_byte() # is lua_Number defined as an int? false = float/double, true = int/long/short/etc.
self.rootChunk = self.decode_chunk() self.rootChunk = self.decode_chunk()
return self.rootChunk return self.rootChunk
@ -285,5 +436,122 @@ class LuaUndump:
return self.decode_rawbytecode(bytecode) return self.decode_rawbytecode(bytecode)
def print_dissassembly(self):
    """Print the listing of the decoded root chunk."""
    root = self.rootChunk
    root.print()
class LuaDump:
    """Serialize a Chunk (and its nested protos) into Lua 5.1 bytecode.

    The header attributes set in __init__ (byte order and the sizes of int,
    size_t, instructions and lua_Number) control the layout of the output
    and may be changed before calling dump().
    """

    def __init__(self, rootChunk: "Chunk"):
        self.rootChunk = rootChunk
        self.bytecode = bytearray()

        # header info
        self.vm_version = 0x51
        self.bytecode_format = 0x00
        self.big_endian = False

        # data sizes
        self.int_size = 4
        self.size_t = 8
        self.instr_size = 4
        self.l_number_size = 8
        self.integral_flag = False # lua_Number is a double

    def _writeBlock(self, data: bytes):
        """Append raw bytes to the output buffer."""
        self.bytecode += bytearray(data)

    def _set_byte(self, b: int):
        """Append a single byte to the output buffer."""
        self.bytecode.append(b)

    def _set_uint32(self, i: int):
        """Write `i` as a 4-byte unsigned int in the configured byte order."""
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(4, order, signed=False))

    def _set_uint(self, i: int):
        """Write `i` as an unsigned int of `int_size` bytes."""
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(self.int_size, order, signed=False))

    def _set_size_t(self, i: int):
        """Write `i` as an unsigned int of `size_t` bytes."""
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(self.size_t, order, signed=False))

    def _set_double(self, f: float):
        """Write `f` as a lua_Number of `l_number_size` bytes.

        Raises:
            Exception: if `l_number_size` is neither 4 nor 8.
        """
        # the written width has to match the size announced in the header
        if self.l_number_size == 8:
            code = 'd'
        elif self.l_number_size == 4:
            code = 'f'
        else:
            raise Exception("Unsupported lua_Number size! [%d]" % self.l_number_size)
        prefix = '>' if self.big_endian else '<'
        self._writeBlock(struct.pack(prefix + code, f))

    def _set_string(self, string: str):
        """Write `string` as a size_t length, the raw bytes and a NULL terminator."""
        # LuaUndump._get_string maps every byte to one character, so latin-1 is
        # its exact inverse; text with code points above 0xFF falls back to utf-8
        try:
            raw = string.encode('latin-1')
        except UnicodeEncodeError:
            raw = string.encode('utf-8')
        # the size counts encoded bytes (not characters) plus the terminator
        self._set_size_t(len(raw) + 1)
        self._writeBlock(raw)
        self._set_byte(0x00) # write null terminator

    def _dumpChunk(self, chunk: "Chunk"):
        """Write `chunk` and, recursively, its nested protos to the output buffer."""
        # write meta info
        self._set_string(chunk.name)
        self._set_uint(chunk.frst_line)
        self._set_uint(chunk.last_line)
        self._set_byte(chunk.numUpvals)
        self._set_byte(chunk.numParams)
        # NOTE(review): the loader only keeps "flag != 0", so the original flag
        # bits are not available here; confirm that 1 is the right value to write
        self._set_byte(1 if chunk.isVarg else 0)
        self._set_byte(chunk.maxStack)

        # write instructions
        self._set_uint(len(chunk.instructions))
        for instr in chunk.instructions:
            self._set_uint32(_encode_instr(instr))

        # write constants
        self._set_uint(len(chunk.constants))
        for constant in chunk.constants:
            # each constant is a type tag byte followed by its payload (if any)
            if constant.type == ConstType.NIL:
                self._set_byte(0)
            elif constant.type == ConstType.BOOL:
                self._set_byte(1)
                self._set_byte(1 if constant.data else 0)
            elif constant.type == ConstType.NUMBER: # number
                self._set_byte(3)
                self._set_double(constant.data)
            elif constant.type == ConstType.STRING: # string
                self._set_byte(4)
                self._set_string(constant.data)
            else:
                raise Exception("Unknown Datatype! [%s]" % str(constant.type))

        # write child protos
        self._set_uint(len(chunk.protos))
        for proto in chunk.protos:
            self._dumpChunk(proto)

        # write line numbers
        self._set_uint(len(chunk.lineNums))
        for line in chunk.lineNums:
            self._set_uint(line)

        # write locals
        self._set_uint(len(chunk.locals))
        for local in chunk.locals:
            self._set_string(local.name)
            self._set_uint(local.start)
            self._set_uint(local.end)

        # write upvals
        self._set_uint(len(chunk.upvalues))
        for upval in chunk.upvalues:
            self._set_string(upval)

    def _dumpHeader(self):
        """Write the magic bytes followed by the header fields."""
        self._writeBlock(_LUAMAGIC)

        # write header info
        self._set_byte(self.vm_version)
        self._set_byte(self.bytecode_format)
        self._set_byte(0 if self.big_endian else 1)
        self._set_byte(self.int_size)
        self._set_byte(self.size_t)
        self._set_byte(self.instr_size)
        self._set_byte(self.l_number_size)
        self._set_byte(1 if self.integral_flag else 0)

    def dump(self) -> bytearray:
        """Serialize the root chunk and return the resulting bytecode."""
        # start from an empty buffer so a second call does not append another copy
        self.bytecode = bytearray()
        self._dumpHeader()
        self._dumpChunk(self.rootChunk)
        return self.bytecode