diff --git a/luac.py b/luac.py index eeee7f7..28f622f 100644 --- a/luac.py +++ b/luac.py @@ -1,7 +1,7 @@ ''' - Luac.py + l(un)dump.py - A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the + A Lua5.1 cross-platform bytecode deserializer && serializer. This module pulls int and size_t sizes from the chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, regardless of the host machine. @@ -18,12 +18,66 @@ class InstructionType(Enum): ABx = auto(), AsBx = auto() +class Opcodes(IntEnum): + MOVE = 0, + LOADK = 1, + LOADBOOL = 2, + LOADNIL = 3, + GETUPVAL = 4, + GETGLOBAL = 5, + GETTABLE = 6, + SETGLOBAL = 7, + SETUPVAL = 8, + SETTABLE = 9, + NEWTABLE = 10, + SELF = 11, + ADD = 12, + SUB = 13, + MUL = 14, + DIV = 15, + MOD = 16, + POW = 17, + UNM = 18, + NOT = 19, + LEN = 20, + CONCAT = 21, + JMP = 22, + EQ = 23, + LT = 24, + LE = 25, + TEST = 26, + TESTSET = 27, + CALL = 28, + TAILCALL = 29, + RETURN = 30, + FORLOOP = 31, + FORPREP = 32, + TFORLOOP = 33, + SETLIST = 34, + CLOSE = 35, + CLOSURE = 36, + VARARG = 37 + class ConstType(IntEnum): NIL = 0, BOOL = 1, NUMBER = 3, STRING = 4, +_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV, Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT] +_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF] +_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL] + +_LUAMAGIC = b'\x1bLua' + +# is an 'RK' value a K? (result is true for K, false for R) +def whichRK(rk: int): + return (rk & (1 << 8)) > 0 + +# read an RK as a K +def readRKasK(rk: int): + return (rk & ~(1 << 8)) + class Instruction: def __init__(self, type: InstructionType, name: str) -> None: self.type = type @@ -33,27 +87,89 @@ class Instruction: self.B: int = None self.C: int = None + # 'RK's are special in because can be a register or a konstant. a bitflag is read to determine which + def __formatRK(self, rk: int) -> str: + if whichRK(rk): + return "K[" + str(readRKasK(rk)) + "]" + else: + return "R[" + str(rk) + "]" + def toString(self): instr = "%10s" % self.name regs = "" if self.type == InstructionType.ABC: - regs = "%d %d %d" % (self.A, self.B, self.C) + # by default, treat them as registers + A = "%d" % self.A + B = "%d" % self.B + C = "%d" % self.C + + # these opcodes have RKs for B & C + if self.opcode in _RKBCInstr: + A = "R[%d]" % self.A + B = self.__formatRK(self.B) + C = self.__formatRK(self.C) + elif self.opcode in _RKCInstr: # just for C + A = "R[%d]" % self.A + C = self.__formatRK(self.C) + + regs = "%6s %6s %6s" % (A, B, C) elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: - regs = "%d %d" % (self.A, self.B) + A = "R[%d]" % self.A + B = "%d" % self.B + + if self.opcode in _KBx: + B = "K[%d]" % self.B + + regs = "%6s %6s" % (A, B) return "%s : %s" % (instr, regs) + def getAnnotation(self, chunk): + if self.opcode == Opcodes.MOVE: + return "move R[%d] into R[%d]" % (self.B, self.A) + elif self.opcode == Opcodes.LOADK: + return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A) + elif self.opcode == Opcodes.GETGLOBAL: + return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A) + elif self.opcode == Opcodes.ADD: + return 'add %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) + elif self.opcode == Opcodes.SUB: + return 'sub %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) + elif self.opcode == Opcodes.MUL: + return 'mul %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) + elif self.opcode == Opcodes.DIV: + return 'div %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A) + elif self.opcode == Opcodes.CONCAT: + count = self.C - self.B + 1 + return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A) + else: + return "" + class Constant: def __init__(self, type: ConstType, data) -> None: self.type = type self.data = data def toString(self): - return "[" + self.type.name + "] " + str(self.data) + return "[%s] %s" % (self.type.name, str(self.data)) + + # format the constant so that it is parsable by lua + def toCode(self): + if self.type == ConstType.STRING: + return "\"" + self.data + "\"" + elif self.type == ConstType.BOOL: + if self.data: + return "true" + else: + return "false" + elif self.type == ConstType.NUMBER: + return "%g" % self.data + else: + return "nil" class Local: - def __init(self, name: str, start: int, end: int): + def __init__(self, name: str, start: int, end: int): self.name = name self.start = start self.end = end @@ -73,6 +189,7 @@ class Chunk: self.maxStack: int = 0 self.upvalues: list[str] = [] + self.lineNums: list[int] = [] self.locals: list[Local] = [] def appendInstruction(self, instr: Instruction): @@ -84,19 +201,43 @@ class Chunk: def appendProto(self, proto): self.protos.append(proto) + def appendLine(self, line: int): + self.lineNums.append(line) + + def appendLocal(self, local: Local): + self.locals.append(local) + + def appendUpval(self, upval: str): + self.upvalues.append(upval) + + def findLocal(self, pc: int) -> Local: + for l in self.locals: + if l.start <= pc and l.end >= pc: + return l + + # there's no local information (may have been stripped) + return None + + def getConstant(self, indx: int) -> Constant: + return self.constants[indx] + def print(self): print("\n==== [[" + str(self.name) + "'s constants]] ====\n") - for z in range(len(self.constants)): - i = self.constants[z] - print(str(z) + ": " + i.toString()) + for i in range(len(self.constants)): + print("%d: %s" % (i, self.constants[i].toString())) + + print("\n==== [[" + str(self.name) + "'s locals]] ====\n") + for i in range(len(self.locals)): + print("R[%d]: %s" % (i, self.locals[i].name)) print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n") for i in range(len(self.instructions)): - print("[%3d] %s" % (i, self.instructions[i].toString())) + print("[%3d] %-40s ; %s" % (i, self.instructions[i].toString(), self.instructions[i].getAnnotation(self))) - print("\n==== [[" + str(self.name) + "'s protos]] ====\n") - for z in self.protos: - z.print() + if len(self.protos) > 0: + print("\n==== [[" + str(self.name) + "'s protos]] ====\n") + for z in self.protos: + z.print() instr_lookup_tbl = [ Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"), @@ -115,19 +256,56 @@ instr_lookup_tbl = [ ] # at [p]osition, with [s]ize of bits -def _get_bits(num: int, p: int, s: int): +def get_bits(num: int, p: int, s: int): return (num>>p) & (~((~0)< int: + return (num & (~((~((~0)< Instruction: + opcode = get_bits(data, 0, 6) + template = instr_lookup_tbl[opcode] + instr = Instruction(template.type, template.name) + + # i read the lopcodes.h file to get these bit position and sizes. + instr.opcode = opcode + instr.A = get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8 + + if instr.type == InstructionType.ABC: + instr.B = get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9 + instr.C = get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9 + elif instr.type == InstructionType.ABx: + instr.B = get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18 + elif instr.type == InstructionType.AsBx: + instr.B = get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits + + return instr + +# returns a u32 instruction +def _encode_instr(instr: Instruction) -> int: + data = 0 + + # encode instruction (basically, do the inverse of _decode_instr) + data = set_bits(data, instr.opcode, 0, 6) + data = set_bits(data, instr.A, 6, 8) + + if instr.type == InstructionType.ABC: + data = set_bits(data, instr.B, 23, 9) + data = set_bits(data, instr.C, 14, 9) + elif instr.type == InstructionType.ABx: + data = set_bits(data, instr.B, 14, 18) + elif instr.type == InstructionType.AsBx: + data = set_bits(data, instr.B + 131071, 14, 18) + + return data + class LuaUndump: def __init__(self): self.rootChunk: Chunk = None self.index = 0 - @staticmethod - def dis_chunk(chunk: Chunk): - chunk.print() - - def loadBlock(self, sz) -> bytearray: + def _loadBlock(self, sz) -> bytearray: if self.index + sz > len(self.bytecode): raise Exception("Malformed bytecode!") @@ -135,99 +313,71 @@ class LuaUndump: self.index = self.index + sz return temp - def get_byte(self) -> int: - return self.loadBlock(1)[0] + def _get_byte(self) -> int: + return self._loadBlock(1)[0] - def get_int32(self) -> int: - if (self.big_endian): - return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False) - else: - return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False) + def _get_uint32(self) -> int: + order = 'big' if self.big_endian else 'little' + return int.from_bytes(self._loadBlock(4), byteorder=order, signed=False) - def get_int(self) -> int: - if (self.big_endian): - return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False) - else: - return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False) + def _get_uint(self) -> int: + order = 'big' if self.big_endian else 'little' + return int.from_bytes(self._loadBlock(self.int_size), byteorder=order, signed=False) - def get_size_t(self) -> int: - if (self.big_endian): - return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False) - else: - return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False) + def _get_size_t(self) -> int: + order = 'big' if self.big_endian else 'little' + return int.from_bytes(self._loadBlock(self.size_t), byteorder=order, signed=False) - def get_double(self) -> int: - if self.big_endian: - return struct.unpack('>d', self.loadBlock(8))[0] - else: - return struct.unpack(' int: + order = '>d' if self.big_endian else ' str: - if (size == None): - size = self.get_size_t() - if (size == 0): - return "" + def _get_string(self) -> str: + size = self._get_size_t() + if (size == 0): + return "" - return "".join(chr(x) for x in self.loadBlock(size)) + # [:-1] to remove the NULL terminator + return ("".join(chr(x) for x in self._loadBlock(size)))[:-1] - def decode_chunk(self): + def decode_chunk(self) -> Chunk: chunk = Chunk() - chunk.name = self.get_string(None) - chunk.frst_line = self.get_int() - chunk.last_line = self.get_int() - - chunk.numUpvals = self.get_byte() - chunk.numParams = self.get_byte() - chunk.isVarg = (self.get_byte() != 0) - chunk.maxStack = self.get_byte() - - if (not chunk.name == ""): - chunk.name = chunk.name[1:-1] + # chunk meta info + chunk.name = self._get_string() + chunk.frst_line = self._get_uint() + chunk.last_line = self._get_uint() + chunk.numUpvals = self._get_byte() + chunk.numParams = self._get_byte() + chunk.isVarg = (self._get_byte() != 0) + chunk.maxStack = self._get_byte() # parse instructions - num = self.get_int() + num = self._get_uint() for i in range(num): - data = self.get_int32() - opcode = _get_bits(data, 0, 6) - template = instr_lookup_tbl[opcode] - instruction = Instruction(template.type, template.name) - - # i read the lopcodes.h file to get these bit position and sizes. - instruction.opcode = opcode - instruction.A = _get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8 - - if instruction.type == InstructionType.ABC: - instruction.B = _get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9 - instruction.C = _get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9 - elif instruction.type == InstructionType.ABx: - instruction.B = _get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18 - elif instruction.type == InstructionType.AsBx: - instruction.B = _get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits - - chunk.appendInstruction(instruction) + chunk.appendInstruction(_decode_instr(self._get_uint32())) # get constants - num = self.get_int() + num = self._get_uint() for i in range(num): constant: Constant = None - type = self.get_byte() + type = self._get_byte() - if type == 0: #nil + if type == 0: # nil constant = Constant(ConstType.NIL, None) elif type == 1: # bool - constant = Constant(ConstType.BOOL, (self.get_byte() != 0)) + constant = Constant(ConstType.BOOL, (self._get_byte() != 0)) elif type == 3: # number - constant = Constant(ConstType.NUMBER, self.get_double()) + constant = Constant(ConstType.NUMBER, self._get_double()) elif type == 4: # string - constant = Constant(ConstType.STRING, self.get_string(None)[:-1]) + constant = Constant(ConstType.STRING, self._get_string()) else: raise Exception("Unknown Datatype! [%d]" % type) chunk.appendConstant(constant) # parse protos - num = self.get_int() + num = self._get_uint() for i in range(num): chunk.appendProto(self.decode_chunk()) @@ -235,46 +385,47 @@ class LuaUndump: # eh, for now just consume the bytes. # line numbers - num = self.get_int() + num = self._get_uint() for i in range(num): - self.get_int() + self._get_uint() # locals - num = self.get_int() + num = self._get_uint() for i in range(num): - self.get_string(None)[:-1] # local name - self.get_int() # local start PC - self.get_int() # local end PC + name = self._get_string() # local name + start = self._get_uint() # local start PC + end = self._get_uint() # local end PC + chunk.appendLocal(Local(name, start, end)) # upvalues - num = self.get_int() + num = self._get_uint() for i in range(num): - self.get_string(None) # upvalue name + chunk.appendUpval(self._get_string()) # upvalue name return chunk - + def decode_rawbytecode(self, rawbytecode): # bytecode sanity checks - if not rawbytecode[0:4] == b'\x1bLua': + if not rawbytecode[0:4] == _LUAMAGIC: raise Exception("Lua Bytecode expected!") bytecode = array.array('b', rawbytecode) return self.decode_bytecode(bytecode) def decode_bytecode(self, bytecode): - self.bytecode = bytecode + self.bytecode = bytecode # aligns index, skips header self.index = 4 - - self.vm_version = self.get_byte() - self.bytecode_format = self.get_byte() - self.big_endian = (self.get_byte() == 0) - self.int_size = self.get_byte() - self.size_t = self.get_byte() - self.instr_size = self.get_byte() # gets size of instructions - self.l_number_size = self.get_byte() # size of lua_Number - self.integral_flag = self.get_byte() + + self.vm_version = self._get_byte() + self.bytecode_format = self._get_byte() + self.big_endian = (self._get_byte() == 0) + self.int_size = self._get_byte() + self.size_t = self._get_byte() + self.instr_size = self._get_byte() # gets size of instructions + self.l_number_size = self._get_byte() # size of lua_Number + self.integral_flag = self._get_byte() # is lua_Number defined as an int? false = float/double, true = int/long/short/etc. self.rootChunk = self.decode_chunk() return self.rootChunk @@ -285,5 +436,122 @@ class LuaUndump: return self.decode_rawbytecode(bytecode) def print_dissassembly(self): - LuaUndump.dis_chunk(self.rootChunk) + self.rootChunk.print() +class LuaDump: + def __init__(self, rootChunk: Chunk): + self.rootChunk = rootChunk + self.bytecode = bytearray() + + # header info + self.vm_version = 0x51 + self.bytecode_format = 0x00 + self.big_endian = False + + # data sizes + self.int_size = 4 + self.size_t = 8 + self.instr_size = 4 + self.l_number_size = 8 + self.integral_flag = False # lua_Number is a double + + def _writeBlock(self, data: bytes): + self.bytecode += bytearray(data) + + def _set_byte(self, b: int): + self.bytecode.append(b) + + def _set_uint32(self, i: int): + order = 'big' if self.big_endian else 'little' + self._writeBlock(i.to_bytes(4, order, signed=False)) + + def _set_uint(self, i: int): + order = 'big' if self.big_endian else 'little' + self._writeBlock(i.to_bytes(self.int_size, order, signed=False)) + + def _set_size_t(self, i: int): + order = 'big' if self.big_endian else 'little' + self._writeBlock(i.to_bytes(self.size_t, order, signed=False)) + + def _set_double(self, f: float): + order = '>d' if self.big_endian else ' bytearray: + self._dumpHeader() + self._dumpChunk(self.rootChunk) + + return self.bytecode \ No newline at end of file