From 0b94b64c89c4e93bf017ba975e8c0aadd7b7eb44 Mon Sep 17 00:00:00 2001 From: CPunch Date: Wed, 8 Dec 2021 15:53:40 -0600 Subject: [PATCH] Major refactoring - Added enums - Added structures to hold data instead of huge generic dictionaries - Pretty sure my instruction register decoding was wrong, but it works properly now (checked against chunkspy) - Leaves room for some better dissasembly annotations in the future --- luac.py | 293 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 156 insertions(+), 137 deletions(-) diff --git a/luac.py b/luac.py index a9b89da..b429352 100644 --- a/luac.py +++ b/luac.py @@ -1,34 +1,94 @@ import struct import array +from enum import IntEnum, Enum, auto -lua_opcode_types = [ - "ABC", "ABx", "ABC", "ABC", - "ABC", "ABx", "ABC", "ABx", - "ABC", "ABC", "ABC", "ABC", - "ABC", "ABC", "ABC", "ABC", - "ABC", "ABC", "ABC", "ABC", - "ABC", "ABC", "AsBx", "ABC", - "ABC", "ABC", "ABC", "ABC", - "ABC", "ABC", "ABC", "AsBx", - "AsBx", "ABC", "ABC", "ABC", - "ABx", "ABC" + +class InstructionType(Enum): + ABC = auto(), + ABx = auto(), + AsBx = auto() + +class ConstType(IntEnum): + NIL = 0, + BOOL = 1, + NUMBER = 3, + STRING = 4, + +class Instruction: + def __init__(self, type: InstructionType, name: str) -> None: + self.type = type + self.name = name + self.opcode: int = None + self.A: int = None + self.B: int = None + self.C: int = None + + def toString(self): + instr = "%10s" % self.name + regs = "" + + if self.type == InstructionType.ABC: + regs = "%d %d %d" % (self.A, self.B, self.C) + elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx: + regs = "%d %d" % (self.A, self.B) + + return "%s : %s" % (instr, regs) + +class Constant: + def __init__(self, type: ConstType, data) -> None: + self.type = type + self.data = data + +class Local: + def __init(self, name: str, start: int, end: int): + self.name = name + self.start = start + self.end = end + +class Chunk: + def __init__(self) -> None: + self.constants: list[Constant] = [] + self.instructions: list[Instruction] = [] + self.protos: list[Chunk] = [] + + self.name: str = "Unnamed proto" + self.frst_line: int = 0 + self.last_line: int = 0 + self.numUpvals: int = 0 + self.numParams: int = 0 + self.isVarg: bool = False + self.maxStack: int = 0 + + self.upvalues: list[str] = [] + self.locals: list[Local] = [] + + def appendInstruction(self, instr: Instruction): + self.instructions.append(instr) + + def appendConstant(self, const: Constant): + self.constants.append(const) + + def appendProto(self, proto): + self.protos.append(proto) + +instr_lookup_tbl = [ + Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"), + Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"), + Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"), + Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"), + Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"), + Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"), + Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"), + Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"), + Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"), + Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"), + Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"), + Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"), + Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG") ] -lua_opcode_names = [ - "MOVE", "LOADK", "LOADBOOL", "LOADNIL", - "GETUPVAL", "GETGLOBAL", "GETTABLE", "SETGLOBAL", - "SETUPVAL", "SETTABLE", "NEWTABLE", "SELF", - "ADD", "SUB", "MUL", "DIV", - "MOD", "POW", "UNM", "NOT", - "LEN", "CONCAT", "JMP", "EQ", - "LT", "LE", "TEST", "TESTSET", - "CALL", "TAILCALL", "RETURN", "FORLOOP", - "FORPREP", "TFORLOOP", "SETLIST", "CLOSE", - "CLOSURE", "VARARG" -] - -# at [p]osition to k -def get_bits(num, p, k): +# at [p]osition, with [s]ize of bits +def _get_bits(num, p, s): # convert number into binary first binary = bin(num) @@ -39,169 +99,137 @@ def get_bits(num, p, k): for i in range(32 - len(binary)): binary = '0' + binary + start = len(binary) - (p+s) end = len(binary) - p - start = len(binary) - k # extract k bit sub-string - kBitSubStr = binary[start : end] + kBitSubStr = binary[start : end] # convert extracted sub-string into decimal again - return (int(kBitSubStr,2)) + return (int(kBitSubStr,2)) class LuaUndump: def __init__(self): - self.chunks = [] - self.chunk = {} + self.rootChunk: Chunk = None self.index = 0 @staticmethod - def dis_chunk(chunk): - print("==== [[" + str(chunk['NAME']) + "]] ====\n") - for z in chunk['PROTOTYPES']: + def dis_chunk(chunk: Chunk): + print("\n==== [[" + str(chunk.name) + "'s constants]] ====\n") + for z in range(len(chunk.constants)): + i = chunk.constants[z] + print(str(z) + ": [" + i.type.name + "] " + str(i.data)) + + print("\n==== [[" + str(chunk.name) + "'s dissassembly]] ====\n") + for i in range(len(chunk.instructions)): + print("[%3d] %s" % (i, chunk.instructions[i].toString())) + + print("\n==== [[" + str(chunk.name) + "'s protos]] ====\n") + for z in chunk.protos: print("** decoding proto\n") - LuaUndump.dis_chunk(chunk['PROTOTYPES'][z]) - - print("\n==== [[" + str(chunk['NAME']) + "'s constants]] ====\n") - for z in chunk['CONSTANTS']: - i = chunk['CONSTANTS'][z] - print(str(z) + ": " + str(i['DATA'])) - - print("\n==== [[" + str(chunk['NAME']) + "'s dissassembly]] ====\n") - - for z in chunk['INSTRUCTIONS']: - i = chunk['INSTRUCTIONS'][z] - if (i['TYPE'] == "ABC"): - print(lua_opcode_names[i['OPCODE']], i['A'], i['B'], i['C']) - elif (i['TYPE'] == "ABx"): - if (i['OPCODE'] == 1 or i['OPCODE'] == 5): - print(lua_opcode_names[i['OPCODE']], i['A'], -i['Bx']-1, chunk['CONSTANTS'][i['Bx']]['DATA']) - else: - print(lua_opcode_names[i['OPCODE']], i['A'], -i['Bx']-1) - elif (i['TYPE'] == "AsBx"): - print("AsBx", lua_opcode_names[i['OPCODE']], i['A'], i['sBx']) + LuaUndump.dis_chunk(z) - def loadBlock(self, sz): + def loadBlock(self, sz) -> bytearray: temp = bytearray(self.bytecode[self.index:self.index+sz]) self.index = self.index + sz return temp - def get_byte(self): + def get_byte(self) -> int: return self.loadBlock(1)[0] - def get_int32(self): + def get_int32(self) -> int: if (self.big_endian): return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False) else: return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False) - def get_int(self): + def get_int(self) -> int: if (self.big_endian): return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False) else: return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False) - def get_size_t(self): + def get_size_t(self) -> int: if (self.big_endian): return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False) else: return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False) - def get_double(self): + def get_double(self) -> int: if self.big_endian: return struct.unpack('>d', self.loadBlock(8))[0] else: return struct.unpack(' str: if (size == None): size = self.get_size_t() if (size == 0): - return None + return "" return "".join(chr(x) for x in self.loadBlock(size)) def decode_chunk(self): - chunk = { - 'INSTRUCTIONS': {}, - 'CONSTANTS': {}, - 'PROTOTYPES': {} - } + chunk = Chunk() - chunk['NAME'] = self.get_string(None) - chunk['FIRST_LINE'] = self.get_int() - chunk['LAST_LINE'] = self.get_int() + chunk.name = self.get_string(None) + chunk.frst_line = self.get_int() + chunk.last_line = self.get_int() - chunk['UPVALUES'] = self.get_byte() - chunk['ARGUMENTS'] = self.get_byte() - chunk['VARG'] = self.get_byte() - chunk['STACK'] = self.get_byte() + chunk.numUpvals = self.get_byte() + chunk.numParams = self.get_byte() + chunk.isVarg = (self.get_byte() != 0) + chunk.maxStack = self.get_byte() - if (not chunk['NAME'] == None): - chunk['NAME'] = chunk['NAME'][1:-1] + if (not chunk.name == ""): + chunk.name = chunk.name[1:-1] # parse instructions - print("** DECODING INSTRUCTIONS") - num = self.get_int() for i in range(num): - instruction = { - # opcode = opcode number; - # type = [ABC, ABx, AsBx] - # A, B, C, Bx, or sBx depending on type - } - data = self.get_int32() - opcode = get_bits(data, 0, 6) - tp = lua_opcode_types[opcode] + opcode = _get_bits(data, 0, 6) + template = instr_lookup_tbl[opcode] + instruction = Instruction(template.type, template.name) - instruction['OPCODE'] = opcode - instruction['TYPE'] = tp - instruction['A'] = get_bits(data, 7, 14) + instruction.opcode = opcode + instruction.A = _get_bits(data, 6, 8) - if instruction['TYPE'] == "ABC": - instruction['B'] = get_bits(data, 24, 31) - instruction['C'] = get_bits(data, 15, 23) - elif instruction['TYPE'] == "ABx": - instruction['Bx'] = get_bits(data, 15, 31) - elif instruction['TYPE'] == "AsBx": - instruction['sBx'] = get_bits(data, 15, 31) - 131071 + if instruction.type == InstructionType.ABC: + instruction.B = _get_bits(data, 23, 9) + instruction.C = _get_bits(data, 14, 9) + elif instruction.type == InstructionType.ABx: + instruction.B = _get_bits(data, 14, 18) + elif instruction.type == InstructionType.AsBx: + instruction.B = _get_bits(data, 14, 18) - 131071 - chunk['INSTRUCTIONS'][i] = instruction - - print(lua_opcode_names[opcode], instruction) + chunk.appendInstruction(instruction) # get constants - print("** DECODING CONSTANTS") - num = self.get_int() for i in range(num): - constant = { - # type = constant type; - # data = constant data; - } - constant['TYPE'] = self.get_byte() + constant: Constant = None + type = self.get_byte() - if constant['TYPE'] == 1: - constant['DATA'] = (self.get_byte() != 0) - elif constant['TYPE'] == 3: - constant['DATA'] = self.get_double() - elif constant['TYPE'] == 4: - constant['DATA'] = self.get_string(None)[:-1] + if type == 0: #nil + constant = Constant(ConstType.NIL, None) + elif type == 1: # bool + constant = Constant(ConstType.BOOL, (self.get_byte() != 0)) + elif type == 3: # number + constant = Constant(ConstType.NUMBER, self.get_double()) + elif type == 4: # string + constant = Constant(ConstType.STRING, self.get_string(None)[:-1]) + else: + raise Exception("Unknown Datatype! [%d]" % type) - print(constant) - - chunk['CONSTANTS'][i] = constant + chunk.appendConstant(constant) # parse protos - - print("** DECODING PROTOS") - num = self.get_int() for i in range(num): - chunk['PROTOTYPES'][i] = self.decode_chunk() + chunk.appendProto(self.decode_chunk()) # debug stuff - print("** DECODING DEBUG SYMBOLS") # line numbers num = self.get_int() @@ -211,7 +239,7 @@ class LuaUndump: # locals num = self.get_int() for i in range(num): - print(self.get_string(None)[:-1]) # local name + #print(self.get_string(None)[:-1]) # local name self.get_int32() # local start PC self.get_int32() # local end PC @@ -220,23 +248,20 @@ class LuaUndump: for i in range(num): self.get_string(None) # upvalue name - self.chunks.append(chunk) - return chunk def decode_rawbytecode(self, rawbytecode): # bytecode sanity checks if not rawbytecode[0:4] == b'\x1bLua': - print("Lua Bytecode expected!") - exit(0) - - bytecode = array.array('b', rawbytecode) + raise Exception("Lua Bytecode expected!") + + bytecode = array.array('b', rawbytecode) return self.decode_bytecode(bytecode) def decode_bytecode(self, bytecode): self.bytecode = bytecode - # alligns index lol + # aligns index, skips header self.index = 4 self.vm_version = self.get_byte() @@ -248,14 +273,8 @@ class LuaUndump: self.l_number_size = self.get_byte() # size of lua_Number self.integral_flag = self.get_byte() - print("Lua VM version: ", hex(self.vm_version)) - print("Big Endian: ", self.big_endian) - print("int_size: ", self.int_size) - print("size_t: ", self.size_t) - - #print(self.bytecode) - self.chunk = self.decode_chunk() - return self.chunk + self.rootChunk = self.decode_chunk() + return self.rootChunk def loadFile(self, luaCFile): with open(luaCFile, 'rb') as luac_file: @@ -263,5 +282,5 @@ class LuaUndump: return self.decode_rawbytecode(bytecode) def print_dissassembly(self): - LuaUndump.dis_chunk(self.chunk) + LuaUndump.dis_chunk(self.rootChunk)