Major refactoring

- Added enums
- Added structures to hold data instead of huge generic dictionaries
- Pretty sure my instruction register decoding was wrong, but it works properly now (checked against chunkspy)
- Leaves room for some better disassembly annotations in the future
This commit is contained in:
CPunch 2021-12-08 15:53:40 -06:00
parent 74dfb5f241
commit 0b94b64c89

285
luac.py
View File

@ -1,34 +1,94 @@
import struct import struct
import array import array
from enum import IntEnum, Enum, auto
lua_opcode_types = [
"ABC", "ABx", "ABC", "ABC", class InstructionType(Enum):
"ABC", "ABx", "ABC", "ABx", ABC = auto(),
"ABC", "ABC", "ABC", "ABC", ABx = auto(),
"ABC", "ABC", "ABC", "ABC", AsBx = auto()
"ABC", "ABC", "ABC", "ABC",
"ABC", "ABC", "AsBx", "ABC", class ConstType(IntEnum):
"ABC", "ABC", "ABC", "ABC", NIL = 0,
"ABC", "ABC", "ABC", "AsBx", BOOL = 1,
"AsBx", "ABC", "ABC", "ABC", NUMBER = 3,
"ABx", "ABC" STRING = 4,
class Instruction:
    """One instruction: its mnemonic, operand layout and operand values.

    ``opcode``, ``A``, ``B`` and ``C`` are initialised to ``None`` and are
    expected to be assigned after construction.
    """

    def __init__(self, type: InstructionType, name: str) -> None:
        # Layout and mnemonic are supplied by the caller.
        self.name = name
        self.type = type

        # Operand slots; left unset here.
        self.opcode: int = None
        self.A: int = None
        self.B: int = None
        self.C: int = None

    def toString(self):
        """Return the mnemonic padded to 10 columns, " : ", then the operands.

        ABC instructions list A, B and C; ABx and AsBx instructions list A
        and B. Any other type produces an empty operand string.
        """
        if self.type == InstructionType.ABC:
            operands = "%d %d %d" % (self.A, self.B, self.C)
        elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
            operands = "%d %d" % (self.A, self.B)
        else:
            operands = ""

        # Right-align the mnemonic so operand columns line up in a listing.
        return "%10s : %s" % (self.name, operands)
class Constant:
    """A constant value: a type tag together with its payload."""

    def __init__(self, type: ConstType, data) -> None:
        # Both values are stored exactly as given; no validation is done.
        self.type, self.data = type, data
class Local:
    """A named local variable together with its start and end positions.

    NOTE(review): start/end are presumably the PC range over which the local
    is live, as read from the chunk's debug info -- confirm against the
    decoder before relying on it.
    """

    # The constructor was previously spelled ``__init``; Python never called
    # it, so ``Local(name, start, end)`` raised TypeError and no attribute was
    # ever set.
    def __init__(self, name: str, start: int, end: int) -> None:
        """Store the local's name and its start/end positions."""
        self.name = name
        self.start = start
        self.end = end
class Chunk:
    """Holds the data of one chunk: header fields plus its tables.

    A fresh instance has empty tables, zeroed numeric fields and the
    placeholder name "Unnamed proto".
    """

    def __init__(self) -> None:
        # Header-style scalar fields.
        self.name: str = "Unnamed proto"
        self.frst_line: int = 0
        self.last_line: int = 0
        self.numUpvals: int = 0
        self.numParams: int = 0
        self.isVarg: bool = False
        self.maxStack: int = 0

        # Tables, filled through the append* helpers below.
        self.constants: list[Constant] = []
        self.instructions: list[Instruction] = []
        self.protos: list[Chunk] = []

        # Debug-style tables; nothing in this class fills them.
        self.upvalues: list[str] = []
        self.locals: list[Local] = []

    def appendInstruction(self, instr: Instruction):
        """Add ``instr`` to the end of the instruction list."""
        self.instructions.append(instr)

    def appendConstant(self, const: Constant):
        """Add ``const`` to the end of the constant list."""
        self.constants.append(const)

    def appendProto(self, proto):
        """Add ``proto`` to the end of the nested proto list."""
        self.protos.append(proto)
instr_lookup_tbl = [
Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"),
Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"),
Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"),
Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"),
Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"),
Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"),
Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"),
Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"),
Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"),
Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"),
Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"),
Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"),
Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG")
] ]
lua_opcode_names = [ # at [p]osition, with [s]ize of bits
"MOVE", "LOADK", "LOADBOOL", "LOADNIL", def _get_bits(num, p, s):
"GETUPVAL", "GETGLOBAL", "GETTABLE", "SETGLOBAL",
"SETUPVAL", "SETTABLE", "NEWTABLE", "SELF",
"ADD", "SUB", "MUL", "DIV",
"MOD", "POW", "UNM", "NOT",
"LEN", "CONCAT", "JMP", "EQ",
"LT", "LE", "TEST", "TESTSET",
"CALL", "TAILCALL", "RETURN", "FORLOOP",
"FORPREP", "TFORLOOP", "SETLIST", "CLOSE",
"CLOSURE", "VARARG"
]
# at [p]osition to k
def get_bits(num, p, k):
# convert number into binary first # convert number into binary first
binary = bin(num) binary = bin(num)
@ -39,8 +99,8 @@ def get_bits(num, p, k):
for i in range(32 - len(binary)): for i in range(32 - len(binary)):
binary = '0' + binary binary = '0' + binary
start = len(binary) - (p+s)
end = len(binary) - p end = len(binary) - p
start = len(binary) - k
# extract k bit sub-string # extract k bit sub-string
kBitSubStr = binary[start : end] kBitSubStr = binary[start : end]
@ -50,158 +110,126 @@ def get_bits(num, p, k):
class LuaUndump: class LuaUndump:
def __init__(self): def __init__(self):
self.chunks = [] self.rootChunk: Chunk = None
self.chunk = {}
self.index = 0 self.index = 0
@staticmethod @staticmethod
def dis_chunk(chunk): def dis_chunk(chunk: Chunk):
print("==== [[" + str(chunk['NAME']) + "]] ====\n") print("\n==== [[" + str(chunk.name) + "'s constants]] ====\n")
for z in chunk['PROTOTYPES']: for z in range(len(chunk.constants)):
i = chunk.constants[z]
print(str(z) + ": [" + i.type.name + "] " + str(i.data))
print("\n==== [[" + str(chunk.name) + "'s dissassembly]] ====\n")
for i in range(len(chunk.instructions)):
print("[%3d] %s" % (i, chunk.instructions[i].toString()))
print("\n==== [[" + str(chunk.name) + "'s protos]] ====\n")
for z in chunk.protos:
print("** decoding proto\n") print("** decoding proto\n")
LuaUndump.dis_chunk(chunk['PROTOTYPES'][z]) LuaUndump.dis_chunk(z)
print("\n==== [[" + str(chunk['NAME']) + "'s constants]] ====\n") def loadBlock(self, sz) -> bytearray:
for z in chunk['CONSTANTS']:
i = chunk['CONSTANTS'][z]
print(str(z) + ": " + str(i['DATA']))
print("\n==== [[" + str(chunk['NAME']) + "'s dissassembly]] ====\n")
for z in chunk['INSTRUCTIONS']:
i = chunk['INSTRUCTIONS'][z]
if (i['TYPE'] == "ABC"):
print(lua_opcode_names[i['OPCODE']], i['A'], i['B'], i['C'])
elif (i['TYPE'] == "ABx"):
if (i['OPCODE'] == 1 or i['OPCODE'] == 5):
print(lua_opcode_names[i['OPCODE']], i['A'], -i['Bx']-1, chunk['CONSTANTS'][i['Bx']]['DATA'])
else:
print(lua_opcode_names[i['OPCODE']], i['A'], -i['Bx']-1)
elif (i['TYPE'] == "AsBx"):
print("AsBx", lua_opcode_names[i['OPCODE']], i['A'], i['sBx'])
def loadBlock(self, sz):
temp = bytearray(self.bytecode[self.index:self.index+sz]) temp = bytearray(self.bytecode[self.index:self.index+sz])
self.index = self.index + sz self.index = self.index + sz
return temp return temp
def get_byte(self): def get_byte(self) -> int:
return self.loadBlock(1)[0] return self.loadBlock(1)[0]
def get_int32(self): def get_int32(self) -> int:
if (self.big_endian): if (self.big_endian):
return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False) return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False)
else: else:
return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False) return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False)
def get_int(self): def get_int(self) -> int:
if (self.big_endian): if (self.big_endian):
return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False) return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False)
else: else:
return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False) return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False)
def get_size_t(self): def get_size_t(self) -> int:
if (self.big_endian): if (self.big_endian):
return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False) return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False)
else: else:
return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False) return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False)
def get_double(self): def get_double(self) -> int:
if self.big_endian: if self.big_endian:
return struct.unpack('>d', self.loadBlock(8))[0] return struct.unpack('>d', self.loadBlock(8))[0]
else: else:
return struct.unpack('<d', self.loadBlock(8))[0] return struct.unpack('<d', self.loadBlock(8))[0]
def get_string(self, size): def get_string(self, size) -> str:
if (size == None): if (size == None):
size = self.get_size_t() size = self.get_size_t()
if (size == 0): if (size == 0):
return None return ""
return "".join(chr(x) for x in self.loadBlock(size)) return "".join(chr(x) for x in self.loadBlock(size))
def decode_chunk(self): def decode_chunk(self):
chunk = { chunk = Chunk()
'INSTRUCTIONS': {},
'CONSTANTS': {},
'PROTOTYPES': {}
}
chunk['NAME'] = self.get_string(None) chunk.name = self.get_string(None)
chunk['FIRST_LINE'] = self.get_int() chunk.frst_line = self.get_int()
chunk['LAST_LINE'] = self.get_int() chunk.last_line = self.get_int()
chunk['UPVALUES'] = self.get_byte() chunk.numUpvals = self.get_byte()
chunk['ARGUMENTS'] = self.get_byte() chunk.numParams = self.get_byte()
chunk['VARG'] = self.get_byte() chunk.isVarg = (self.get_byte() != 0)
chunk['STACK'] = self.get_byte() chunk.maxStack = self.get_byte()
if (not chunk['NAME'] == None): if (not chunk.name == ""):
chunk['NAME'] = chunk['NAME'][1:-1] chunk.name = chunk.name[1:-1]
# parse instructions # parse instructions
print("** DECODING INSTRUCTIONS")
num = self.get_int() num = self.get_int()
for i in range(num): for i in range(num):
instruction = {
# opcode = opcode number;
# type = [ABC, ABx, AsBx]
# A, B, C, Bx, or sBx depending on type
}
data = self.get_int32() data = self.get_int32()
opcode = get_bits(data, 0, 6) opcode = _get_bits(data, 0, 6)
tp = lua_opcode_types[opcode] template = instr_lookup_tbl[opcode]
instruction = Instruction(template.type, template.name)
instruction['OPCODE'] = opcode instruction.opcode = opcode
instruction['TYPE'] = tp instruction.A = _get_bits(data, 6, 8)
instruction['A'] = get_bits(data, 7, 14)
if instruction['TYPE'] == "ABC": if instruction.type == InstructionType.ABC:
instruction['B'] = get_bits(data, 24, 31) instruction.B = _get_bits(data, 23, 9)
instruction['C'] = get_bits(data, 15, 23) instruction.C = _get_bits(data, 14, 9)
elif instruction['TYPE'] == "ABx": elif instruction.type == InstructionType.ABx:
instruction['Bx'] = get_bits(data, 15, 31) instruction.B = _get_bits(data, 14, 18)
elif instruction['TYPE'] == "AsBx": elif instruction.type == InstructionType.AsBx:
instruction['sBx'] = get_bits(data, 15, 31) - 131071 instruction.B = _get_bits(data, 14, 18) - 131071
chunk['INSTRUCTIONS'][i] = instruction chunk.appendInstruction(instruction)
print(lua_opcode_names[opcode], instruction)
# get constants # get constants
print("** DECODING CONSTANTS")
num = self.get_int() num = self.get_int()
for i in range(num): for i in range(num):
constant = { constant: Constant = None
# type = constant type; type = self.get_byte()
# data = constant data;
}
constant['TYPE'] = self.get_byte()
if constant['TYPE'] == 1: if type == 0: #nil
constant['DATA'] = (self.get_byte() != 0) constant = Constant(ConstType.NIL, None)
elif constant['TYPE'] == 3: elif type == 1: # bool
constant['DATA'] = self.get_double() constant = Constant(ConstType.BOOL, (self.get_byte() != 0))
elif constant['TYPE'] == 4: elif type == 3: # number
constant['DATA'] = self.get_string(None)[:-1] constant = Constant(ConstType.NUMBER, self.get_double())
elif type == 4: # string
constant = Constant(ConstType.STRING, self.get_string(None)[:-1])
else:
raise Exception("Unknown Datatype! [%d]" % type)
print(constant) chunk.appendConstant(constant)
chunk['CONSTANTS'][i] = constant
# parse protos # parse protos
print("** DECODING PROTOS")
num = self.get_int() num = self.get_int()
for i in range(num): for i in range(num):
chunk['PROTOTYPES'][i] = self.decode_chunk() chunk.appendProto(self.decode_chunk())
# debug stuff # debug stuff
print("** DECODING DEBUG SYMBOLS")
# line numbers # line numbers
num = self.get_int() num = self.get_int()
@ -211,7 +239,7 @@ class LuaUndump:
# locals # locals
num = self.get_int() num = self.get_int()
for i in range(num): for i in range(num):
print(self.get_string(None)[:-1]) # local name #print(self.get_string(None)[:-1]) # local name
self.get_int32() # local start PC self.get_int32() # local start PC
self.get_int32() # local end PC self.get_int32() # local end PC
@ -220,15 +248,12 @@ class LuaUndump:
for i in range(num): for i in range(num):
self.get_string(None) # upvalue name self.get_string(None) # upvalue name
self.chunks.append(chunk)
return chunk return chunk
def decode_rawbytecode(self, rawbytecode): def decode_rawbytecode(self, rawbytecode):
# bytecode sanity checks # bytecode sanity checks
if not rawbytecode[0:4] == b'\x1bLua': if not rawbytecode[0:4] == b'\x1bLua':
print("Lua Bytecode expected!") raise Exception("Lua Bytecode expected!")
exit(0)
bytecode = array.array('b', rawbytecode) bytecode = array.array('b', rawbytecode)
return self.decode_bytecode(bytecode) return self.decode_bytecode(bytecode)
@ -236,7 +261,7 @@ class LuaUndump:
def decode_bytecode(self, bytecode): def decode_bytecode(self, bytecode):
self.bytecode = bytecode self.bytecode = bytecode
# alligns index lol # aligns index, skips header
self.index = 4 self.index = 4
self.vm_version = self.get_byte() self.vm_version = self.get_byte()
@ -248,14 +273,8 @@ class LuaUndump:
self.l_number_size = self.get_byte() # size of lua_Number self.l_number_size = self.get_byte() # size of lua_Number
self.integral_flag = self.get_byte() self.integral_flag = self.get_byte()
print("Lua VM version: ", hex(self.vm_version)) self.rootChunk = self.decode_chunk()
print("Big Endian: ", self.big_endian) return self.rootChunk
print("int_size: ", self.int_size)
print("size_t: ", self.size_t)
#print(self.bytecode)
self.chunk = self.decode_chunk()
return self.chunk
def loadFile(self, luaCFile): def loadFile(self, luaCFile):
with open(luaCFile, 'rb') as luac_file: with open(luaCFile, 'rb') as luac_file:
@ -263,5 +282,5 @@ class LuaUndump:
return self.decode_rawbytecode(bytecode) return self.decode_rawbytecode(bytecode)
def print_dissassembly(self): def print_dissassembly(self):
LuaUndump.dis_chunk(self.chunk) LuaUndump.dis_chunk(self.rootChunk)