LuaPytecode/luac.py
CPunch 0b94b64c89 Major refactoring
- Added enums
- Added structures to hold data instead of huge generic dictionaries
- Pretty sure my instruction register decoding was wrong, but it works properly now (checked against chunkspy)
- Leaves room for some better disassembly annotations in the future
2021-12-08 15:53:40 -06:00

287 lines
9.8 KiB
Python

import struct
import array
from enum import IntEnum, Enum, auto
class InstructionType(Enum):
    """Operand layout of a VM instruction.

    The decoder in this file fills in the operands as follows:

    * ``ABC``  - registers A, B and C.
    * ``ABx``  - register A and one wide (18-bit) unsigned B operand.
    * ``AsBx`` - register A and one wide B operand stored with a bias,
      so it can be negative once the bias is subtracted.
    """

    # NOTE: no trailing commas here. `ABC = auto(),` makes the member's
    # value a one-element tuple rather than a plain integer.
    ABC = auto()
    ABx = auto()
    AsBx = auto()
class ConstType(IntEnum):
    """Type tags of the entries in a chunk's constant table.

    Each value equals the tag byte the decoder matches on when it reads a
    constant (there is no member for tag 2).
    """

    NIL = 0
    BOOL = 1
    NUMBER = 3
    STRING = 4
class Instruction:
    """One VM instruction: its operand layout, mnemonic and operand values.

    ``opcode``, ``A``, ``B`` and ``C`` start as ``None`` and are expected to
    be assigned by the code that decodes the instruction.
    """

    def __init__(self, type: InstructionType, name: str) -> None:
        self.type = type
        self.name = name
        self.opcode: int = None
        self.A: int = None
        self.B: int = None
        self.C: int = None

    def toString(self):
        """Format the instruction as ``"<mnemonic> : <operands>"``.

        The mnemonic is right-aligned in a 10-character column. ABC
        instructions list ``A B C``; ABx and AsBx list ``A B``; any other
        layout produces an empty operand list.
        """
        if self.type == InstructionType.ABC:
            operands = "%d %d %d" % (self.A, self.B, self.C)
        elif self.type in (InstructionType.ABx, InstructionType.AsBx):
            operands = "%d %d" % (self.A, self.B)
        else:
            operands = ""

        mnemonic = "%10s" % self.name
        return "%s : %s" % (mnemonic, operands)
class Constant:
    """One entry of a chunk's constant table: a type tag plus its payload."""

    def __init__(self, type: ConstType, data) -> None:
        # `data` holds whatever Python value the decoder produced for the tag.
        self.type, self.data = type, data
class Local:
    """Debug record for a local variable.

    Attributes:
        name: the variable's name.
        start: first position of the variable's range (the debug-info
            reader in this file labels it "local start PC").
        end: last position of the variable's range ("local end PC").
    """

    # The constructor was previously spelled `__init`, so Python never
    # called it and `Local(name, start, end)` raised TypeError.
    def __init__(self, name: str, start: int, end: int):
        self.name = name
        self.start = start
        self.end = end
class Chunk:
    """A decoded function prototype.

    Holds the prototype's header fields, its instruction and constant
    tables, its nested prototypes, and lists for debug information.
    Every field starts out empty or zero and is filled in by the decoder.
    """

    def __init__(self) -> None:
        # Header fields.
        self.name: str = "Unnamed proto"
        self.frst_line: int = 0
        self.last_line: int = 0
        self.numUpvals: int = 0
        self.numParams: int = 0
        self.isVarg: bool = False
        self.maxStack: int = 0

        # Body tables.
        self.instructions: list[Instruction] = []
        self.constants: list[Constant] = []
        self.protos: list[Chunk] = []

        # Debug information.
        self.upvalues: list[str] = []
        self.locals: list[Local] = []

    def appendInstruction(self, instr: Instruction):
        """Add ``instr`` to the end of the instruction list."""
        self.instructions += [instr]

    def appendConstant(self, const: Constant):
        """Add ``const`` to the end of the constant table."""
        self.constants += [const]

    def appendProto(self, proto):
        """Add the nested prototype ``proto`` to the end of ``protos``."""
        self.protos += [proto]
# Instruction templates indexed by opcode: entry N gives the mnemonic and
# operand layout for opcode N. The decoder copies type/name out of these
# templates rather than reusing the objects themselves.
instr_lookup_tbl = [
    Instruction(InstructionType[layout], mnemonic)
    for mnemonic, layout in (
        ("MOVE", "ABC"),
        ("LOADK", "ABx"),
        ("LOADBOOL", "ABC"),
        ("LOADNIL", "ABC"),
        ("GETUPVAL", "ABC"),
        ("GETGLOBAL", "ABx"),
        ("GETTABLE", "ABC"),
        ("SETGLOBAL", "ABx"),
        ("SETUPVAL", "ABC"),
        ("SETTABLE", "ABC"),
        ("NEWTABLE", "ABC"),
        ("SELF", "ABC"),
        ("ADD", "ABC"),
        ("SUB", "ABC"),
        ("MUL", "ABC"),
        ("DIV", "ABC"),
        ("MOD", "ABC"),
        ("POW", "ABC"),
        ("UNM", "ABC"),
        ("NOT", "ABC"),
        ("LEN", "ABC"),
        ("CONCAT", "ABC"),
        ("JMP", "AsBx"),
        ("EQ", "ABC"),
        ("LT", "ABC"),
        ("LE", "ABC"),
        ("TEST", "ABC"),
        ("TESTSET", "ABC"),
        ("CALL", "ABC"),
        ("TAILCALL", "ABC"),
        ("RETURN", "ABC"),
        ("FORLOOP", "AsBx"),
        ("FORPREP", "AsBx"),
        ("TFORLOOP", "ABC"),
        ("SETLIST", "ABC"),
        ("CLOSE", "ABC"),
        ("CLOSURE", "ABx"),
        ("VARARG", "ABC"),
    )
]
# at [p]osition, with [s]ize of bits
def _get_bits(num, p, s):
# convert number into binary first
binary = bin(num)
# remove first two characters
binary = binary[2:]
# fill in missing bits
for i in range(32 - len(binary)):
binary = '0' + binary
start = len(binary) - (p+s)
end = len(binary) - p
# extract k bit sub-string
kBitSubStr = binary[start : end]
# convert extracted sub-string into decimal again
return (int(kBitSubStr,2))
class LuaUndump:
    """Reader for compiled Lua bytecode (binary chunks with the ESC "Lua" signature).

    Typical use is ``loadFile(path)`` (or ``decode_rawbytecode(data)``),
    which parses the header and the root function prototype, stores it in
    ``rootChunk`` and returns it; ``print_dissassembly()`` then dumps it.

    State set while decoding:
        bytecode: the buffer being read.
        index: read cursor into ``bytecode``.
        vm_version, bytecode_format, big_endian, int_size, size_t,
        instr_size, l_number_size, integral_flag: header fields, in the
        order they are read by ``decode_bytecode``.
    """

    def __init__(self):
        self.rootChunk: Chunk = None
        self.index = 0

    @staticmethod
    def dis_chunk(chunk: Chunk):
        """Print the constants, instructions and nested protos of ``chunk``.

        Nested prototypes are printed recursively after the parent.
        """
        print("\n==== [[" + str(chunk.name) + "'s constants]] ====\n")
        for idx, const in enumerate(chunk.constants):
            print(str(idx) + ": [" + const.type.name + "] " + str(const.data))

        print("\n==== [[" + str(chunk.name) + "'s dissassembly]] ====\n")
        for pc, instr in enumerate(chunk.instructions):
            print("[%3d] %s" % (pc, instr.toString()))

        print("\n==== [[" + str(chunk.name) + "'s protos]] ====\n")
        for proto in chunk.protos:
            print("** decoding proto\n")
            LuaUndump.dis_chunk(proto)

    def loadBlock(self, sz) -> bytearray:
        """Return the next ``sz`` bytes and advance the read cursor past them."""
        block = bytearray(self.bytecode[self.index:self.index + sz])
        self.index += sz
        return block

    def _read_uint(self, size: int) -> int:
        """Read a ``size``-byte unsigned integer in the chunk's byte order."""
        order = 'big' if self.big_endian else 'little'
        return int.from_bytes(self.loadBlock(size), byteorder=order, signed=False)

    def get_byte(self) -> int:
        """Read one byte as an unsigned integer."""
        return self.loadBlock(1)[0]

    def get_int32(self) -> int:
        """Read a fixed 4-byte unsigned integer."""
        return self._read_uint(4)

    def get_int(self) -> int:
        """Read an unsigned integer of the size given by the header (``int_size``)."""
        return self._read_uint(self.int_size)

    def get_size_t(self) -> int:
        """Read an unsigned integer of the size given by the header (``size_t``)."""
        return self._read_uint(self.size_t)

    def get_double(self) -> float:
        """Read an 8-byte IEEE double in the chunk's byte order.

        NOTE(review): always reads 8 bytes; the header's ``l_number_size``
        and ``integral_flag`` are not consulted.
        """
        fmt = '>d' if self.big_endian else '<d'
        return struct.unpack(fmt, self.loadBlock(8))[0]

    def get_string(self, size) -> str:
        """Read a string of ``size`` bytes, one character per byte.

        When ``size`` is ``None`` the length is read first as a ``size_t``.
        A length of zero returns ``""`` without reading further. Any
        terminator byte stored in the chunk is part of the returned string;
        callers strip it themselves.
        """
        if size is None:
            size = self.get_size_t()
        if size == 0:
            return ""
        # latin-1 maps every byte 0..255 to the code point of the same value,
        # i.e. exactly chr(byte) for each byte.
        return self.loadBlock(size).decode("latin-1")

    @staticmethod
    def _decode_instruction(data: int) -> Instruction:
        """Split a 32-bit instruction word into opcode and operands."""
        opcode = _get_bits(data, 0, 6)
        template = instr_lookup_tbl[opcode]

        # Copy the template so every decoded instruction has its own operands.
        instruction = Instruction(template.type, template.name)
        instruction.opcode = opcode
        instruction.A = _get_bits(data, 6, 8)

        if instruction.type == InstructionType.ABC:
            instruction.B = _get_bits(data, 23, 9)
            instruction.C = _get_bits(data, 14, 9)
        elif instruction.type == InstructionType.ABx:
            instruction.B = _get_bits(data, 14, 18)
        elif instruction.type == InstructionType.AsBx:
            # sBx is stored with a bias of 131071 (half the 18-bit range).
            instruction.B = _get_bits(data, 14, 18) - 131071
        return instruction

    def _decode_constant(self) -> Constant:
        """Read one constant-table entry: a tag byte followed by its payload.

        Raises:
            Exception: if the tag byte is not a known constant type.
        """
        tag = self.get_byte()
        if tag == 0:  # nil
            return Constant(ConstType.NIL, None)
        if tag == 1:  # bool
            return Constant(ConstType.BOOL, (self.get_byte() != 0))
        if tag == 3:  # number
            return Constant(ConstType.NUMBER, self.get_double())
        if tag == 4:  # string
            # [:-1] drops the last stored byte (presumably the NUL terminator).
            return Constant(ConstType.STRING, self.get_string(None)[:-1])
        raise Exception("Unknown Datatype! [%d]" % tag)

    def decode_chunk(self):
        """Decode one function prototype at the current cursor.

        Reads, in order: the prototype header, instructions, constants,
        nested prototypes (recursively) and the debug sections. Line info
        and local-variable records are consumed but not stored; upvalue
        names are stored in ``chunk.upvalues``.

        Returns:
            The populated ``Chunk``.
        """
        chunk = Chunk()

        chunk.name = self.get_string(None)
        chunk.frst_line = self.get_int()
        chunk.last_line = self.get_int()

        chunk.numUpvals = self.get_byte()
        chunk.numParams = self.get_byte()
        chunk.isVarg = (self.get_byte() != 0)
        chunk.maxStack = self.get_byte()

        if chunk.name != "":
            # Drop the first and last characters of the stored name
            # (presumably the source marker and the terminator byte).
            chunk.name = chunk.name[1:-1]

        # parse instructions
        for _ in range(self.get_int()):
            chunk.appendInstruction(self._decode_instruction(self.get_int32()))

        # get constants
        for _ in range(self.get_int()):
            chunk.appendConstant(self._decode_constant())

        # parse protos
        for _ in range(self.get_int()):
            chunk.appendProto(self.decode_chunk())

        # debug stuff
        # line numbers: one int-sized entry per record
        for _ in range(self.get_int()):
            self.get_int()

        # locals: name string, then start PC and end PC. The name MUST be
        # consumed even though it is not kept, otherwise every following
        # read is misaligned.
        for _ in range(self.get_int()):
            self.get_string(None)  # local name
            self.get_int()  # local start PC
            self.get_int()  # local end PC

        # upvalues
        for _ in range(self.get_int()):
            chunk.upvalues.append(self.get_string(None)[:-1])  # upvalue name

        return chunk

    def decode_rawbytecode(self, rawbytecode):
        """Validate the signature of ``rawbytecode`` and decode it.

        Raises:
            Exception: if the data does not start with the Lua signature.
        """
        # bytecode sanity checks
        if rawbytecode[0:4] != b'\x1bLua':
            raise Exception("Lua Bytecode expected!")

        bytecode = array.array('b', rawbytecode)
        return self.decode_bytecode(bytecode)

    def decode_bytecode(self, bytecode):
        """Parse the header and root prototype from ``bytecode``.

        The 4-byte signature is skipped without being checked here. The
        result is stored in ``rootChunk`` and returned.
        """
        self.bytecode = bytecode

        # aligns index, skips header
        self.index = 4

        self.vm_version = self.get_byte()
        self.bytecode_format = self.get_byte()
        self.big_endian = (self.get_byte() == 0)
        self.int_size = self.get_byte()
        self.size_t = self.get_byte()
        self.instr_size = self.get_byte()  # gets size of instructions
        self.l_number_size = self.get_byte()  # size of lua_Number
        self.integral_flag = self.get_byte()

        self.rootChunk = self.decode_chunk()
        return self.rootChunk

    def loadFile(self, luaCFile):
        """Read the file at path ``luaCFile`` and decode it as Lua bytecode."""
        with open(luaCFile, 'rb') as luac_file:
            bytecode = luac_file.read()
            return self.decode_rawbytecode(bytecode)

    def print_dissassembly(self):
        """Print the disassembly of the most recently decoded root chunk."""
        LuaUndump.dis_chunk(self.rootChunk)