LuaPytecode/luac.py

307 lines
11 KiB
Python

'''
Luac.py
A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the
chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms,
regardless of the host machine.
For details on the Lua5.1 bytecode format, I read [this PDF](https://archive.org/download/a-no-frills-intro-to-lua-5.1-vm-instructions/a-no-frills-intro-to-lua-5.1-vm-instructions_archive.torrent)
as well as read the lundump.c source file from the Lua5.1 source.
'''
import struct
import array
from enum import IntEnum, Enum, auto
class InstructionType(Enum):
ABC = auto(),
ABx = auto(),
AsBx = auto()
class ConstType(IntEnum):
NIL = 0,
BOOL = 1,
NUMBER = 3,
STRING = 4,
class Instruction:
def __init__(self, type: InstructionType, name: str) -> None:
self.type = type
self.name = name
self.opcode: int = None
self.A: int = None
self.B: int = None
self.C: int = None
def toString(self):
instr = "%10s" % self.name
regs = ""
if self.type == InstructionType.ABC:
regs = "%d %d %d" % (self.A, self.B, self.C)
elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
regs = "%d %d" % (self.A, self.B)
return "%s : %s" % (instr, regs)
class Constant:
def __init__(self, type: ConstType, data) -> None:
self.type = type
self.data = data
def toString(self):
return "[" + self.type.name + "] " + str(self.data)
class Local:
def __init(self, name: str, start: int, end: int):
self.name = name
self.start = start
self.end = end
class Chunk:
def __init__(self) -> None:
self.constants: list[Constant] = []
self.instructions: list[Instruction] = []
self.protos: list[Chunk] = []
self.name: str = "Unnamed proto"
self.frst_line: int = 0
self.last_line: int = 0
self.numUpvals: int = 0
self.numParams: int = 0
self.isVarg: bool = False
self.maxStack: int = 0
self.upvalues: list[str] = []
self.locals: list[Local] = []
def appendInstruction(self, instr: Instruction):
self.instructions.append(instr)
def appendConstant(self, const: Constant):
self.constants.append(const)
def appendProto(self, proto):
self.protos.append(proto)
def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)):
i = self.constants[z]
print(str(z) + ": " + i.toString())
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)):
print("[%3d] %s" % (i, self.instructions[i].toString()))
print("\n==== [[" + str(self.name) + "'s protos]] ====\n")
for z in self.protos:
z.print()
instr_lookup_tbl = [
Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"),
Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"),
Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"),
Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"),
Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"),
Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"),
Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"),
Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"),
Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"),
Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"),
Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"),
Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"),
Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG")
]
# at [p]osition, with [s]ize of bits
def _get_bits(num, p, s):
# convert number into binary first
binary = bin(num)
# remove first two characters
binary = binary[2:]
# fill in missing bits
for i in range(32 - len(binary)):
binary = '0' + binary
start = len(binary) - (p+s)
end = len(binary) - p
# extract k bit sub-string
kBitSubStr = binary[start : end]
# convert extracted sub-string into decimal again
return (int(kBitSubStr,2))
class LuaUndump:
def __init__(self):
self.rootChunk: Chunk = None
self.index = 0
@staticmethod
def dis_chunk(chunk: Chunk):
chunk.print()
def loadBlock(self, sz) -> bytearray:
if self.index + sz > len(self.bytecode):
raise Exception("Malformed bytecode!")
temp = bytearray(self.bytecode[self.index:self.index+sz])
self.index = self.index + sz
return temp
def get_byte(self) -> int:
return self.loadBlock(1)[0]
def get_int32(self) -> int:
if (self.big_endian):
return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False)
else:
return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False)
def get_int(self) -> int:
if (self.big_endian):
return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False)
else:
return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False)
def get_size_t(self) -> int:
if (self.big_endian):
return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False)
else:
return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False)
def get_double(self) -> int:
if self.big_endian:
return struct.unpack('>d', self.loadBlock(8))[0]
else:
return struct.unpack('<d', self.loadBlock(8))[0]
def get_string(self, size) -> str:
if (size == None):
size = self.get_size_t()
if (size == 0):
return ""
return "".join(chr(x) for x in self.loadBlock(size))
def decode_chunk(self):
chunk = Chunk()
chunk.name = self.get_string(None)
chunk.frst_line = self.get_int()
chunk.last_line = self.get_int()
chunk.numUpvals = self.get_byte()
chunk.numParams = self.get_byte()
chunk.isVarg = (self.get_byte() != 0)
chunk.maxStack = self.get_byte()
if (not chunk.name == ""):
chunk.name = chunk.name[1:-1]
# parse instructions
num = self.get_int()
for i in range(num):
data = self.get_int32()
opcode = _get_bits(data, 0, 6)
template = instr_lookup_tbl[opcode]
instruction = Instruction(template.type, template.name)
# i read the lopcodes.h file to get these bit position and sizes.
instruction.opcode = opcode
instruction.A = _get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8
if instruction.type == InstructionType.ABC:
instruction.B = _get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9
instruction.C = _get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9
elif instruction.type == InstructionType.ABx:
instruction.B = _get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18
elif instruction.type == InstructionType.AsBx:
instruction.B = _get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits
chunk.appendInstruction(instruction)
# get constants
num = self.get_int()
for i in range(num):
constant: Constant = None
type = self.get_byte()
if type == 0: #nil
constant = Constant(ConstType.NIL, None)
elif type == 1: # bool
constant = Constant(ConstType.BOOL, (self.get_byte() != 0))
elif type == 3: # number
constant = Constant(ConstType.NUMBER, self.get_double())
elif type == 4: # string
constant = Constant(ConstType.STRING, self.get_string(None)[:-1])
else:
raise Exception("Unknown Datatype! [%d]" % type)
chunk.appendConstant(constant)
# parse protos
num = self.get_int()
for i in range(num):
chunk.appendProto(self.decode_chunk())
# debug stuff, maybe i'll add this to chunks to have better disassembly annotation in the future?
# eh, for now just consume the bytes.
# line numbers
num = self.get_int()
for i in range(num):
self.get_int()
# locals
num = self.get_int()
for i in range(num):
self.get_string(None)[:-1] # local name
self.get_int() # local start PC
self.get_int() # local end PC
# upvalues
num = self.get_int()
for i in range(num):
self.get_string(None) # upvalue name
return chunk
def decode_rawbytecode(self, rawbytecode):
# bytecode sanity checks
if not rawbytecode[0:4] == b'\x1bLua':
raise Exception("Lua Bytecode expected!")
bytecode = array.array('b', rawbytecode)
return self.decode_bytecode(bytecode)
def decode_bytecode(self, bytecode):
self.bytecode = bytecode
# aligns index, skips header
self.index = 4
self.vm_version = self.get_byte()
self.bytecode_format = self.get_byte()
self.big_endian = (self.get_byte() == 0)
self.int_size = self.get_byte()
self.size_t = self.get_byte()
self.instr_size = self.get_byte() # gets size of instructions
self.l_number_size = self.get_byte() # size of lua_Number
self.integral_flag = self.get_byte()
self.rootChunk = self.decode_chunk()
return self.rootChunk
def loadFile(self, luaCFile):
with open(luaCFile, 'rb') as luac_file:
bytecode = luac_file.read()
return self.decode_rawbytecode(bytecode)
def print_dissassembly(self):
LuaUndump.dis_chunk(self.rootChunk)