LuaPytecode/luac.py

307 lines
11 KiB
Python
Raw Normal View History

2021-12-08 22:26:29 +00:00
'''
Luac.py
A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the
chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms,
regardless of the host machine.
For details on the Lua5.1 bytecode format, I read [this PDF](https://archive.org/download/a-no-frills-intro-to-lua-5.1-vm-instructions/a-no-frills-intro-to-lua-5.1-vm-instructions_archive.torrent)
as well as read the lundump.c source file from the Lua5.1 source.
'''
2019-08-14 20:19:03 +00:00
import struct
import array
from enum import IntEnum, Enum, auto
class InstructionType(Enum):
ABC = auto(),
ABx = auto(),
AsBx = auto()
class ConstType(IntEnum):
NIL = 0,
BOOL = 1,
NUMBER = 3,
STRING = 4,
class Instruction:
def __init__(self, type: InstructionType, name: str) -> None:
self.type = type
self.name = name
self.opcode: int = None
self.A: int = None
self.B: int = None
self.C: int = None
def toString(self):
instr = "%10s" % self.name
regs = ""
if self.type == InstructionType.ABC:
regs = "%d %d %d" % (self.A, self.B, self.C)
elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
regs = "%d %d" % (self.A, self.B)
return "%s : %s" % (instr, regs)
class Constant:
def __init__(self, type: ConstType, data) -> None:
self.type = type
self.data = data
def toString(self):
return "[" + self.type.name + "] " + str(self.data)
class Local:
def __init(self, name: str, start: int, end: int):
self.name = name
self.start = start
self.end = end
class Chunk:
def __init__(self) -> None:
self.constants: list[Constant] = []
self.instructions: list[Instruction] = []
self.protos: list[Chunk] = []
self.name: str = "Unnamed proto"
self.frst_line: int = 0
self.last_line: int = 0
self.numUpvals: int = 0
self.numParams: int = 0
self.isVarg: bool = False
self.maxStack: int = 0
self.upvalues: list[str] = []
self.locals: list[Local] = []
def appendInstruction(self, instr: Instruction):
self.instructions.append(instr)
def appendConstant(self, const: Constant):
self.constants.append(const)
def appendProto(self, proto):
self.protos.append(proto)
def print(self):
print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
for z in range(len(self.constants)):
i = self.constants[z]
print(str(z) + ": " + i.toString())
print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
for i in range(len(self.instructions)):
print("[%3d] %s" % (i, self.instructions[i].toString()))
print("\n==== [[" + str(self.name) + "'s protos]] ====\n")
for z in self.protos:
z.print()
instr_lookup_tbl = [
Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"),
Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"),
Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"),
Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"),
Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"),
Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"),
Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"),
Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"),
Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"),
Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"),
Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"),
Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"),
Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG")
2019-08-14 20:19:03 +00:00
]
# at [p]osition, with [s]ize of bits
def _get_bits(num, p, s):
2019-08-14 20:19:03 +00:00
# convert number into binary first
binary = bin(num)
# remove first two characters
binary = binary[2:]
# fill in missing bits
for i in range(32 - len(binary)):
binary = '0' + binary
start = len(binary) - (p+s)
end = len(binary) - p
2019-08-14 20:19:03 +00:00
# extract k bit sub-string
kBitSubStr = binary[start : end]
2019-08-14 20:19:03 +00:00
# convert extracted sub-string into decimal again
return (int(kBitSubStr,2))
2019-08-14 20:47:18 +00:00
class LuaUndump:
2019-08-14 20:19:03 +00:00
def __init__(self):
self.rootChunk: Chunk = None
2019-08-14 20:19:03 +00:00
self.index = 0
2019-08-15 17:02:55 +00:00
@staticmethod
def dis_chunk(chunk: Chunk):
chunk.print()
def loadBlock(self, sz) -> bytearray:
if self.index + sz > len(self.bytecode):
raise Exception("Malformed bytecode!")
temp = bytearray(self.bytecode[self.index:self.index+sz])
self.index = self.index + sz
return temp
2019-08-14 20:19:03 +00:00
def get_byte(self) -> int:
return self.loadBlock(1)[0]
2019-08-14 20:19:03 +00:00
def get_int32(self) -> int:
2019-08-14 20:19:03 +00:00
if (self.big_endian):
return int.from_bytes(self.loadBlock(4), byteorder='big', signed=False)
2019-08-14 20:19:03 +00:00
else:
return int.from_bytes(self.loadBlock(4), byteorder='little', signed=False)
2019-08-14 20:19:03 +00:00
def get_int(self) -> int:
2019-08-14 20:19:03 +00:00
if (self.big_endian):
return int.from_bytes(self.loadBlock(self.int_size), byteorder='big', signed=False)
2019-08-14 20:19:03 +00:00
else:
return int.from_bytes(self.loadBlock(self.int_size), byteorder='little', signed=False)
2019-08-14 20:19:03 +00:00
def get_size_t(self) -> int:
2019-08-14 20:19:03 +00:00
if (self.big_endian):
return int.from_bytes(self.loadBlock(self.size_t), byteorder='big', signed=False)
2019-08-14 20:19:03 +00:00
else:
return int.from_bytes(self.loadBlock(self.size_t), byteorder='little', signed=False)
2019-08-14 20:19:03 +00:00
def get_double(self) -> int:
2019-08-14 20:47:18 +00:00
if self.big_endian:
return struct.unpack('>d', self.loadBlock(8))[0]
2019-08-14 20:47:18 +00:00
else:
return struct.unpack('<d', self.loadBlock(8))[0]
2019-08-14 20:19:03 +00:00
def get_string(self, size) -> str:
2019-08-14 20:19:03 +00:00
if (size == None):
size = self.get_size_t()
if (size == 0):
return ""
return "".join(chr(x) for x in self.loadBlock(size))
2019-08-14 20:19:03 +00:00
def decode_chunk(self):
chunk = Chunk()
2019-08-14 20:19:03 +00:00
chunk.name = self.get_string(None)
chunk.frst_line = self.get_int()
chunk.last_line = self.get_int()
2019-08-14 20:19:03 +00:00
chunk.numUpvals = self.get_byte()
chunk.numParams = self.get_byte()
chunk.isVarg = (self.get_byte() != 0)
chunk.maxStack = self.get_byte()
2019-08-14 20:19:03 +00:00
if (not chunk.name == ""):
chunk.name = chunk.name[1:-1]
2019-08-14 20:19:03 +00:00
# parse instructions
num = self.get_int()
for i in range(num):
2019-08-15 17:02:55 +00:00
data = self.get_int32()
opcode = _get_bits(data, 0, 6)
template = instr_lookup_tbl[opcode]
instruction = Instruction(template.type, template.name)
2019-08-14 20:19:03 +00:00
# i read the lopcodes.h file to get these bit position and sizes.
instruction.opcode = opcode
instruction.A = _get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8
2019-08-14 20:19:03 +00:00
if instruction.type == InstructionType.ABC:
instruction.B = _get_bits(data, 23, 9) # starts after POS_C + SIZE_C (23), with a size of 9
instruction.C = _get_bits(data, 14, 9) # starts after POS_A + SIZE_A (14), with a size of 9
elif instruction.type == InstructionType.ABx:
instruction.B = _get_bits(data, 14, 18) # starts after POS_A + SIZE_A (14), with a size of 18
elif instruction.type == InstructionType.AsBx:
instruction.B = _get_bits(data, 14, 18) - 131071 # Bx is now signed, so just sub half of the MAX_UINT for 18 bits
2019-08-14 20:19:03 +00:00
chunk.appendInstruction(instruction)
2019-08-14 20:19:03 +00:00
# get constants
2019-08-15 17:02:55 +00:00
num = self.get_int()
2019-08-14 20:19:03 +00:00
for i in range(num):
constant: Constant = None
type = self.get_byte()
if type == 0: #nil
constant = Constant(ConstType.NIL, None)
elif type == 1: # bool
constant = Constant(ConstType.BOOL, (self.get_byte() != 0))
elif type == 3: # number
constant = Constant(ConstType.NUMBER, self.get_double())
elif type == 4: # string
constant = Constant(ConstType.STRING, self.get_string(None)[:-1])
else:
raise Exception("Unknown Datatype! [%d]" % type)
chunk.appendConstant(constant)
2019-08-14 20:19:03 +00:00
# parse protos
2019-08-15 17:02:55 +00:00
num = self.get_int()
2019-08-14 20:19:03 +00:00
for i in range(num):
chunk.appendProto(self.decode_chunk())
2019-08-14 20:19:03 +00:00
# debug stuff, maybe i'll add this to chunks to have better disassembly annotation in the future?
# eh, for now just consume the bytes.
2019-08-14 20:19:03 +00:00
# line numbers
2019-08-15 17:02:55 +00:00
num = self.get_int()
2019-08-14 20:19:03 +00:00
for i in range(num):
self.get_int()
2019-08-14 20:19:03 +00:00
# locals
2019-08-15 17:02:55 +00:00
num = self.get_int()
2019-08-14 20:19:03 +00:00
for i in range(num):
self.get_string(None)[:-1] # local name
self.get_int() # local start PC
self.get_int() # local end PC
2019-08-14 20:19:03 +00:00
# upvalues
2019-08-15 17:02:55 +00:00
num = self.get_int()
2019-08-14 20:19:03 +00:00
for i in range(num):
self.get_string(None) # upvalue name
return chunk
def decode_rawbytecode(self, rawbytecode):
# bytecode sanity checks
if not rawbytecode[0:4] == b'\x1bLua':
raise Exception("Lua Bytecode expected!")
bytecode = array.array('b', rawbytecode)
2019-08-14 20:19:03 +00:00
return self.decode_bytecode(bytecode)
def decode_bytecode(self, bytecode):
self.bytecode = bytecode
# aligns index, skips header
2019-08-14 20:19:03 +00:00
self.index = 4
self.vm_version = self.get_byte()
2019-08-15 17:02:55 +00:00
self.bytecode_format = self.get_byte()
2019-08-14 20:19:03 +00:00
self.big_endian = (self.get_byte() == 0)
self.int_size = self.get_byte()
self.size_t = self.get_byte()
2019-08-15 17:02:55 +00:00
self.instr_size = self.get_byte() # gets size of instructions
self.l_number_size = self.get_byte() # size of lua_Number
self.integral_flag = self.get_byte()
2019-08-14 20:19:03 +00:00
self.rootChunk = self.decode_chunk()
return self.rootChunk
2019-08-14 20:19:03 +00:00
def loadFile(self, luaCFile):
with open(luaCFile, 'rb') as luac_file:
2019-08-14 20:19:03 +00:00
bytecode = luac_file.read()
return self.decode_rawbytecode(bytecode)
2019-08-15 17:02:55 +00:00
def print_dissassembly(self):
LuaUndump.dis_chunk(self.rootChunk)
2019-08-15 17:02:55 +00:00