mirror of
https://github.com/CPunch/LuaDecompy.git
synced 2024-11-21 22:30:05 +00:00
Initial commit
- Extremely basic decompiler implemented in lparser.py - lundump.py ported from [this repository](https://github.com/CPunch/LuaPytecode)
This commit is contained in:
commit
0dbdfc49c6
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@ -0,0 +1,2 @@
|
|||||||
|
example.*
|
||||||
|
__pycache__
|
213
lparser.py
Normal file
213
lparser.py
Normal file
@ -0,0 +1,213 @@
|
|||||||
|
'''
|
||||||
|
lparser.py
|
||||||
|
|
||||||
|
Depends on lundump.py for lua dump deserialization.
|
||||||
|
|
||||||
|
An experimental bytecode decompiler.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from operator import concat
|
||||||
|
from subprocess import call
|
||||||
|
from lundump import Chunk, LuaUndump, Constant, Instruction, InstructionType, Opcodes
|
||||||
|
|
||||||
|
class _Scope:
    '''
    Records the instruction range covered by one open block in the
    decompiled output.

    startPC is the program counter at which the block was opened and endPC
    is the last program counter that still belongs to it.
    '''

    def __init__(self, startPC: int, endPC: int):
        # both bounds are stored as given; the decompiler compares its pc to endPC
        self.startPC, self.endPC = startPC, endPC
|
||||||
|
|
||||||
|
class LuaDecomp:
    '''
    Experimental decompiler that rebuilds Lua source text from a Chunk.

    Constructing an instance runs the whole decompilation: every instruction
    of the chunk is visited in order, the generated source is accumulated in
    self.src and finally printed to stdout.

    Attributes:
        chunk: the Chunk being decompiled.
        pc: index of the instruction currently being parsed.
        scope: stack of open _Scope blocks (innermost last).
        top: register index -> expression text currently held by that register.
        locals: register index -> identifier of the local bound to that register.
        unknownLocalCount: counter used to generate unique local identifiers.
        src: the decompiled source produced so far.
    '''

    def __init__(self, chunk: Chunk):
        self.chunk = chunk
        self.pc = 0
        self.scope = []
        self.top = {}
        self.locals = {}
        self.unknownLocalCount = 0
        self.src: str = ""

        # configurations!
        self.aggressiveLocals = False # should *EVERY* accessed register be considered a local?
        self.indexWidth = 4 # how many spaces for indentions?

        # parse instructions
        while self.pc < len(self.chunk.instructions):
            self.parseExpr()
            self.pc += 1

            # end the scope (if we're supposed to)
            self.__checkScope()

        print("\n==== [[" + str(self.chunk.name) + "'s decompiled source]] ====\n")
        print(self.src)

    def __makeLocalIdentifier(self, indx: int) -> str:
        '''Bind register indx to a freshly generated identifier and return it.'''
        self.locals[indx] = "__unknLocal%d" % self.unknownLocalCount
        self.unknownLocalCount += 1

        return self.locals[indx]

    def __newLocal(self, indx: int, expr: str) -> None:
        '''Emit a `local <ident> = <expr>` statement for register indx.'''
        # TODO: grab identifier from chunk(?)
        self.__makeLocalIdentifier(indx)

        self.__startStatement()
        self.__addExpr("local " + self.locals[indx] + " = " + expr)

    def __getNextInstr(self) -> Instruction:
        '''Return the instruction after the current one, or None at the end of the chunk.'''
        if self.pc + 1 < len(self.chunk.instructions):
            return self.chunk.instructions[self.pc + 1]

        return None

    def __getCurrInstr(self) -> Instruction:
        '''Return the instruction at the current pc.'''
        return self.chunk.instructions[self.pc]

    def __addExpr(self, code: str) -> None:
        '''Append raw text to the generated source.'''
        self.src += code

    def __startStatement(self):
        '''Begin a new output line, indented by the number of open scopes.'''
        self.src += '\n' + (' ' * self.indexWidth * len(self.scope))

    def __getReg(self, indx: int) -> str:
        '''Return the expression for register indx; raises KeyError if it was never written.'''
        # if the top indx is a local, get it
        return self.locals[indx] if indx in self.locals else self.top[indx]

    def __setReg(self, indx: int, code: str) -> None:
        '''Store expression text in register indx, emitting an assignment when it is a local.'''
        # if the top indx is a local, set it
        if indx in self.locals:
            self.__startStatement()
            self.__addExpr(self.locals[indx] + " = " + code)
        elif self.aggressiveLocals: # 'every register is a local!!'
            self.__newLocal(indx, code)

        self.top[indx] = code

    def __startScope(self, scopeType: str, size: int) -> None:
        '''Emit the scope opener text and push a scope covering pc .. pc + size.'''
        self.__addExpr(scopeType)
        self.scope.append(_Scope(self.pc, self.pc + size))

    # checks if we need to end a scope
    def __checkScope(self) -> None:
        if len(self.scope) == 0:
            return

        if self.pc > self.scope[-1].endPC:
            self.__endScope()

    def __endScope(self) -> None:
        '''Pop the innermost scope and emit its closing `end`.'''
        self.scope.pop()
        self.__startStatement()
        self.__addExpr("end")

    def __emitOperand(self, a: int, b: str, c: str, op: str) -> None:
        '''Store the parenthesised binary expression `(b op c)` in register a.'''
        self.__setReg(a, "(" + b + op + c + ")")

    # 'RK's are special because they can be a register or a konstant. a bitflag is read to determine which
    def __readRK(self, rk: int) -> str:
        if (rk & (1 << 8)) > 0:
            return self.chunk.constants[(rk & ~(1 << 8))].toCode()
        else:
            return self.__getReg(rk)

    def __emitCondJump(self, instr: Instruction, op: str) -> None:
        '''
        Emit `if <RK(B) op RK(C)> then` for a comparison opcode (EQ/LT/LE).

        The instruction following the comparison supplies the jump offset used
        as the size of the opened scope, and is then skipped.
        '''
        jmp = self.__getNextInstr()
        if jmp is None:
            raise Exception("malformed bytecode: conditional %s is the last instruction" % instr.toString())

        cond = self.__readRK(instr.B) + op + self.__readRK(instr.C)

        self.__startStatement()
        if instr.A > 0:
            # `not` binds tighter than comparison operators in Lua, so the
            # condition has to be parenthesised to negate the whole comparison
            self.__addExpr("if not (" + cond + ") ")
        else:
            self.__addExpr("if " + cond + " ")
        self.__startScope("then ", jmp.B + 1)

        self.pc += 1 # skip next instr

    def parseExpr(self):
        '''
        Decompile the instruction at the current pc.

        Raises:
            Exception: if the opcode is not supported yet.
        '''
        instr = self.__getCurrInstr()

        # binary arithmetic opcodes all share the same shape: R(A) := RK(B) op RK(C)
        arithmetic = {
            Opcodes.ADD: " + ",
            Opcodes.SUB: " - ",
            Opcodes.MUL: " * ",
            Opcodes.DIV: " / ",
            Opcodes.MOD: " % ",
            Opcodes.POW: " ^ ",
        }
        # comparison opcodes are conditional jumps, not register writes
        comparisons = {
            Opcodes.EQ: " == ",
            Opcodes.LT: " < ",
            Opcodes.LE: " <= ",
        }

        # python, add switch statements *please*
        if instr.opcode == Opcodes.MOVE: # move is a fake ABC instr, C is ignored
            # move registers
            self.__setReg(instr.A, self.__getReg(instr.B))
        elif instr.opcode == Opcodes.LOADK:
            self.__setReg(instr.A, self.chunk.constants[instr.B].toCode())
        elif instr.opcode == Opcodes.LOADBOOL:
            if instr.B == 0:
                self.__setReg(instr.A, "false")
            else:
                self.__setReg(instr.A, "true")
        elif instr.opcode == Opcodes.GETGLOBAL:
            self.__setReg(instr.A, self.chunk.constants[instr.B].data)
        elif instr.opcode == Opcodes.GETTABLE:
            self.__setReg(instr.A, self.__getReg(instr.B) + "[" + self.__readRK(instr.C) + "]")
        elif instr.opcode == Opcodes.SETGLOBAL:
            self.__startStatement()
            self.__addExpr(self.chunk.constants[instr.B].data + " = " + self.__getReg(instr.A))
        elif instr.opcode == Opcodes.SETTABLE:
            self.__startStatement()
            self.__addExpr(self.__getReg(instr.A) + "[" + self.__readRK(instr.B) + "] = " + self.__readRK(instr.C))
        elif instr.opcode in arithmetic:
            self.__emitOperand(instr.A, self.__readRK(instr.B), self.__readRK(instr.C), arithmetic[instr.opcode])
        elif instr.opcode == Opcodes.UNM:
            self.__setReg(instr.A, "-" + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.NOT:
            # Lua spells logical negation `not`; `!` is not a Lua operator
            self.__setReg(instr.A, "not " + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.LEN:
            self.__setReg(instr.A, "#" + self.__getReg(instr.B))
        elif instr.opcode == Opcodes.CONCAT:
            # concat all items on stack from RB to RC
            parts = [self.__getReg(reg) for reg in range(instr.B, instr.C + 1)]
            self.__setReg(instr.A, " .. ".join(parts))
        elif instr.opcode == Opcodes.JMP:
            pass
        elif instr.opcode in comparisons:
            self.__emitCondJump(instr, comparisons[instr.opcode])
        elif instr.opcode == Opcodes.CALL:
            preStr = ""

            # parse arguments: R(A) is the callee, R(A+1) .. R(A+B-1) are the arguments
            args = [self.__getReg(reg) for reg in range(instr.A + 1, instr.A + instr.B)]
            callStr = self.__getReg(instr.A) + "(" + ", ".join(args) + ")"

            # parse return values
            if instr.C > 1:
                idents = []
                for indx in range(instr.A, instr.A + instr.C - 1):
                    if indx in self.locals:
                        ident = self.locals[indx]
                    else:
                        ident = self.__makeLocalIdentifier(indx)
                    idents.append(ident)

                    # normally setReg() does this
                    self.top[indx] = ident

                preStr = "local " + ", ".join(idents) + " = "

            self.__startStatement()
            self.__addExpr(preStr + callStr)
        elif instr.opcode == Opcodes.RETURN:
            self.__startStatement()
            pass # no-op for now
        else:
            raise Exception("unsupported instruction: %s" % instr.toString())
|
399
lundump.py
Normal file
399
lundump.py
Normal file
@ -0,0 +1,399 @@
|
|||||||
|
'''
|
||||||
|
l(un)dump.py
|
||||||
|
|
||||||
|
A Lua5.1 cross-platform bytecode deserializer. This module pulls int and size_t sizes from the
|
||||||
|
chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms,
|
||||||
|
regardless of the host machine.
|
||||||
|
|
||||||
|
For details on the Lua5.1 bytecode format, I read [this PDF](https://archive.org/download/a-no-frills-intro-to-lua-5.1-vm-instructions/a-no-frills-intro-to-lua-5.1-vm-instructions_archive.torrent)
|
||||||
|
as well as read the lundump.c source file from the Lua5.1 source.
|
||||||
|
'''
|
||||||
|
|
||||||
|
from multiprocessing.spawn import get_executable
|
||||||
|
import struct
|
||||||
|
import array
|
||||||
|
from enum import IntEnum, Enum, auto
|
||||||
|
from typing_extensions import Self
|
||||||
|
|
||||||
|
class InstructionType(Enum):
    '''
    Operand layout of a Lua 5.1 instruction.

    ABC  - three operands A, B and C
    ABx  - operand A plus one wide unsigned operand
    AsBx - operand A plus one wide signed operand
    '''
    # no trailing commas here: `auto(),` would turn the member value into a tuple
    ABC = auto()
    ABx = auto()
    AsBx = auto()
|
||||||
|
|
||||||
|
class Opcodes(IntEnum):
    '''Numeric opcode of every instruction known to this module (0 through 37).'''

    # loads and moves
    MOVE = 0
    LOADK = 1
    LOADBOOL = 2
    LOADNIL = 3

    # upvalue, global and table access
    GETUPVAL = 4
    GETGLOBAL = 5
    GETTABLE = 6
    SETGLOBAL = 7
    SETUPVAL = 8
    SETTABLE = 9
    NEWTABLE = 10
    SELF = 11

    # arithmetic and unary operators
    ADD = 12
    SUB = 13
    MUL = 14
    DIV = 15
    MOD = 16
    POW = 17
    UNM = 18
    NOT = 19
    LEN = 20
    CONCAT = 21

    # jumps and tests
    JMP = 22
    EQ = 23
    LT = 24
    LE = 25
    TEST = 26
    TESTSET = 27

    # calls and returns
    CALL = 28
    TAILCALL = 29
    RETURN = 30

    # loops
    FORLOOP = 31
    FORPREP = 32
    TFORLOOP = 33

    # tables, closures and varargs
    SETLIST = 34
    CLOSE = 35
    CLOSURE = 36
    VARARG = 37
|
||||||
|
|
||||||
|
class ConstType(IntEnum):
    '''Type tag of an entry in a chunk's constant table (tag 2 is not used here).'''
    NIL = 0
    BOOL = 1
    NUMBER = 3
    STRING = 4
|
||||||
|
|
||||||
|
class Instruction:
    '''
    One Lua 5.1 VM instruction.

    Attributes:
        type: operand layout (InstructionType).
        name: mnemonic used when printing the instruction.
        opcode: numeric opcode, None until the instruction is decoded.
        A, B, C: operand values, None until decoded. Only ABC instructions
            get a C operand; for ABx/AsBx instructions B holds the wide operand.
    '''

    def __init__(self, type: InstructionType, name: str) -> None:
        self.type = type
        self.name = name
        self.opcode: int = None
        self.A: int = None
        self.B: int = None
        self.C: int = None

    # 'RK's are special because they can be a register or a konstant. a bitflag is read to determine which
    def __readRK(self, rk: int) -> str:
        if (rk & (1 << 8)) > 0:
            return "K[" + str((rk & ~(1 << 8))) + "]"
        else:
            return "R[" + str(rk) + "]"

    def toString(self):
        '''Return the disassembly line for this instruction: `<name> : <operands>`.'''
        instr = "%10s" % self.name
        regs = ""

        if self.type == InstructionType.ABC:
            A = "%d" % self.A
            B = "%d" % self.B
            C = "%d" % self.C

            # these opcodes have RKs for B & C
            rkBoth = (
                Opcodes.SETTABLE,
                Opcodes.ADD, Opcodes.SUB, Opcodes.MUL,
                Opcodes.DIV, Opcodes.MOD, Opcodes.POW,
                Opcodes.EQ, Opcodes.LT, Opcodes.LE,
            )
            # these only have an RK for C
            rkOnlyC = (Opcodes.GETTABLE, Opcodes.SELF)

            if self.opcode in rkBoth:
                B = self.__readRK(self.B)
                C = self.__readRK(self.C)
            elif self.opcode in rkOnlyC:
                C = self.__readRK(self.C)

            regs = "%s %s %s" % (A, B, C)
        elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
            regs = "%d %d" % (self.A, self.B)

        return "%s : %s" % (instr, regs)
|
||||||
|
|
||||||
|
class Constant:
    '''
    One entry of a chunk's constant table.

    Attributes:
        type: the ConstType tag of the constant.
        data: the Python value (None, bool, number or str).
    '''

    # characters that have a dedicated escape sequence in a Lua string literal
    _ESCAPES = {
        "\\": "\\\\",
        "\"": "\\\"",
        "\n": "\\n",
        "\r": "\\r",
        "\t": "\\t",
    }

    def __init__(self, type: ConstType, data) -> None:
        self.type = type
        self.data = data

    def toString(self):
        '''Return a debug description: `[<TYPE>] <value>`.'''
        return "[" + self.type.name + "] " + str(self.data)

    @staticmethod
    def _escapeString(text: str) -> str:
        '''Escape text so it can sit between double quotes in Lua source.'''
        out = []
        for ch in text:
            code = ord(ch)
            if ch in Constant._ESCAPES:
                out.append(Constant._ESCAPES[ch])
            elif code < 32 or 127 <= code <= 255:
                # control and high bytes are written as a three digit decimal
                # escape so a following digit can never be absorbed into it
                out.append("\\%03d" % code)
            else:
                out.append(ch)
        return "".join(out)

    # format the constant so that it is parsable by lua
    def toCode(self):
        if self.type == ConstType.STRING:
            return "\"" + Constant._escapeString(self.data) + "\""
        elif self.type == ConstType.BOOL:
            if self.data:
                return "true"
            else:
                return "false"
        elif self.type == ConstType.NUMBER:
            return str(self.data)
        else:
            return "nil"
|
||||||
|
|
||||||
|
class Local:
    '''
    Debug record for one local variable of a chunk.

    name is the variable's identifier; start and end are the program
    counters between which the variable is live.
    '''

    def __init__(self, name: str, start: int, end: int):
        self.name, self.start, self.end = name, start, end
|
||||||
|
|
||||||
|
class Chunk:
    '''
    One function prototype: its instructions, constants, nested prototypes
    and the header/debug fields that describe it.
    '''

    def __init__(self) -> None:
        # body of the prototype
        self.constants: list[Constant] = []
        self.instructions: list[Instruction] = []
        self.protos: list[Chunk] = []

        # header fields, filled in by whoever builds the chunk
        self.name: str = "Unnamed proto"
        self.frst_line: int = 0
        self.last_line: int = 0
        self.numUpvals: int = 0
        self.numParams: int = 0
        self.isVarg: bool = False
        self.maxStack: int = 0

        # debug information
        self.upvalues: list[str] = []
        self.locals: list[Local] = []

    def appendInstruction(self, instr: Instruction):
        '''Add an instruction to the end of the instruction list.'''
        self.instructions.append(instr)

    def appendConstant(self, const: Constant):
        '''Add a constant to the end of the constant table.'''
        self.constants.append(const)

    def appendProto(self, proto):
        '''Add a nested prototype.'''
        self.protos.append(proto)

    def appendLocal(self, local: Local):
        '''Add a local variable debug record.'''
        self.locals.append(local)

    def findLocal(self, pc: int) -> Local:
        '''
        Return the first recorded local whose start..end range contains pc,
        or None when there is none (the local information may have been stripped).
        '''
        return next(
            (entry for entry in self.locals if entry.start <= pc <= entry.end),
            None,
        )

    def print(self):
        '''Print the constants, the disassembly and, recursively, every nested prototype.'''
        title = str(self.name)

        print(f"\n==== [[{title}'s constants]] ====\n")
        for slot, const in enumerate(self.constants):
            print(f"{slot}: {const.toString()}")

        print(f"\n==== [[{title}'s dissassembly]] ====\n")
        for pc, instr in enumerate(self.instructions):
            print("[%3d] %s" % (pc, instr.toString()))

        print(f"\n==== [[{title}'s protos]] ====\n")
        for child in self.protos:
            child.print()
|
||||||
|
|
||||||
|
# template instructions, indexed by opcode number (entry N describes opcode N)
instr_lookup_tbl = [
    Instruction(layout, mnemonic)
    for layout, mnemonic in (
        (InstructionType.ABC, "MOVE"),
        (InstructionType.ABx, "LOADK"),
        (InstructionType.ABC, "LOADBOOL"),
        (InstructionType.ABC, "LOADNIL"),
        (InstructionType.ABC, "GETUPVAL"),
        (InstructionType.ABx, "GETGLOBAL"),
        (InstructionType.ABC, "GETTABLE"),
        (InstructionType.ABx, "SETGLOBAL"),
        (InstructionType.ABC, "SETUPVAL"),
        (InstructionType.ABC, "SETTABLE"),
        (InstructionType.ABC, "NEWTABLE"),
        (InstructionType.ABC, "SELF"),
        (InstructionType.ABC, "ADD"),
        (InstructionType.ABC, "SUB"),
        (InstructionType.ABC, "MUL"),
        (InstructionType.ABC, "DIV"),
        (InstructionType.ABC, "MOD"),
        (InstructionType.ABC, "POW"),
        (InstructionType.ABC, "UNM"),
        (InstructionType.ABC, "NOT"),
        (InstructionType.ABC, "LEN"),
        (InstructionType.ABC, "CONCAT"),
        (InstructionType.AsBx, "JMP"),
        (InstructionType.ABC, "EQ"),
        (InstructionType.ABC, "LT"),
        (InstructionType.ABC, "LE"),
        (InstructionType.ABC, "TEST"),
        (InstructionType.ABC, "TESTSET"),
        (InstructionType.ABC, "CALL"),
        (InstructionType.ABC, "TAILCALL"),
        (InstructionType.ABC, "RETURN"),
        (InstructionType.AsBx, "FORLOOP"),
        (InstructionType.AsBx, "FORPREP"),
        (InstructionType.ABC, "TFORLOOP"),
        (InstructionType.ABC, "SETLIST"),
        (InstructionType.ABC, "CLOSE"),
        (InstructionType.ABx, "CLOSURE"),
        (InstructionType.ABC, "VARARG"),
    )
]
|
||||||
|
|
||||||
|
# at [p]osition, with [s]ize of bits
|
||||||
|
def get_bits(num: int, p: int, s: int):
    '''Return the s-bit wide field of num that starts at bit position p.'''
    field_mask = (1 << s) - 1  # s low bits set
    shifted = num >> p
    return shifted & field_mask
|
||||||
|
|
||||||
|
# set bits from data to num at [p]osition, with [s]ize of bits
|
||||||
|
def set_bits(num, data, p, s) -> int:
    '''
    Return num with its s-bit wide field at bit position p replaced by data.

    Bits of data that do not fit into the field are dropped.
    '''
    field_mask = ((1 << s) - 1) << p
    untouched = num & ~field_mask
    inserted = (data << p) & field_mask
    return untouched | inserted
|
||||||
|
|
||||||
|
def _decode_instr(data: int) -> Instruction:
    '''
    Unpack one encoded instruction word into a new Instruction.

    The opcode sits in the low 6 bits and selects the template (layout and
    name) from instr_lookup_tbl; the remaining fields are read according to
    that layout.
    '''
    opcode = get_bits(data, 0, 6)
    proto = instr_lookup_tbl[opcode]

    # i read the lopcodes.h file to get these bit position and sizes.
    decoded = Instruction(proto.type, proto.name)
    decoded.opcode = opcode
    decoded.A = get_bits(data, 6, 8) # starts after POS_OP + SIZE_OP (6), with a size of 8

    layout = decoded.type
    if layout == InstructionType.ABC:
        # C comes first in the word (bit 14), B follows it (bit 23); both are 9 bits
        decoded.C = get_bits(data, 14, 9)
        decoded.B = get_bits(data, 23, 9)
    elif layout == InstructionType.ABx:
        # one unsigned 18 bit operand right after A
        decoded.B = get_bits(data, 14, 18)
    elif layout == InstructionType.AsBx:
        # same 18 bits, stored with a bias of 131071 to make it signed
        decoded.B = get_bits(data, 14, 18) - 131071

    return decoded
|
||||||
|
|
||||||
|
# returns a u32 instruction
|
||||||
|
def _encode_instr(instr: Instruction) -> int:
    '''
    Pack an Instruction back into a single integer word.

    This is the inverse of _decode_instr: the same bit positions and field
    widths are written instead of read.
    '''
    word = set_bits(0, instr.opcode, 0, 6)
    word = set_bits(word, instr.A, 6, 8)

    layout = instr.type
    if layout == InstructionType.ABC:
        word = set_bits(word, instr.B, 23, 9)
        word = set_bits(word, instr.C, 14, 9)
    elif layout == InstructionType.ABx:
        word = set_bits(word, instr.B, 14, 18)
    elif layout == InstructionType.AsBx:
        # re-apply the bias that the decoder subtracted
        word = set_bits(word, instr.B + 131071, 14, 18)

    return word
|
||||||
|
|
||||||
|
class LuaUndump:
    '''
    Deserializer for Lua 5.1 bytecode dumps.

    Typical use is loadFile(path) or decode_rawbytecode(data), both of which
    return the root Chunk and also keep it in self.rootChunk. The header of
    the dump supplies the endianness and the sizes of int and size_t used for
    every later read.
    '''

    def __init__(self):
        self.rootChunk: Chunk = None
        self.index = 0

    @staticmethod
    def dis_chunk(chunk: Chunk):
        '''Print the disassembly of chunk.'''
        chunk.print()

    def loadBlock(self, sz) -> bytearray:
        '''
        Consume and return the next sz bytes of the loaded bytecode.

        Raises:
            Exception: if fewer than sz bytes remain.
        '''
        if self.index + sz > len(self.bytecode):
            raise Exception("Malformed bytecode!")

        temp = bytearray(self.bytecode[self.index:self.index+sz])
        self.index = self.index + sz
        return temp

    def _byteorder(self) -> str:
        '''Return the int.from_bytes byte order matching the dump header.'''
        return 'big' if self.big_endian else 'little'

    def _get_uint(self, size: int) -> int:
        '''Consume size bytes and return them as an unsigned integer.'''
        return int.from_bytes(self.loadBlock(size), byteorder=self._byteorder(), signed=False)

    def get_byte(self) -> int:
        '''Consume one byte and return it as an unsigned value.'''
        return self.loadBlock(1)[0]

    def get_int32(self) -> int:
        '''Consume a 4 byte unsigned integer.'''
        return self._get_uint(4)

    def get_int(self) -> int:
        '''Consume an unsigned integer of the header's int size.'''
        return self._get_uint(self.int_size)

    def get_size_t(self) -> int:
        '''Consume an unsigned integer of the header's size_t size.'''
        return self._get_uint(self.size_t)

    def get_double(self) -> float:
        '''Consume an 8 byte IEEE double.'''
        fmt = '>d' if self.big_endian else '<d'
        return struct.unpack(fmt, self.loadBlock(8))[0]

    def get_string(self, size=None) -> str:
        '''
        Consume a string of size bytes, one character per byte.

        When size is None the length is read first as a size_t. The returned
        text still contains whatever terminator byte the dump stored.
        '''
        if size is None:
            size = self.get_size_t()
            if size == 0:
                return ""

        # latin-1 maps every byte to the code point of the same value
        return self.loadBlock(size).decode('latin-1')

    def decode_chunk(self) -> Chunk:
        '''Decode one function prototype (and, recursively, its nested prototypes).'''
        chunk = Chunk()

        chunk.name = self.get_string(None)
        chunk.frst_line = self.get_int()
        chunk.last_line = self.get_int()

        chunk.numUpvals = self.get_byte()
        chunk.numParams = self.get_byte()
        chunk.isVarg = (self.get_byte() != 0)
        chunk.maxStack = self.get_byte()

        if chunk.name != "":
            # drop the leading marker character and the trailing terminator
            chunk.name = chunk.name[1:-1]

        # parse instructions
        for _ in range(self.get_int()):
            chunk.appendInstruction(_decode_instr(self.get_int32()))

        # get constants
        for _ in range(self.get_int()):
            constant: Constant = None
            type = self.get_byte()

            if type == 0: # nil
                constant = Constant(ConstType.NIL, None)
            elif type == 1: # bool
                constant = Constant(ConstType.BOOL, (self.get_byte() != 0))
            elif type == 3: # number
                constant = Constant(ConstType.NUMBER, self.get_double())
            elif type == 4: # string
                constant = Constant(ConstType.STRING, self.get_string(None)[:-1])
            else:
                raise Exception("Unknown Datatype! [%d]" % type)

            chunk.appendConstant(constant)

        # parse protos
        for _ in range(self.get_int()):
            chunk.appendProto(self.decode_chunk())

        # debug stuff

        # line numbers: not kept anywhere yet, just consume the bytes
        for _ in range(self.get_int()):
            self.get_int()

        # locals
        for _ in range(self.get_int()):
            # strip the trailing terminator, same as for string constants
            name = self.get_string(None)[:-1] # local name
            start = self.get_int() # local start PC
            end = self.get_int() # local end PC
            chunk.appendLocal(Local(name, start, end))

        # upvalues
        for _ in range(self.get_int()):
            chunk.upvalues.append(self.get_string(None)[:-1]) # upvalue name

        return chunk

    def decode_rawbytecode(self, rawbytecode):
        '''
        Decode a bytes object holding a complete bytecode dump.

        Raises:
            Exception: if the data does not start with the Lua signature.
        '''
        # bytecode sanity checks
        if not rawbytecode[0:4] == b'\x1bLua':
            raise Exception("Lua Bytecode expected!")

        bytecode = array.array('b', rawbytecode)
        return self.decode_bytecode(bytecode)

    def decode_bytecode(self, bytecode):
        '''Read the dump header, then decode and return the root chunk.'''
        self.bytecode = bytecode

        # aligns index, skips header
        self.index = 4

        self.vm_version = self.get_byte()
        self.bytecode_format = self.get_byte()
        self.big_endian = (self.get_byte() == 0) # a zero endianness byte means big endian
        self.int_size = self.get_byte()
        self.size_t = self.get_byte()
        self.instr_size = self.get_byte() # gets size of instructions
        self.l_number_size = self.get_byte() # size of lua_Number
        self.integral_flag = self.get_byte()

        self.rootChunk = self.decode_chunk()
        return self.rootChunk

    def loadFile(self, luaCFile):
        '''Read the file at luaCFile and decode it as a bytecode dump.'''
        with open(luaCFile, 'rb') as luac_file:
            bytecode = luac_file.read()
            return self.decode_rawbytecode(bytecode)

    def print_dissassembly(self):
        '''Print the disassembly of the most recently decoded root chunk.'''
        LuaUndump.dis_chunk(self.rootChunk)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user