From 3731d28ca9af4d4339ff5a047ddc531579d7e5a6 Mon Sep 17 00:00:00 2001 From: Dani Messerman Date: Thu, 30 Apr 2015 20:29:06 +0300 Subject: [PATCH] Generates mov reg, reg --- src/binary_translation/CMakeLists.txt | 2 + src/binary_translation/InstructionBlock.cpp | 49 ++++ src/binary_translation/InstructionBlock.h | 64 ++++++ .../Instructions/DataProcessing.cpp | 26 ++- .../Instructions/DataProcessing.h | 5 +- .../Instructions/Instruction.h | 13 +- src/binary_translation/ModuleGen.cpp | 209 +++++++++++++++++- src/binary_translation/ModuleGen.h | 52 ++++- 8 files changed, 401 insertions(+), 19 deletions(-) create mode 100644 src/binary_translation/InstructionBlock.cpp create mode 100644 src/binary_translation/InstructionBlock.h diff --git a/src/binary_translation/CMakeLists.txt b/src/binary_translation/CMakeLists.txt index 435202191..7f260b1d6 100644 --- a/src/binary_translation/CMakeLists.txt +++ b/src/binary_translation/CMakeLists.txt @@ -3,6 +3,7 @@ set(SRCS CodeGen.cpp ModuleGen.cpp Disassembler.cpp + InstructionBlock.cpp Instructions/Instruction.cpp Instructions/DataProcessing.cpp @@ -11,6 +12,7 @@ set(HEADERS CodeGen.h ModuleGen.h Disassembler.h + InstructionBlock.h Instructions/Types.h Instructions/Instruction.h diff --git a/src/binary_translation/InstructionBlock.cpp b/src/binary_translation/InstructionBlock.cpp new file mode 100644 index 000000000..bb80284cb --- /dev/null +++ b/src/binary_translation/InstructionBlock.cpp @@ -0,0 +1,49 @@ +#include "InstructionBlock.h" +#include "ModuleGen.h" +#include "Instructions/Instruction.h" +#include +#include + +InstructionBlock::InstructionBlock(ModuleGen* module, Instruction* instruction) + : module(module), + instruction(std::unique_ptr(instruction)) +{ + std::stringstream ss; + ss << std::hex << std::setfill('0') << std::setw(8) << instruction->Address(); + address_string = ss.str(); +} + +InstructionBlock::~InstructionBlock() +{ +} + +void InstructionBlock::GenerateEntryBlock() +{ + entry_basic_block = llvm::BasicBlock::Create(llvm::getGlobalContext(), address_string + "_Entry"); +} + +void InstructionBlock::GenerateCode() +{ + Module()->IrBuilder()->SetInsertPoint(entry_basic_block); + + instruction->GenerateCode(this); + + exit_basic_block = Module()->IrBuilder()->GetInsertBlock(); +} + +llvm::Value *InstructionBlock::Read(Register reg) +{ + auto ib = module->IrBuilder(); + return ib->CreateAlignedLoad(module->GetRegisterPtr(reg), 4); +} + +llvm::Value *InstructionBlock::Write(Register reg, llvm::Value *value) +{ + auto ib = module->IrBuilder(); + return ib->CreateAlignedStore(value, module->GetRegisterPtr(reg), 4); +} + +size_t InstructionBlock::Address() +{ + return instruction->Address(); +} \ No newline at end of file diff --git a/src/binary_translation/InstructionBlock.h b/src/binary_translation/InstructionBlock.h new file mode 100644 index 000000000..bad4a4e6b --- /dev/null +++ b/src/binary_translation/InstructionBlock.h @@ -0,0 +1,64 @@ +#include +#include + +namespace llvm +{ + class Value; + class BasicBlock; +} + +class ModuleGen; +class Instruction; + +enum class Register; + +/* + * An instruction blocks + * Holds the entry and exit points for an instruction + * Responsible to generate the code + */ +class InstructionBlock +{ +public: + InstructionBlock(ModuleGen *module, Instruction *instruction); + ~InstructionBlock(); + + /* + * Generates the basic block of the instruction + */ + void GenerateEntryBlock(); + + /* + * Generates the code for the instruction + */ + void GenerateCode(); + + /* + * Generates code to read the register + */ + llvm::Value *Read(Register reg); + /* + * Generates code to write the value + * Returns the write instruction = written value + */ + llvm::Value *Write(Register reg, llvm::Value *value); + + size_t Address(); + ModuleGen *Module() { return module; } + + llvm::BasicBlock *GetEntryBasicBlock() { return entry_basic_block; } + llvm::BasicBlock *GetExitBasicBlock() { return exit_basic_block; } +private: + // Textual representation of the address + // Used to generate names + std::string address_string; + + ModuleGen *module; + std::unique_ptr instruction; + + // The block at the entry to instruction + llvm::BasicBlock *entry_basic_block; + + // The block at the exit from the instruction + llvm::BasicBlock *exit_basic_block; +}; \ No newline at end of file diff --git a/src/binary_translation/Instructions/DataProcessing.cpp b/src/binary_translation/Instructions/DataProcessing.cpp index 6151b37a2..f94ab0003 100644 --- a/src/binary_translation/Instructions/DataProcessing.cpp +++ b/src/binary_translation/Instructions/DataProcessing.cpp @@ -1,15 +1,35 @@ #include "DataProcessing.h" #include "Disassembler.h" +#include "InstructionBlock.h" static RegisterInstruction register_instruction; bool DataProcessing::Decode() { - if (ReadFields({ FieldDef<4>(&cond), FieldDef<3>(1), FieldDef<4>(&short_op), FieldDef<1>(&s), FieldDef<4>(&rn), - FieldDef<4>(&rd), FieldDef<12>(&imm12) })) + // Mov and shifts must have zeroes at some operands of different data processing instructions + if (ReadFields({ FieldDef<4>(&cond), FieldDef<3>(0), FieldDef<4>((u32)ShortOpType::MoveAndShifts), FieldDef<1>(&s), FieldDef<4>(0), + FieldDef<4>(&rd), FieldDef<5>(&imm5), FieldDef<3>(0), FieldDef<4>(&rm) })) { - form = Form::Immediate; + form = Form::Register; + if (imm5 != 0) return false; // Shifts + if (s != 0) return false; // Set flags + if (rd == Register::PC) return false; // Jump return true; } + if (ReadFields({ FieldDef<4>(&cond), FieldDef<3>(1), FieldDef<4>(&short_op), FieldDef<1>(&s), FieldDef<4>(&rn), + FieldDef<4>(&rd), FieldDef<12>(&imm12) })) + { + // TODO: not implemented + form = Form::Immediate; + return false; + } return false; +} + +void DataProcessing::GenerateCode(InstructionBlock* instruction_block) +{ + // Currently supports only mov reg, reg + + auto value = instruction_block->Read(rm); + instruction_block->Write(rd, value); } \ No newline at end of file diff --git a/src/binary_translation/Instructions/DataProcessing.h b/src/binary_translation/Instructions/DataProcessing.h index 056c6d12b..a33edea3d 100644 --- a/src/binary_translation/Instructions/DataProcessing.h +++ b/src/binary_translation/Instructions/DataProcessing.h @@ -16,7 +16,7 @@ public: { BitwiseAnd = 0, BitwiseXor, Subtract, RevSubtract, Add, AddWithCarry, SubtractWithCarry, ReverseSubtractWithCarry, // Compare, Test, Misc - BitwiseOr = 12, Move, BitwiseBitClear, BitwiseNot + BitwiseOr = 12, MoveAndShifts, BitwiseBitClear, BitwiseNot }; enum class Form { @@ -25,6 +25,7 @@ public: public: virtual bool Decode() override; + void GenerateCode(InstructionBlock* instruction_block) override; private: Form form; Condition cond; @@ -32,5 +33,7 @@ private: bool s; Register rn; Register rd; + Register rm; u32 imm12; + u32 imm5; }; \ No newline at end of file diff --git a/src/binary_translation/Instructions/Instruction.h b/src/binary_translation/Instructions/Instruction.h index 794b9721a..b57191106 100644 --- a/src/binary_translation/Instructions/Instruction.h +++ b/src/binary_translation/Instructions/Instruction.h @@ -2,6 +2,8 @@ #include "common/common_types.h" #include +class InstructionBlock; + class Instruction { protected: @@ -15,9 +17,17 @@ public: * Returns true on success, or false otherwise */ bool Read(u32 instruction, u32 address); + + /* + * Generates code for the instruction into the instruction block + * Derived classes must override this + */ + virtual void GenerateCode(InstructionBlock *instruction_block) = 0; + + u32 Address() { return address; } protected: /* - * Derived classes should override this, and implement it by calling ReadFields + * Derived classes must override this, and implement it by calling ReadFields */ virtual bool Decode() = 0; /* @@ -36,7 +46,6 @@ protected: */ template static FieldDefObject FieldDef(Type *field); - private: /* * Function used by FieldDefObject to write to a field diff --git a/src/binary_translation/ModuleGen.cpp b/src/binary_translation/ModuleGen.cpp index b34c631d1..a870d800e 100644 --- a/src/binary_translation/ModuleGen.cpp +++ b/src/binary_translation/ModuleGen.cpp @@ -3,8 +3,11 @@ #include "core/loader/loader.h" #include "core/mem_map.h" #include "Instructions/Instruction.h" +#include "Instructions/Types.h" +#include "InstructionBlock.h" #include #include +#include using namespace llvm; @@ -21,10 +24,37 @@ ModuleGen::~ModuleGen() void ModuleGen::Run() { GenerateGlobals(); + + DecodeInstructions(); + GenerateInstructionsEntry(); + GenerateCanRunFunction(); GenerateRunFunction(); + GenerateGetBlockAddressFunction(); - GenerateBlocks(); + GenerateInstructionsCode(); + GenerateInstructionsTermination(); + AddInstructionsToRunFunction(); + + GenerateBlockAddressArray(); +} + +Value *ModuleGen::GetRegisterPtr(Register reg) +{ + Value *global; + unsigned index; + if (reg <= Register::PC) + { + global = registers_global; + index = static_cast(reg)-static_cast(Register::R0); + } + else + { + global = flags_global; + index = static_cast(reg)-static_cast(Register::N); + } + auto base = ir_builder->CreateAlignedLoad(global, 4); + return ir_builder->CreateConstInBoundsGEP1_32(base, index); } void ModuleGen::GenerateGlobals() @@ -35,36 +65,193 @@ void ModuleGen::GenerateGlobals() // Flags is stored internally as i1* indexed in multiples of 4 auto flags_global_initializer = ConstantPointerNull::get(IntegerType::getInt1PtrTy(getGlobalContext())); flags_global = new GlobalVariable(*module, flags_global_initializer->getType(), false, GlobalValue::ExternalLinkage, flags_global_initializer, "Flags"); + + auto get_block_address_function_type = FunctionType::get(ir_builder->getInt8PtrTy(), ir_builder->getInt32Ty(), false); + get_block_address_function = Function::Create(get_block_address_function_type, GlobalValue::PrivateLinkage, "GetBlockAddress", module); + + auto can_run_function_type = FunctionType::get(ir_builder->getInt1Ty(), false); + can_run_function = Function::Create(can_run_function_type, GlobalValue::ExternalLinkage, "CanRun", module); + + auto run_function_type = FunctionType::get(ir_builder->getVoidTy(), false); + run_function = Function::Create(run_function_type, GlobalValue::ExternalLinkage, "Run", module); + + block_address_array_base = Loader::ROMCodeStart / 4; + block_address_array_size = Loader::ROMCodeSize / 4; + + block_address_array_type = ArrayType::get(ir_builder->getInt8PtrTy(), block_address_array_size); + block_address_array = new GlobalVariable(*module, block_address_array_type, true, GlobalValue::ExternalLinkage, nullptr, "BlockAddressArray"); +} + +void ModuleGen::GenerateBlockAddressArray() +{ + auto local_block_address_array_values = std::make_unique(block_address_array_size); + + std::fill( + local_block_address_array_values.get(), + local_block_address_array_values.get() + block_address_array_size, + ConstantPointerNull::get(ir_builder->getInt8PtrTy())); + + for (auto i = 0; i < instruction_blocks.size(); ++i) + { + auto &block = instruction_blocks[i]; + auto entry_basic_block = block->GetEntryBasicBlock(); + auto index = block->Address() / 4 - block_address_array_base; + local_block_address_array_values[index] = BlockAddress::get(entry_basic_block->getParent(), entry_basic_block); + } + + auto local_block_address_array_values_ref = ArrayRef(local_block_address_array_values.get(), block_address_array_size); + auto local_blocks_address_array = ConstantArray::get(block_address_array_type, local_block_address_array_values_ref); + block_address_array->setInitializer(local_blocks_address_array); +} + +void ModuleGen::GenerateGetBlockAddressFunction() +{ + /* + entry_basic_block: + auto index = (pc - block_address_array_base) / 4; + if(index < block_address_array_size) + { + index_in_bounds_basic_block: + return block_address_array[index]; + } + else + { + index_out_of_bounds_basic_block: + return nullptr; + } + */ + auto pc = &*get_block_address_function->arg_begin(); + auto entry_basic_block = BasicBlock::Create(getGlobalContext(), "Entry", get_block_address_function); + auto index_in_bounds_basic_block = BasicBlock::Create(getGlobalContext(), "IndexInBounds", get_block_address_function); + auto index_out_of_bounds_basic_block = BasicBlock::Create(getGlobalContext(), "IndexOutOfBounds", get_block_address_function); + + ir_builder->SetInsertPoint(entry_basic_block); + auto index = ir_builder->CreateUDiv(pc, ir_builder->getInt32(4)); + index = ir_builder->CreateSub(index, ir_builder->getInt32(block_address_array_base)); + auto in_bounds_pred = ir_builder->CreateICmpULT(index, ir_builder->getInt32(block_address_array_size)); + ir_builder->CreateCondBr(in_bounds_pred, index_in_bounds_basic_block, index_out_of_bounds_basic_block); + + ir_builder->SetInsertPoint(index_in_bounds_basic_block); + Value *gep_values[] = { ir_builder->getInt32(0), index }; + auto block_address = ir_builder->CreateLoad(ir_builder->CreateInBoundsGEP(block_address_array, gep_values)); + ir_builder->CreateRet(block_address); + + ir_builder->SetInsertPoint(index_out_of_bounds_basic_block); + ir_builder->CreateRet(ConstantPointerNull::get(ir_builder->getInt8PtrTy())); } void ModuleGen::GenerateCanRunFunction() { - auto can_run_function_type = FunctionType::get(ir_builder->getInt1Ty(), false); - can_run_function = Function::Create(can_run_function_type, GlobalValue::ExternalLinkage, "CanRun", module); + // return GetBlockAddress(Read(PC)) != nullptr; auto basic_block = BasicBlock::Create(getGlobalContext(), "Entry", can_run_function); ir_builder->SetInsertPoint(basic_block); - ir_builder->CreateRet(ir_builder->getInt1(false)); + auto block_address = ir_builder->CreateCall(get_block_address_function, ir_builder->CreateAlignedLoad(GetRegisterPtr(Register::PC), 4)); + ir_builder->CreateRet(ir_builder->CreateICmpNE(block_address, ConstantPointerNull::get(ir_builder->getInt8PtrTy()))); } void ModuleGen::GenerateRunFunction() { - auto run_function_type = FunctionType::get(ir_builder->getVoidTy(), false); - run_function = Function::Create(run_function_type, GlobalValue::ExternalLinkage, "Run", module); - auto basic_block = BasicBlock::Create(getGlobalContext(), "Entry", run_function); + /* + run_function_entry: + run_function_re_entry: + auto block_address = GetBlockAddress(Read(PC)) + if(index != nullptr) + { + block_present_basic_block: + goto block_address; + return; + } + else + { + block_not_present_basic_block: + return; + } + */ + run_function_entry = BasicBlock::Create(getGlobalContext(), "Entry", run_function); + // run_function_re_entry is needed because it isn't possible to jump to the first block of a function + run_function_re_entry = BasicBlock::Create(getGlobalContext(), "ReEntry", run_function); + auto block_present_basic_block = BasicBlock::Create(getGlobalContext(), "BlockPresent", run_function); + auto block_not_present_basic_block = BasicBlock::Create(getGlobalContext(), "BlockNotPresent", run_function); - ir_builder->SetInsertPoint(basic_block); + ir_builder->SetInsertPoint(run_function_entry); + ir_builder->CreateBr(run_function_re_entry); + + ir_builder->SetInsertPoint(run_function_re_entry); + auto block_address = ir_builder->CreateCall(get_block_address_function, ir_builder->CreateAlignedLoad(GetRegisterPtr(Register::PC), 4)); + auto block_present_pred = ir_builder->CreateICmpNE(block_address, ConstantPointerNull::get(ir_builder->getInt8PtrTy())); + ir_builder->CreateCondBr(block_present_pred, block_present_basic_block, block_not_present_basic_block); + + ir_builder->SetInsertPoint(block_present_basic_block); + auto indirect_br = ir_builder->CreateIndirectBr(block_address, instruction_blocks.size()); + for (auto &block : instruction_blocks) + { + indirect_br->addDestination(block->GetEntryBasicBlock()); + } + + ir_builder->SetInsertPoint(block_not_present_basic_block); ir_builder->CreateRetVoid(); } -void ModuleGen::GenerateBlocks() +void ModuleGen::DecodeInstructions() { for (auto i = Loader::ROMCodeStart; i <= Loader::ROMCodeStart + Loader::ROMCodeSize - 4; i += 4) { auto instruction = Disassembler::Disassemble(Memory::Read32(i), i); - if (instruction != nullptr) + if (instruction == nullptr) continue; + auto instruction_block = std::make_unique(this, instruction.release()); + instruction_blocks_by_pc[i] = instruction_block.get(); + instruction_blocks.push_back(std::move(instruction_block)); + } +} + +void ModuleGen::GenerateInstructionsEntry() +{ + for (auto &instruction : instruction_blocks) + { + instruction->GenerateEntryBlock(); + } +} + +void ModuleGen::GenerateInstructionsCode() +{ + for (auto &instruction : instruction_blocks) + { + instruction->GenerateCode(); + } +} + +void ModuleGen::GenerateInstructionsTermination() +{ + // Return to the switch + for (auto &block : instruction_blocks) + { + ir_builder->SetInsertPoint(block->GetExitBasicBlock()); + ir_builder->CreateAlignedStore(ir_builder->getInt32(block->Address() + 4), GetRegisterPtr(Register::PC), 4); + ir_builder->CreateBr(run_function_re_entry); + } +} + +void ModuleGen::AddInstructionsToRunFunction() +{ + std::stack basic_blocks_stack; + + for (auto &block : instruction_blocks) + { + basic_blocks_stack.push(block->GetEntryBasicBlock()); + + while (basic_blocks_stack.size()) { - LOG_DEBUG(BinaryTranslator, "Instruction at %08x", i); + auto basic_block = basic_blocks_stack.top(); + basic_blocks_stack.pop(); + basic_block->insertInto(run_function); + auto terminator = basic_block->getTerminator(); + for (auto i = 0; i < terminator->getNumSuccessors(); ++i) + { + auto new_basic_block = terminator->getSuccessor(i); + if (new_basic_block->getParent()) continue; // Already added to run + basic_blocks_stack.push(new_basic_block); + } } } } \ No newline at end of file diff --git a/src/binary_translation/ModuleGen.h b/src/binary_translation/ModuleGen.h index a50721770..fde2fffd3 100644 --- a/src/binary_translation/ModuleGen.h +++ b/src/binary_translation/ModuleGen.h @@ -1,4 +1,10 @@ #include +#include +#include + +enum class Register; + +class InstructionBlock; namespace llvm { @@ -12,11 +18,30 @@ public: ~ModuleGen(); void Run(); + + // Returns the address of a register or a flag + llvm::Value *GetRegisterPtr(Register reg); + + llvm::IRBuilder<> *IrBuilder() { return ir_builder.get(); } + llvm::Module *Module() { return module; } +private: + // Generates the declarations of all the globals of the module void GenerateGlobals(); + void GenerateBlockAddressArray(); + void GenerateGetBlockAddressFunction(); void GenerateCanRunFunction(); void GenerateRunFunction(); - void GenerateBlocks(); -private: + // Creates InstructionBlock for each instruction + void DecodeInstructions(); + // Generates the entry basic blocks for each instruction + void GenerateInstructionsEntry(); + // Generates the code of each instruction + void GenerateInstructionsCode(); + // Terminates each block + void GenerateInstructionsTermination(); + // Adds all the basic blocks of an instruction to the run function + void AddInstructionsToRunFunction(); + std::unique_ptr> ir_builder; llvm::Module *module; @@ -31,6 +56,21 @@ private: * Orderered N, Z, C, V */ llvm::GlobalVariable *flags_global; + + size_t block_address_array_base; + size_t block_address_array_size; + /* + * i8 **BlockAddressArray; + * The array at [i/4 - block_address_array_base] contains the block address for the instruction at i + * or nullptr if it is not decoded + */ + llvm::ArrayType *block_address_array_type; + llvm::GlobalVariable *block_address_array; + /* + * i8 *GetBlockAddress(u32 pc) + * Returns the address of the block for the instruction at pc + */ + llvm::Function *get_block_address_function; /* * bool CanRun() * Returns whether there is a binary translation available for a PC @@ -41,4 +81,12 @@ private: * Runs binary translated opcodes */ llvm::Function *run_function; + llvm::BasicBlock *run_function_entry; + llvm::BasicBlock *run_function_re_entry; + + /* + * All the instruction blocks + */ + std::vector> instruction_blocks; + std::unordered_map instruction_blocks_by_pc; }; \ No newline at end of file