From d4fc4e209ae64148e89bd8af36b837fe42b979a1 Mon Sep 17 00:00:00 2001
From: MerryMage
Date: Sat, 2 Apr 2016 12:55:41 +0100
Subject: [PATCH] JitX64: Implement load and store multiple

LDM and STM are now compiled as calls to host helper functions. The
user-register and exception-return variants (LDM_usr, LDM_eret, STM_usr)
still go through CompileInterpretInstruction.

---
 src/core/arm/decoder/arm.cpp                  |  22 +-
 src/core/arm/decoder/decoder.h                |   8 +-
 src/core/arm/decoder/thumb.cpp                |  12 +-
 src/core/arm/jit_x64/common.h                 |   1 +
 .../arm/jit_x64/instructions/load_store.cpp   | 234 +++++++++++++++++-
 src/core/arm/jit_x64/jit_x64.h                |   7 +-
 .../core/arm/jit_x64/fuzz_arm_common.cpp      |   2 +-
 .../core/arm/jit_x64/fuzz_arm_load_store.cpp  |  42 +++
 8 files changed, 301 insertions(+), 27 deletions(-)

diff --git a/src/core/arm/decoder/arm.cpp b/src/core/arm/decoder/arm.cpp
index 454f90de9..99dd76329 100644
--- a/src/core/arm/decoder/arm.cpp
+++ b/src/core/arm/decoder/arm.cpp
@@ -114,7 +114,7 @@ static std::unique_ptr MakeMatcher(const char format[32], Function fn)
     return std::unique_ptr(std::move(ret));
 }
 
-static const std::array arm_instruction_table = {{
+static const std::array arm_instruction_table = {{
     // Branch instructions
     { "BLX (immediate)", MakeMatcher<2>("1111101hvvvvvvvvvvvvvvvvvvvvvvvv", &Visitor::BLX_imm) }, // ARMv5
     { "BLX (register)", MakeMatcher<2>("cccc000100101111111111110011mmmm", &Visitor::BLX_reg) }, // ARMv5
@@ -266,21 +266,11 @@ static const std::array arm_instruction_table = {{
     { "STRT (A2)", MakeMatcher<0>("----0110-010---------------0----", &Visitor::STRT) },
 
     // Load/Store Multiple instructions
-    { "LDMIA/LDMFD", MakeMatcher<0>("----100010-1--------------------", &Visitor::LDM) }, // all
-    { "LDMDA/LDMFA", MakeMatcher<0>("----100000-1--------------------", &Visitor::LDM) }, // all
-    { "LDMDB/LDMEA", MakeMatcher<0>("----100100-1--------------------", &Visitor::LDM) }, // all
-    { "LDMIB/LDMED", MakeMatcher<0>("----100110-1--------------------", &Visitor::LDM) }, // all
-    { "LDM (exc ret)", MakeMatcher<0>("----100--1-1----1---------------", &Visitor::LDM) }, // all
-    { "LDM (usr reg)", MakeMatcher<0>("----100--1-1----0---------------", &Visitor::LDM) }, // all
-    { "POP", MakeMatcher<0>("----100010111101----------------", &Visitor::LDM) }, // all
-    { "POP", MakeMatcher<0>("----010010011101----000000000100", &Visitor::LDM) }, // all
-    { "PUSH", MakeMatcher<0>("----100100101101----------------", &Visitor::STM) }, // all
-    { "PUSH", MakeMatcher<0>("----010100101101----000000000100", &Visitor::STM) }, // all
-    { "STMIA/STMEA", MakeMatcher<0>("----100010-0--------------------", &Visitor::STM) }, // all
-    { "STMDA/STMED", MakeMatcher<0>("----100000-0--------------------", &Visitor::STM) }, // all
-    { "STMDB/STMFD", MakeMatcher<0>("----100100-0--------------------", &Visitor::STM) }, // all
-    { "STMIB/STMFA", MakeMatcher<0>("----100110-0--------------------", &Visitor::STM) }, // all
-    { "STMIB (usr reg)", MakeMatcher<0>("----100--100--------------------", &Visitor::STM) }, // all
+    { "LDM", MakeMatcher<6>("cccc100pu0w1nnnnxxxxxxxxxxxxxxxx", &Visitor::LDM) }, // all
+    { "LDM (usr reg)", MakeMatcher<0>("----100--101----0---------------", &Visitor::LDM_usr) }, // all
+    { "LDM (exce ret)", MakeMatcher<0>("----100--1-1----1---------------", &Visitor::LDM_eret) }, // all
+    { "STM", MakeMatcher<6>("cccc100pu0w0nnnnxxxxxxxxxxxxxxxx", &Visitor::STM) }, // all
+    { "STM (usr reg)", MakeMatcher<0>("----100--100--------------------", &Visitor::STM_usr) }, // all
 
     // Miscellaneous instructions
     { "CLZ", MakeMatcher<0>("----000101101111----11110001----", &Visitor::CLZ) }, // ARMv5
diff --git a/src/core/arm/decoder/decoder.h b/src/core/arm/decoder/decoder.h
index 4dd36fc03..51b1218bf 100644
--- a/src/core/arm/decoder/decoder.h
+++ b/src/core/arm/decoder/decoder.h
@@ -58,6 +58,7 @@ using Imm11 = u32;
 using Imm12 = u32;
 using Imm24 = u32;
 using Register = int;
+using RegisterList = u16; // Bit i set means register Ri is part of the list
 using ShiftType = int;
 
 class Visitor {
@@ -188,8 +189,11 @@ public:
     virtual void STRT() = 0;
 
     // Load/Store multiple instructions
-    virtual void LDM() = 0;
-    virtual void STM() = 0;
+    virtual void LDM(Cond cond, bool P, bool U, bool W, Register Rn, RegisterList list) = 0;
+    virtual void LDM_usr() = 0;
+    virtual void LDM_eret() = 0;
+    virtual void STM(Cond cond, bool P, bool U, bool W, Register Rn, RegisterList list) = 0;
+    virtual void STM_usr() = 0;
 
     // Miscellaneous instructions
     virtual void CLZ() = 0;
diff --git a/src/core/arm/decoder/thumb.cpp b/src/core/arm/decoder/thumb.cpp
index 7377b8c5d..de761186c 100644
--- a/src/core/arm/decoder/thumb.cpp
+++ b/src/core/arm/decoder/thumb.cpp
@@ -352,10 +352,12 @@ static const std::array thumb_instruction_table = { {
         u32 reglist = bits<0, 7>(instruction);
         if (!L) { // PUSH {reglist, =LR}
             reglist |= R << 14;
-            v->STM();
+            // Equivalent to STMDB SP!, {reglist}
+            v->STM(0xE, /*P=*/1, /*U=*/0, /*W=*/1, 13, reglist);
         } else { // POP {reglist, =PC}
             reglist |= R << 15;
-            v->LDM();
+            // Equivalent to LDMIA SP!, {reglist}
+            v->LDM(0xE, /*P=*/0, /*U=*/1, /*W=*/1, 13, reglist);
         }
     })},
     { "SETEND", MakeMatcher("101101100101x000", [](Visitor* v, u32 instruction) {
@@ -400,9 +402,11 @@ static const std::array thumb_instruction_table = { {
         Register Rn = bits<8, 10>(instruction);
         u32 reglist = bits<0, 7>(instruction);
         if (!L) { // STMIA Rn!, { reglist }
-            v->STM();
+            v->STM(0xE, /*P=*/0, /*U=*/1, /*W=*/1, Rn, reglist);
         } else { // LDMIA Rn!, { reglist }
-            v->LDM();
+            // Writeback only takes place when Rn is absent from the register list.
+            const bool w = (reglist & (1 << Rn)) == 0;
+            v->LDM(0xE, /*P=*/0, /*U=*/1, /*W=*/w, Rn, reglist);
         }
     })},
     { "B", MakeMatcher("1101xxxxxxxxxxxx", [](Visitor* v, u32 instruction) {
diff --git a/src/core/arm/jit_x64/common.h b/src/core/arm/jit_x64/common.h
index 4cdbf3079..ac7ed48bb 100644
--- a/src/core/arm/jit_x64/common.h
+++ b/src/core/arm/jit_x64/common.h
@@ -12,6 +12,7 @@
 namespace JitX64 {
 
 using ArmReg = ArmDecoder::Register;
+using ArmRegList = ArmDecoder::RegisterList;
 using ArmImm4 = ArmDecoder::Imm4;
 using ArmImm5 = ArmDecoder::Imm5;
 using ArmImm8 = ArmDecoder::Imm8;
diff --git a/src/core/arm/jit_x64/instructions/load_store.cpp b/src/core/arm/jit_x64/instructions/load_store.cpp
index 24eab98f0..8744c4ab0 100644
--- a/src/core/arm/jit_x64/instructions/load_store.cpp
+++ b/src/core/arm/jit_x64/instructions/load_store.cpp
@@ -2,6 +2,7 @@
 // Licensed under GPLv2 or any later version
 // Refer to the license.txt file included.
 
+#include "common/bit_set.h"
 #include "common/swap.h"
 #include "common/x64/abi.h"
 
@@ -800,7 +801,236 @@ void JitX64::STRHT() { CompileInterpretInstruction(); }
 void JitX64::STRT() { CompileInterpretInstruction(); }
 
 // Load/Store multiple instructions
-void JitX64::LDM() { CompileInterpretInstruction(); }
-void JitX64::STM() { CompileInterpretInstruction(); }
+
+// Each addressing-mode helper below loads the transfer start address into ABI_PARAM1, invokes
+// `call` to emit the host call, and applies base-register writeback when W is set.
+
+// Increment After (P=0, U=1): start at Rn; writeback adds 4 bytes per listed register.
+static void LoadAndStoreMultiple_IncrementAfter(XEmitter* code, RegAlloc& reg_alloc, bool W, ArmReg Rn_index, ArmRegList list, std::function<void()> call) {
+    if (W) {
+        X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), R(Rn));
+        reg_alloc.UnlockArm(Rn_index);
+
+        call();
+
+        Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->ADD(32, R(Rn), Imm8(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+    } else {
+        OpArg Rn = reg_alloc.LockArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), Rn);
+        reg_alloc.UnlockArm(Rn_index);
+        call();
+    }
+}
+
+// Increment Before (P=1, U=1): start at Rn + 4; writeback adds 4 bytes per listed register.
+static void LoadAndStoreMultiple_IncrementBefore(XEmitter* code, RegAlloc& reg_alloc, bool W, ArmReg Rn_index, ArmRegList list, std::function<void()> call) {
+    if (W) {
+        X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), R(Rn));
+        code->ADD(32, R(ABI_PARAM1), Imm8(4));
+        reg_alloc.UnlockArm(Rn_index);
+
+        call();
+
+        Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->ADD(32, R(Rn), Imm8(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+    } else {
+        OpArg Rn = reg_alloc.LockArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), Rn);
+        code->ADD(32, R(ABI_PARAM1), Imm8(4));
+        reg_alloc.UnlockArm(Rn_index);
+        call();
+    }
+}
+
+// Decrement After (P=0, U=0): start at Rn - 4 * count + 4; writeback subtracts 4 * count.
+static void LoadAndStoreMultiple_DecrementAfter(XEmitter* code, RegAlloc& reg_alloc, bool W, ArmReg Rn_index, ArmRegList list, std::function<void()> call) {
+    if (W) {
+        X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), R(Rn));
+        code->SUB(32, R(ABI_PARAM1), Imm32(4 * Common::CountSetBits(list) - 4));
+        reg_alloc.UnlockArm(Rn_index);
+
+        call();
+
+        Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->SUB(32, R(Rn), Imm32(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+    } else {
+        OpArg Rn = reg_alloc.LockArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), Rn);
+        code->SUB(32, R(ABI_PARAM1), Imm32(4 * Common::CountSetBits(list) - 4));
+        reg_alloc.UnlockArm(Rn_index);
+        call();
+    }
+}
+
+// Decrement Before (P=1, U=0): start at Rn - 4 * count; writeback subtracts 4 * count.
+static void LoadAndStoreMultiple_DecrementBefore(XEmitter* code, RegAlloc& reg_alloc, bool W, ArmReg Rn_index, ArmRegList list, std::function<void()> call) {
+    if (W && !(list & (1 << Rn_index))) {
+        // Rn is not transferred, so it can be updated in place before the call.
+        X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->SUB(32, R(Rn), Imm32(4 * Common::CountSetBits(list)));
+        code->MOV(32, R(ABI_PARAM1), R(Rn));
+        reg_alloc.UnlockArm(Rn_index);
+        call();
+    } else if (W) {
+        // Rn is in the list: keep its original value until the transfer has happened.
+        X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), R(Rn));
+        code->SUB(32, R(ABI_PARAM1), Imm32(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+
+        call();
+
+        Rn = reg_alloc.BindArmForReadWrite(Rn_index);
+        code->SUB(32, R(Rn), Imm32(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+    } else {
+        OpArg Rn = reg_alloc.LockArmForReadWrite(Rn_index);
+        code->MOV(32, R(ABI_PARAM1), Rn);
+        code->SUB(32, R(ABI_PARAM1), Imm32(4 * Common::CountSetBits(list)));
+        reg_alloc.UnlockArm(Rn_index);
+        call();
+    }
+}
+
+// Reserves the three host argument registers (start address, register list, JitState pointer),
+// dispatches on P/U to the matching addressing-mode helper, then releases them.
+static void LoadAndStoreMultiple_Helper(XEmitter* code, RegAlloc& reg_alloc, bool P, bool U, bool W, ArmReg Rn_index, ArmRegList list, std::function<void()> call) {
+    reg_alloc.FlushX64(ABI_PARAM1);
+    reg_alloc.LockX64(ABI_PARAM1);
+    reg_alloc.FlushX64(ABI_PARAM2);
+    reg_alloc.LockX64(ABI_PARAM2);
+    reg_alloc.FlushX64(ABI_PARAM3);
+    reg_alloc.LockX64(ABI_PARAM3);
+
+    code->MOV(32, R(ABI_PARAM2), Imm32(list));
+    code->MOV(64, R(ABI_PARAM3), R(reg_alloc.JitStateReg()));
+
+    if (!P && !U) {
+        LoadAndStoreMultiple_DecrementAfter(code, reg_alloc, W, Rn_index, list, call);
+    } else if (!P && U) {
+        LoadAndStoreMultiple_IncrementAfter(code, reg_alloc, W, Rn_index, list, call);
+    } else if (P && !U) {
+        LoadAndStoreMultiple_DecrementBefore(code, reg_alloc, W, Rn_index, list, call);
+    } else if (P && U) {
+        LoadAndStoreMultiple_IncrementBefore(code, reg_alloc, W, Rn_index, list, call);
+    } else {
+        UNREACHABLE();
+    }
+
+    reg_alloc.UnlockX64(ABI_PARAM1);
+    reg_alloc.UnlockX64(ABI_PARAM2);
+    reg_alloc.UnlockX64(ABI_PARAM3);
+}
+
+// Host-side LDM: loads each listed register (lowest first) from consecutive words at start_address.
+static void ExecuteLDMLE(u32 start_address, u16 reg_list, JitState* jit_state) {
+    for (int i = 0; i < 16; i++) {
+        const u16 bit = 1 << i;
+        if (reg_list & bit) {
+            jit_state->cpu_state.Reg[i] = Memory::Read32(start_address);
+            start_address += 4;
+        }
+    }
+}
+
+// Same as ExecuteLDMLE, but byte-swaps every loaded word.
+static void ExecuteLDMBE(u32 start_address, u16 reg_list, JitState* jit_state) {
+    for (int i = 0; i < 16; i++) {
+        const u16 bit = 1 << i;
+        if (reg_list & bit) {
+            jit_state->cpu_state.Reg[i] = Common::swap32(Memory::Read32(start_address));
+            start_address += 4;
+        }
+    }
+}
+
+// Compiles LDM via a host call; if PC is loaded, updates the T flag from bit 0 and returns to dispatch.
+void JitX64::LDM(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmRegList list) {
+    cond_manager.CompileCond((ConditionCode)cond);
+
+    ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE");
+    ASSERT_MSG(list != 0, "UNPREDICTABLE");
+    if (W && (list & (1 << Rn_index)))
+        ASSERT_MSG(false, "UNPREDICTABLE");
+
+    // TODO: Optimize
+
+    LoadAndStoreMultiple_Helper(code, reg_alloc, P, U, W, Rn_index, list,
+        [this](){ CompileCallHost(!current.EFlag ? &ExecuteLDMLE : &ExecuteLDMBE); });
+
+    current.arm_pc += GetInstSize();
+    if (list & (1 << 15)) {
+        code->BT(32, MJitStateArmPC(), Imm8(0));
+        code->SETcc(CC_C, MJitStateTFlag());
+        code->AND(32, MJitStateArmPC(), Imm32(0xFFFFFFFE));
+        CompileReturnToDispatch();
+    }
+}
+
+// Host-side STM: stores R0-R14 from the list in ascending order, then PC (adjusted by 8, or 4 when TFlag is set).
+static void ExecuteSTMLE(u32 start_address, u16 reg_list, JitState* jit_state) {
+    for (int i = 0; i < 15; i++) {
+        const u16 bit = 1 << i;
+        if (reg_list & bit) {
+            Memory::Write32(start_address, jit_state->cpu_state.Reg[i]);
+            start_address += 4;
+        }
+    }
+    // Reading R15 here is IMPLEMENTATION DEFINED
+    if (reg_list & (1 << 15)) {
+        if (!jit_state->cpu_state.TFlag) {
+            Memory::Write32(start_address, jit_state->cpu_state.Reg[15] + 8);
+        } else {
+            Memory::Write32(start_address, jit_state->cpu_state.Reg[15] + 4);
+        }
+    }
+}
+
+// Same as ExecuteSTMLE, but byte-swaps every stored word.
+static void ExecuteSTMBE(u32 start_address, u16 reg_list, JitState* jit_state) {
+    for (int i = 0; i < 15; i++) {
+        const u16 bit = 1 << i;
+        if (reg_list & bit) {
+            Memory::Write32(start_address, Common::swap32(jit_state->cpu_state.Reg[i]));
+            start_address += 4;
+        }
+    }
+    // Reading R15 here is IMPLEMENTATION DEFINED
+    if (reg_list & (1 << 15)) {
+        if (!jit_state->cpu_state.TFlag) {
+            Memory::Write32(start_address, Common::swap32(jit_state->cpu_state.Reg[15] + 8));
+        } else {
+            Memory::Write32(start_address, Common::swap32(jit_state->cpu_state.Reg[15] + 4));
+        }
+    }
+}
+
+// Compiles STM via a host call to ExecuteSTMLE/ExecuteSTMBE, selected by current.EFlag.
+void JitX64::STM(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmRegList list) {
+    cond_manager.CompileCond((ConditionCode)cond);
+
+    ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE");
+    ASSERT_MSG(list != 0, "UNPREDICTABLE");
+    if (W && (list & (1 << Rn_index)))
+        ASSERT_MSG((list & ((1 << Rn_index) - 1)) == 0, "UNPREDICTABLE");
+
+    // TODO: Optimize
+
+    LoadAndStoreMultiple_Helper(code, reg_alloc, P, U, W, Rn_index, list,
+        [this](){ CompileCallHost(!current.EFlag ? &ExecuteSTMLE : &ExecuteSTMBE); });
+
+    current.arm_pc += GetInstSize();
+}
+
+void JitX64::LDM_usr() { CompileInterpretInstruction(); }
+void JitX64::LDM_eret() { CompileInterpretInstruction(); }
+void JitX64::STM_usr() { CompileInterpretInstruction(); }
 
 }
diff --git a/src/core/arm/jit_x64/jit_x64.h b/src/core/arm/jit_x64/jit_x64.h
index 66ed3aa26..7d76d2279 100644
--- a/src/core/arm/jit_x64/jit_x64.h
+++ b/src/core/arm/jit_x64/jit_x64.h
@@ -285,8 +285,11 @@ private:
     void STRT() override;
 
     // Load/Store multiple instructions
-    void LDM() override;
-    void STM() override;
+    void LDM(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmRegList list) override;
+    void LDM_usr() override;
+    void LDM_eret() override;
+    void STM(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmRegList list) override;
+    void STM_usr() override;
 
     // Miscellaneous instructions
     void CLZ() override;
diff --git a/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp b/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp
index 92df73c62..277b8d68e 100644
--- a/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp
+++ b/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp
@@ -160,7 +160,7 @@ void FuzzJit(const int instruction_count, const int instructions_to_execute_coun
         if (interp_mem_recording != jit_mem_recording) {
             printf("memory write recording mismatch *\n");
             size_t i = 0;
-            while (i < interp_mem_recording.size() && i < jit_mem_recording.size()) {
+            while (i < interp_mem_recording.size() || i < jit_mem_recording.size()) {
                 if (i < interp_mem_recording.size())
                     printf("interp: %i %08x %08x\n", interp_mem_recording[i].size, interp_mem_recording[i].addr, interp_mem_recording[i].data);
                 if (i < jit_mem_recording.size())
diff --git a/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp b/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp
index e4a24505b..e21938171 100644
--- a/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp
+++ b/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp
@@ -93,4 +93,46 @@ TEST_CASE("Fuzz ARM load/store instructions (double-word)", "[JitX64]") {
     SECTION("short blocks") {
         FuzzJit(1, 2, 5000, instruction_select);
     }
+}
+
+TEST_CASE("Fuzz ARM load/store multiple instructions", "[JitX64]") {
+    const std::array<std::pair<u32, u32>, 2> instructions = {{
+        FromBitString32("cccc100pu0w1nnnnxxxxxxxxxxxxxxxx"), // LDM
+        FromBitString32("cccc100pu0w0nnnnxxxxxxxxxxxxxxxx"), // STM
+    }};
+
+    auto instruction_select = [&]() -> u32 {
+        size_t inst_index = RandInt(0, instructions.size() - 1);
+
+        u32 cond = 0xE;
+        // Have a one-in-twenty-five chance of actually having a cond.
+        if (RandInt(1, 25) == 1) {
+            cond = RandInt(0x0, 0xD);
+        }
+
+        u32 reg_list = RandInt(1, 0xFFFF);
+        u32 Rn = RandInt(0, 14);
+        // flags is placed at bit 20: bit 1 -> W (bit 21), bit 3 -> U (bit 23), bit 4 -> P (bit 24).
+        // Bits 0 and 2 land on fixed encoding bits and are masked off below.
+        u32 flags = RandInt(0, 0x1F);
+
+        if (inst_index == 1 && (flags & 2)) {
+            // STM with writeback: Rn may only be stored as the lowest listed register.
+            if (reg_list & (1 << Rn))
+                reg_list &= ~((1 << Rn) - 1);
+        } else if (inst_index == 0 && (flags & 2)) {
+            // LDM with writeback: Rn must not be loaded, and the list must stay non-empty.
+            reg_list &= ~(1 << Rn);
+            if (reg_list == 0)
+                reg_list = 1 << ((Rn + 1) % 15);
+        }
+
+        u32 assemble_randoms = (reg_list << 0) | (Rn << 16) | (flags << 20) | (cond << 28);
+
+        return instructions[inst_index].first | (assemble_randoms & (~instructions[inst_index].second));
+    };
+
+    SECTION("short blocks") {
+        FuzzJit(1, 1, 5000, instruction_select);
+    }
 }
\ No newline at end of file