diff --git a/src/core/arm/jit_x64/cond.cpp b/src/core/arm/jit_x64/cond.cpp index 873b139a1..24a2f23e3 100644 --- a/src/core/arm/jit_x64/cond.cpp +++ b/src/core/arm/jit_x64/cond.cpp @@ -67,7 +67,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { cc = CC_NE; break; case ConditionCode::HI: { //c & !z - const X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + const X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateZFlag()); jit->code->CMP(8, jit->MJitStateCFlag(), R(tmp)); cc = CC_BE; @@ -75,7 +75,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { break; } case ConditionCode::LS: { //!c | z - const X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + const X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateZFlag()); jit->code->CMP(8, jit->MJitStateCFlag(), R(tmp)); cc = CC_A; @@ -83,7 +83,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { break; } case ConditionCode::GE: { // n == v - const X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + const X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateVFlag()); jit->code->CMP(8, jit->MJitStateNFlag(), R(tmp)); cc = CC_NE; @@ -91,7 +91,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { break; } case ConditionCode::LT: { // n != v - const X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + const X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateVFlag()); jit->code->CMP(8, jit->MJitStateNFlag(), R(tmp)); cc = CC_E; @@ -99,7 +99,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { break; } case ConditionCode::GT: { // !z & (n == v) - const X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + const X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateNFlag()); jit->code->XOR(8, R(tmp), jit->MJitStateVFlag()); jit->code->OR(8, R(tmp), jit->MJitStateZFlag()); @@ -109,7 +109,7 @@ void JitX64::CondManager::CompileCond(const ConditionCode new_cond) { break; } case ConditionCode::LE: { // z | (n != v) - X64Reg tmp = jit->reg_alloc.AllocAndLockTemp(); + X64Reg tmp = jit->reg_alloc.AllocTemp(); jit->code->MOVZX(64, 8, tmp, jit->MJitStateNFlag()); jit->code->XOR(8, R(tmp), jit->MJitStateVFlag()); jit->code->OR(8, R(tmp), jit->MJitStateZFlag()); diff --git a/src/core/arm/jit_x64/instructions/data_processing.cpp b/src/core/arm/jit_x64/instructions/data_processing.cpp index ff910fd82..f7223f26e 100644 --- a/src/core/arm/jit_x64/instructions/data_processing.cpp +++ b/src/core/arm/jit_x64/instructions/data_processing.cpp @@ -11,91 +11,50 @@ namespace JitX64 { using namespace Gen; void JitX64::CompileDataProcessingHelper(ArmReg Rn_index, ArmReg Rd_index, std::function body) { - // The major consideration is if Rn and/or Rd == R15. - - if (Rd_index == 15) { - X64Reg Rd = reg_alloc.AllocAndLockTemp(); - reg_alloc.LockArm(Rn_index); - - if (Rn_index == 15) { - // TODO: In this case we can actually calculat the final result. - // We can also use CompileJumpToBB instead of having to use CompileReturnToDispatch. - code->MOV(32, R(Rd), Imm32(GetReg15Value())); - } else { - code->MOV(32, R(Rd), reg_alloc.ArmR(Rn_index)); - } - - body(Rd); - - reg_alloc.UnlockArm(Rn_index); - - code->MOV(32, MJitStateArmPC(), R(Rd)); - reg_alloc.UnlockTemp(Rd); - } else if (Rn_index == 15) { - X64Reg Rd = reg_alloc.BindNoLoadAndLockArm(Rd_index); - reg_alloc.MarkDirty(Rd_index); - + if (Rn_index == 15) { + X64Reg Rd = reg_alloc.WriteOnlyLockArm(Rd_index); code->MOV(32, R(Rd), Imm32(GetReg15Value())); body(Rd); reg_alloc.UnlockArm(Rd_index); - } else if (Rn_index == Rd_index) { - X64Reg Rd = reg_alloc.BindAndLockArm(Rd_index); + } else if (Rn_index == Rd_index) { // Note: Rd_index cannot possibly be 15 in this case. + X64Reg Rd = reg_alloc.LoadAndLockArm(Rd_index); reg_alloc.MarkDirty(Rd_index); body(Rd); reg_alloc.UnlockArm(Rd_index); } else { - X64Reg Rd = reg_alloc.BindNoLoadAndLockArm(Rd_index); - reg_alloc.MarkDirty(Rd_index); + X64Reg Rd = reg_alloc.WriteOnlyLockArm(Rd_index); reg_alloc.LockArm(Rn_index); - code->MOV(32, R(Rd), reg_alloc.ArmR(Rn_index)); body(Rd); - reg_alloc.UnlockArm(Rd_index); reg_alloc.UnlockArm(Rn_index); + reg_alloc.UnlockArm(Rd_index); } } void JitX64::CompileDataProcessingHelper_Reverse(ArmReg Rn_index, ArmReg Rd_index, std::function body) { - // The major consideration is if Rn and/or Rd == R15. - if (Rd_index != Rn_index) { - - X64Reg Rd = INVALID_REG; - - if (Rd_index == 15) { - Rd = reg_alloc.AllocAndLockTemp(); - } else { - Rd = reg_alloc.BindNoLoadAndLockArm(Rd_index); - reg_alloc.MarkDirty(Rd_index); - } + X64Reg Rd = reg_alloc.WriteOnlyLockArm(Rd_index); body(Rd); - if (Rd_index == 15) { - code->MOV(32, MJitStateArmPC(), R(Rd)); - reg_alloc.UnlockTemp(Rd); - } else { - reg_alloc.UnlockArm(Rd_index); - } - + reg_alloc.UnlockArm(Rd_index); } else { - - X64Reg tmp = reg_alloc.AllocAndLockTemp(); + X64Reg tmp = reg_alloc.AllocTemp(); body(tmp); // TODO: Efficiency: Could implement this as a register rebind instead of needing to MOV. - reg_alloc.LockArm(Rd_index); + reg_alloc.LockAndDirtyArm(Rd_index); code->MOV(32, reg_alloc.ArmR(Rd_index), R(tmp)); reg_alloc.UnlockArm(Rd_index); - reg_alloc.UnlockTemp(tmp); + reg_alloc.UnlockTemp(tmp); } } @@ -228,7 +187,7 @@ void JitX64::MOV_imm(Cond cond, bool S, ArmReg Rd_index, int rotate, ArmImm8 imm u32 immediate = rotr(imm8, rotate * 2); if (Rd_index != 15) { - reg_alloc.LockArm(Rd_index); + reg_alloc.LockAndDirtyArm(Rd_index); code->MOV(32, reg_alloc.ArmR(Rd_index), Imm32(immediate)); reg_alloc.UnlockArm(Rd_index); } else { @@ -260,7 +219,7 @@ void JitX64::MVN_imm(Cond cond, bool S, ArmReg Rd_index, int rotate, ArmImm8 imm u32 immediate = rotr(imm8, rotate * 2); if (Rd_index != 15) { - reg_alloc.LockArm(Rd_index); + reg_alloc.LockAndDirtyArm(Rd_index); code->MOV(32, reg_alloc.ArmR(Rd_index), Imm32(~immediate)); reg_alloc.UnlockArm(Rd_index); } else { @@ -429,7 +388,7 @@ void JitX64::TEQ_imm(Cond cond, ArmReg Rn_index, int rotate, ArmImm8 imm8) { u32 immediate = rotr(imm8, rotate * 2); - X64Reg Rn = reg_alloc.AllocAndLockTemp(); + X64Reg Rn = reg_alloc.AllocTemp(); if (Rn_index == 15) { code->MOV(32, R(Rn), Imm32(GetReg15Value())); @@ -462,10 +421,10 @@ void JitX64::TST_imm(Cond cond, ArmReg Rn_index, int rotate, ArmImm8 imm8) { X64Reg Rn; if (Rn_index == 15) { - Rn = reg_alloc.AllocAndLockTemp(); + Rn = reg_alloc.AllocTemp(); code->MOV(32, R(Rn), Imm32(GetReg15Value())); } else { - Rn = reg_alloc.BindAndLockArm(Rn_index); + Rn = reg_alloc.LoadAndLockArm(Rn_index); } code->TEST(32, R(Rn), Imm32(immediate)); diff --git a/src/core/arm/jit_x64/interface.cpp b/src/core/arm/jit_x64/interface.cpp index 2c7be6440..81b7d7cea 100644 --- a/src/core/arm/jit_x64/interface.cpp +++ b/src/core/arm/jit_x64/interface.cpp @@ -3,6 +3,8 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include + #include "common/assert.h" #include "common/make_unique.h" #include "common/x64/abi.h" diff --git a/src/core/arm/jit_x64/reg_alloc.cpp b/src/core/arm/jit_x64/reg_alloc.cpp index e43fbe5cb..3bc7c779b 100644 --- a/src/core/arm/jit_x64/reg_alloc.cpp +++ b/src/core/arm/jit_x64/reg_alloc.cpp @@ -40,7 +40,7 @@ static Gen::OpArg MJitStateCpuReg(ArmReg arm_reg) { static_assert(std::is_same::value, "JitState::cpu_state must be ARMul_State"); static_assert(std::is_same>::value, "ARMul_State::Reg must be std::array"); - ASSERT(arm_reg >= 0 && arm_reg <= 14); + ASSERT(arm_reg >= 0 && arm_reg <= 15); return Gen::MDisp(jit_state_reg, offsetof(JitState, cpu_state) + offsetof(ARMul_State, Reg) + (arm_reg) * sizeof(u32)); } @@ -112,6 +112,8 @@ void RegAlloc::UnlockX64(Gen::X64Reg x64_reg) { } void RegAlloc::FlushArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 15); + ArmState& arm_state = arm_gpr[arm_reg]; Gen::X64Reg x64_reg = GetX64For(arm_reg); X64State& x64_state = x64_gpr[x64_reg_to_index.at(x64_reg)]; @@ -129,6 +131,8 @@ void RegAlloc::FlushArm(ArmReg arm_reg) { } void RegAlloc::LockArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 14); // Not valid for R15 (cannot read from it) + ArmState& arm_state = arm_gpr[arm_reg]; ASSERT(!arm_state.locked); @@ -145,7 +149,18 @@ void RegAlloc::LockArm(ArmReg arm_reg) { } } -Gen::X64Reg RegAlloc::BindMaybeLoadAndLockArm(ArmReg arm_reg, bool load) { +void RegAlloc::LockAndDirtyArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 14); // Not valid for R15 (cannot read from it) + + ArmState& arm_state = arm_gpr[arm_reg]; + + LockArm(arm_reg); + if (arm_state.location.IsSimpleReg()) { + MarkDirty(arm_reg); + } +} + +Gen::X64Reg RegAlloc::BindArmToX64(ArmReg arm_reg, bool load) { ArmState& arm_state = arm_gpr[arm_reg]; ASSERT(!arm_state.locked); @@ -176,15 +191,27 @@ Gen::X64Reg RegAlloc::BindMaybeLoadAndLockArm(ArmReg arm_reg, bool load) { return x64_reg; } -Gen::X64Reg RegAlloc::BindAndLockArm(ArmReg arm_reg) { - return BindMaybeLoadAndLockArm(arm_reg, true); +Gen::X64Reg RegAlloc::LoadAndLockArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 14); // Not valid for R15 (cannot read from it) + + const Gen::X64Reg x64_reg = BindArmToX64(arm_reg, true); + + return x64_reg; } -Gen::X64Reg RegAlloc::BindNoLoadAndLockArm(ArmReg arm_reg) { - return BindMaybeLoadAndLockArm(arm_reg, false); +Gen::X64Reg RegAlloc::WriteOnlyLockArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 15); // Valid for R15 (we're not reading from it) + + const Gen::X64Reg x64_reg = BindArmToX64(arm_reg, false); + + MarkDirty(arm_reg); + + return x64_reg; } void RegAlloc::UnlockArm(ArmReg arm_reg) { + ASSERT(arm_reg >= 0 && arm_reg <= 15); + ArmState& arm_state = arm_gpr[arm_reg]; ASSERT(arm_state.locked); @@ -258,7 +285,7 @@ Gen::OpArg RegAlloc::ArmR(ArmReg arm_reg) { return arm_state.location; } -Gen::X64Reg RegAlloc::AllocAndLockTemp() { +Gen::X64Reg RegAlloc::AllocTemp() { const Gen::X64Reg x64_reg = AllocReg(); X64State& x64_state = x64_gpr[x64_reg_to_index.at(x64_reg)]; x64_state.locked = true; @@ -277,7 +304,7 @@ void RegAlloc::UnlockTemp(Gen::X64Reg x64_reg) { x64_state.state = X64State::State::Free; } -Gen::X64Reg RegAlloc::BindAndLockMemoryMap() { +Gen::X64Reg RegAlloc::LoadMemoryMap() { // First check to see if it exists. for (auto i : x64_reg_to_index) { X64State& x64_state = x64_gpr[i.second]; @@ -327,7 +354,7 @@ void RegAlloc::AssertNoLocked() { } Gen::X64Reg RegAlloc::AllocReg() { - // TODO: This is terrible. + // TODO: Improve with an actual register allocator as this is terrible. // First check to see if there anything free. for (auto i : x64_reg_to_index) { diff --git a/src/core/arm/jit_x64/reg_alloc.h b/src/core/arm/jit_x64/reg_alloc.h index 9472ad88d..f07a0f5fb 100644 --- a/src/core/arm/jit_x64/reg_alloc.h +++ b/src/core/arm/jit_x64/reg_alloc.h @@ -22,10 +22,30 @@ namespace JitX64 { class RegAlloc final { private: struct ArmState { - Gen::OpArg location; - bool locked; + /** + * Where is the current value of this register? + * There are two options: + * - In an x64 register, in which case location.IsSimpleReg() == true. + * - In memory in ARMul_State, in which case location == MJitStateCpuReg(arm_reg). + */ + Gen::OpArg location = { 0, 0, Gen::INVALID_REG, Gen::INVALID_REG }; + bool locked = false; }; + /** + * Possible states of X64State: + * + * Free (locked must be false): This x64 reg is free to be allocated for any purpose. + * Temp (locked must be true): This x64 reg is being used as a temporary for a calculation. + * DirtyArmReg (arm_reg is valid): This x64 reg holds the value of an ARM reg. + * It's value has been changed since being loaded from memory. + * This value must be flushed back to memory. + * CleanArmReg (arm_reg is valid): This x64 reg holds the value of an ARM reg. + * It's value has not been changed from when it's been loaded from memory. + * This value may be discarded. + * MemoryMap: This value holds a pointer to the ARM page table (current unimplemented). + * UserManuallyLocked: User has called LockX64 on this register. Must call UnlockX64 to unlock. + */ struct X64State { enum class State { Free, @@ -36,9 +56,9 @@ private: UserManuallyLocked }; - bool locked; - State state; - ArmReg arm_reg; + bool locked = false; + State state = State::Free; + ArmReg arm_reg = -1; ///< Only holds a valid value when state == DirtyArmReg / CleanArmReg }; std::array arm_gpr; @@ -47,9 +67,15 @@ private: Gen::XEmitter* code = nullptr; public: + RegAlloc() { Init(nullptr); } + /// Initialise register allocator (call compiling a basic block as it resets internal state) void Init(Gen::XEmitter* emitter); + // Manually load and unlock x64 registers: + // This is required rarely. The most significant case is when shifting, + // because shift instructions must use the CL register. + /// Ensures that the state of that register is State::Free. void FlushX64(Gen::X64Reg x64_reg); /// Locks a register: Marks it as in-use so it isn't allocated. @@ -57,49 +83,97 @@ public: /// Unlocks a register: Allows it to be used for allocation again. void UnlockX64(Gen::X64Reg x64_reg); + // Working with ARM registers: + + /** + * Locks an ARM register so it doesn't move. + * ARM reg may either be in a x64 reg or in memory. + * We're going to read from it only. (Use ArmR to do so.) + * Call UnlockArm when done. + */ + void LockArm(ArmReg arm_reg); + /** + * Locks an ARM register so it doesn't move. + * ARM reg may either be in a x64 reg or in memory. + * We're going to read and/or write to it. (Use ArmR to do so.) + * Call UnlockArm when done. + */ + void LockAndDirtyArm(ArmReg arm_reg); + /// Gets the current location of this ARM register. (ASSERTS IF NOT LOCKED!) + Gen::OpArg ArmR(ArmReg arm_reg); + + /** + * Allocates a x64 register for an ARM register and ensure it's value is loaded into it. + * ARM reg is in an x64 reg. + * We're going to read from it only. (Call MarkDirty if you want to write to it.) + * Call UnlockArm when done. + */ + Gen::X64Reg LoadAndLockArm(ArmReg arm_reg); + /** + * Allocates a x64 register for an ARM register and doesn't bother loading it's value to it. + * ARM reg is in an x64 reg. + * We're going to write to it only. (DO NOT READ, WRITE-ONLY. Also MarkDirty has been called for you.) + * Call UnlockArm when done. + */ + Gen::X64Reg WriteOnlyLockArm(ArmReg arm_reg); + + /** + * Marks an ARM register as dirty. + * If you don't mark something as dirty it won't be flushed back to memory. + * May only be called while an ARM register is locked. + */ + void MarkDirty(ArmReg arm_reg); + /// Unlock ARM register. + void UnlockArm(ArmReg arm_reg); + /// Ensures that this ARM register is not in an x64 register. void FlushArm(ArmReg arm_reg); - /// Locks an ARM register so it doesn't move. - void LockArm(ArmReg arm_reg); - /// Allocates a x64 register for an ARM register and loads its value into it if necessary. - Gen::X64Reg BindAndLockArm(ArmReg arm_reg); - /// Allocates a x64 register for an ARM register but does not load a value. - Gen::X64Reg BindNoLoadAndLockArm(ArmReg arm_reg); - /// Unlock ARM register so the register is free to move and the underlying x64 register is available (if any). - void UnlockArm(ArmReg arm_reg); + /// Flush all ARM registers. void FlushAllArm(); - /// Marks an ARM register as dirty. If you don't mark something as dirty it won't be flushed. - void MarkDirty(ArmReg arm_reg); - /// Flush absolutely everything. + /** + * Flush absolutely everything. + * You MUST always flush everything: + * - just before a branch occurs + * - just before calling into the interpreter + * - just before calling a host function + * - just before returning to the dispatcher + * - just before jumping to a new BB + */ void FlushEverything(); /// Gets the x64 register which corresponds to that ARM register. (ASSERTS IF NOT IN A x64 REG OR NOT LOCKED!) Gen::X64Reg GetX64For(ArmReg arm_reg); - /// Gets the current location of this ARM register. (ASSERTS IF NOT LOCKED!) - Gen::OpArg ArmR(ArmReg arm_reg); + + // Temporaries: /// Allocates a temporary register - Gen::X64Reg AllocAndLockTemp(); + Gen::X64Reg AllocTemp(); /// Releases a temporary register void UnlockTemp(Gen::X64Reg x64_reg); + // Page table: + /// Gets the x64 register with the address of the memory map in it. Allocates one if one doesn't already exist. - Gen::X64Reg BindAndLockMemoryMap(); + Gen::X64Reg LoadMemoryMap(); /// Releases the memory map register. void UnlockMemoryMap(Gen::X64Reg x64_reg); + // JitState pointer: + /// Returns the register in which the JitState pointer is stored. Gen::X64Reg JitStateReg(); + // Debug: + void AssertNoLocked(); private: /// INTERNAL: Allocates a register that is free. Flushes registers that are not locked if necessary. Gen::X64Reg AllocReg(); - /// INTERNAL: Implementation of BindNoLoadAndLockArm and BindAndLockArm - Gen::X64Reg BindMaybeLoadAndLockArm(ArmReg arm_reg, bool load); + /// INTERNAL: Binds an ARM register to an X64 register. Retrieves binding if already bound. + Gen::X64Reg BindArmToX64(ArmReg arm_reg, bool load); }; }