From 67553cf56c15e37b5a49ab74028a6428401a3dc1 Mon Sep 17 00:00:00 2001 From: MerryMage Date: Sat, 2 Apr 2016 10:09:46 +0100 Subject: [PATCH] JitX64: Implement load/store instructions --- src/core/arm/decoder/arm.cpp | 40 +- src/core/arm/decoder/decoder.h | 42 +- src/core/arm/decoder/thumb.cpp | 34 +- src/core/arm/jit_x64/common.h | 2 + .../jit_x64/instructions/data_processing.cpp | 116 +-- .../arm/jit_x64/instructions/load_store.cpp | 805 +++++++++++++++++- src/core/arm/jit_x64/interface.cpp | 4 + src/core/arm/jit_x64/interpret.cpp | 22 +- src/core/arm/jit_x64/jit_x64.cpp | 18 + src/core/arm/jit_x64/jit_x64.h | 63 +- src/core/arm/jit_x64/reg_alloc.cpp | 25 +- src/core/arm/jit_x64/reg_alloc.h | 7 + src/tests/CMakeLists.txt | 1 + .../core/arm/jit_x64/fuzz_arm_common.cpp | 72 +- .../core/arm/jit_x64/fuzz_arm_load_store.cpp | 96 +++ 15 files changed, 1167 insertions(+), 180 deletions(-) create mode 100644 src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp diff --git a/src/core/arm/decoder/arm.cpp b/src/core/arm/decoder/arm.cpp index 68c2527e0..454f90de9 100644 --- a/src/core/arm/decoder/arm.cpp +++ b/src/core/arm/decoder/arm.cpp @@ -228,38 +228,38 @@ static const std::array arm_instruction_table = {{ { "SWP", MakeMatcher<0>("----00010-00--------00001001----", &Visitor::SWP) }, // ARMv2S // Load/Store instructions - { "LDR (imm)", MakeMatcher<0>("----010--0-1--------------------", &Visitor::LDR_imm) }, - { "LDR (reg)", MakeMatcher<0>("----011--0-1---------------0----", &Visitor::LDR_reg) }, - { "LDRB (imm)", MakeMatcher<0>("----010--1-1--------------------", &Visitor::LDRB_imm) }, - { "LDRB (reg)", MakeMatcher<0>("----011--1-1---------------0----", &Visitor::LDRB_reg) }, + { "LDR (imm)", MakeMatcher<7>("cccc010pu0w1nnnnddddvvvvvvvvvvvv", &Visitor::LDR_imm) }, + { "LDR (reg)", MakeMatcher<9>("cccc011pu0w1nnnnddddvvvvvrr0mmmm", &Visitor::LDR_reg) }, + { "LDRB (imm)", MakeMatcher<7>("cccc010pu1w1nnnnddddvvvvvvvvvvvv", &Visitor::LDRB_imm) }, + { "LDRB (reg)", MakeMatcher<9>("cccc011pu1w1nnnnddddvvvvvrr0mmmm", &Visitor::LDRB_reg) }, { "LDRBT (A1)", MakeMatcher<0>("----0100-111--------------------", &Visitor::LDRBT) }, { "LDRBT (A2)", MakeMatcher<0>("----0110-111---------------0----", &Visitor::LDRBT) }, - { "LDRD (imm)", MakeMatcher<0>("----000--1-0------------1101----", &Visitor::LDRD_imm) }, // ARMv5E - { "LDRD (reg)", MakeMatcher<0>("----000--0-0--------00001101----", &Visitor::LDRD_reg) }, // ARMv5E - { "LDRH (imm)", MakeMatcher<0>("----000--1-1------------1011----", &Visitor::LDRH_imm) }, - { "LDRH (reg)", MakeMatcher<0>("----000--0-1--------00001011----", &Visitor::LDRH_reg) }, + { "LDRD (imm)", MakeMatcher<8>("cccc000pu1w0nnnnddddvvvv1101vvvv", &Visitor::LDRD_imm) }, // ARMv5E + { "LDRD (reg)", MakeMatcher<7>("cccc000pu0w0nnnndddd00001101mmmm", &Visitor::LDRD_reg) }, // ARMv5E + { "LDRH (imm)", MakeMatcher<8>("cccc000pu1w1nnnnddddvvvv1011vvvv", &Visitor::LDRH_imm) }, + { "LDRH (reg)", MakeMatcher<7>("cccc000pu0w1nnnndddd00001011mmmm", &Visitor::LDRH_reg) }, { "LDRHT (A1)", MakeMatcher<0>("----0000-111------------1011----", &Visitor::LDRHT) }, { "LDRHT (A2)", MakeMatcher<0>("----0000-011--------00001011----", &Visitor::LDRHT) }, - { "LDRSB (imm)", MakeMatcher<0>("----000--1-1------------1101----", &Visitor::LDRSB_imm) }, - { "LDRSB (reg)", MakeMatcher<0>("----000--0-1--------00001101----", &Visitor::LDRSB_reg) }, + { "LDRSB (imm)", MakeMatcher<8>("cccc000pu1w1nnnnddddvvvv1101vvvv", &Visitor::LDRSB_imm) }, + { "LDRSB (reg)", MakeMatcher<7>("cccc000pu0w1nnnndddd00001101mmmm", 
&Visitor::LDRSB_reg) }, { "LDRSBT (A1)", MakeMatcher<0>("----0000-111------------1101----", &Visitor::LDRSBT) }, { "LDRSBT (A2)", MakeMatcher<0>("----0000-011--------00001101----", &Visitor::LDRSBT) }, - { "LDRSH (imm)", MakeMatcher<0>("----000--1-1------------1111----", &Visitor::LDRSH_imm) }, - { "LDRSH (reg)", MakeMatcher<0>("----000--0-1--------00001111----", &Visitor::LDRSH_reg) }, + { "LDRSH (imm)", MakeMatcher<8>("cccc000pu1w1nnnnddddvvvv1111vvvv", &Visitor::LDRSH_imm) }, + { "LDRSH (reg)", MakeMatcher<7>("cccc000pu0w1nnnndddd00001111mmmm", &Visitor::LDRSH_reg) }, { "LDRSHT (A1)", MakeMatcher<0>("----0000-111------------1111----", &Visitor::LDRSHT) }, { "LDRSHT (A2)", MakeMatcher<0>("----0000-011--------00001111----", &Visitor::LDRSHT) }, { "LDRT (A1)", MakeMatcher<0>("----0100-011--------------------", &Visitor::LDRT) }, { "LDRT (A2)", MakeMatcher<0>("----0110-011---------------0----", &Visitor::LDRT) }, - { "STR (imm)", MakeMatcher<0>("----010--0-0--------------------", &Visitor::STR_imm) }, - { "STR (reg)", MakeMatcher<0>("----011--0-0---------------0----", &Visitor::STR_reg) }, - { "STRB (imm)", MakeMatcher<0>("----010--1-0--------------------", &Visitor::STRB_imm) }, - { "STRB (reg)", MakeMatcher<0>("----011--1-0---------------0----", &Visitor::STRB_reg) }, + { "STR (imm)", MakeMatcher<7>("cccc010pu0w0nnnnddddvvvvvvvvvvvv", &Visitor::STR_imm) }, + { "STR (reg)", MakeMatcher<9>("cccc011pu0w0nnnnddddvvvvvrr0mmmm", &Visitor::STR_reg) }, + { "STRB (imm)", MakeMatcher<7>("cccc010pu1w0nnnnddddvvvvvvvvvvvv", &Visitor::STRB_imm) }, + { "STRB (reg)", MakeMatcher<9>("cccc011pu1w0nnnnddddvvvvvrr0mmmm", &Visitor::STRB_reg) }, { "STRBT (A1)", MakeMatcher<0>("----0100-110--------------------", &Visitor::STRBT) }, { "STRBT (A2)", MakeMatcher<0>("----0110-110---------------0----", &Visitor::STRBT) }, - { "STRD (imm)", MakeMatcher<0>("----000--1-0------------1111----", &Visitor::STRD_imm) }, // ARMv5E - { "STRD (reg)", MakeMatcher<0>("----000--0-0--------00001111----", &Visitor::STRD_reg) }, // ARMv5E - { "STRH (imm)", MakeMatcher<0>("----000--1-0------------1011----", &Visitor::STRH_imm) }, - { "STRH (reg)", MakeMatcher<0>("----000--0-0--------00001011----", &Visitor::STRH_reg) }, + { "STRD (imm)", MakeMatcher<8>("cccc000pu1w0nnnnddddvvvv1111vvvv", &Visitor::STRD_imm) }, // ARMv5E + { "STRD (reg)", MakeMatcher<7>("cccc000pu0w0nnnndddd00001111mmmm", &Visitor::STRD_reg) }, // ARMv5E + { "STRH (imm)", MakeMatcher<8>("cccc000pu1w0nnnnddddvvvv1011vvvv", &Visitor::STRH_imm) }, + { "STRH (reg)", MakeMatcher<7>("cccc000pu0w0nnnndddd00001011mmmm", &Visitor::STRH_reg) }, { "STRHT (A1)", MakeMatcher<0>("----0000-110------------1011----", &Visitor::STRHT) }, { "STRHT (A2)", MakeMatcher<0>("----0000-010--------00001011----", &Visitor::STRHT) }, { "STRT (A1)", MakeMatcher<0>("----0100-010--------------------", &Visitor::STRT) }, diff --git a/src/core/arm/decoder/decoder.h b/src/core/arm/decoder/decoder.h index 4316a16fe..4dd36fc03 100644 --- a/src/core/arm/decoder/decoder.h +++ b/src/core/arm/decoder/decoder.h @@ -51,9 +51,11 @@ public: }; using Cond = u8; +using Imm4 = u32; using Imm5 = u32; using Imm8 = u32; using Imm11 = u32; +using Imm12 = u32; using Imm24 = u32; using Register = int; using ShiftType = int; @@ -156,32 +158,32 @@ public: virtual void YIELD() = 0; // Load/Store instructions - virtual void LDR_imm() = 0; - virtual void LDR_reg() = 0; - virtual void LDRB_imm() = 0; - virtual void LDRB_reg() = 0; + virtual void LDR_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm12 imm12) 
= 0; + virtual void LDR_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm5 imm5, ShiftType shift, Register Rm) = 0; + virtual void LDRB_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm12 imm12) = 0; + virtual void LDRB_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm5 imm5, ShiftType shift, Register Rm) = 0; virtual void LDRBT() = 0; - virtual void LDRD_imm() = 0; - virtual void LDRD_reg() = 0; - virtual void LDRH_imm() = 0; - virtual void LDRH_reg() = 0; + virtual void LDRD_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void LDRD_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; + virtual void LDRH_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void LDRH_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; virtual void LDRHT() = 0; - virtual void LDRSB_imm() = 0; - virtual void LDRSB_reg() = 0; + virtual void LDRSB_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void LDRSB_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; virtual void LDRSBT() = 0; - virtual void LDRSH_imm() = 0; - virtual void LDRSH_reg() = 0; + virtual void LDRSH_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void LDRSH_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; virtual void LDRSHT() = 0; virtual void LDRT() = 0; - virtual void STR_imm() = 0; - virtual void STR_reg() = 0; - virtual void STRB_imm() = 0; - virtual void STRB_reg() = 0; + virtual void STR_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm12 imm12) = 0; + virtual void STR_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm5 imm5, ShiftType shift, Register Rm) = 0; + virtual void STRB_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm12 imm12) = 0; + virtual void STRB_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm5 imm5, ShiftType shift, Register Rm) = 0; virtual void STRBT() = 0; - virtual void STRD_imm() = 0; - virtual void STRD_reg() = 0; - virtual void STRH_imm() = 0; - virtual void STRH_reg() = 0; + virtual void STRD_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void STRD_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; + virtual void STRH_imm(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Imm4 imm8a, Imm4 imm8b) = 0; + virtual void STRH_reg(Cond cond, bool P, bool U, bool W, Register Rn, Register Rd, Register Rm) = 0; virtual void STRHT() = 0; virtual void STRT() = 0; diff --git a/src/core/arm/decoder/thumb.cpp b/src/core/arm/decoder/thumb.cpp index d646a7de0..7377b8c5d 100644 --- a/src/core/arm/decoder/thumb.cpp +++ b/src/core/arm/decoder/thumb.cpp @@ -224,7 +224,7 @@ static const std::array thumb_instruction_table = { { // LDR Rd, [PC, #] Register Rd = bits<8, 10>(instruction); u32 imm8 = bits<0, 7>(instruction); - v->LDR_imm(); + v->LDR_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, 15, Rd, imm8 << 2); })}, { "load/store reg offset", MakeMatcher("0101oooxxxxxxxxx", [](Visitor* v, u32 instruction) { u32 opcode = bits<9, 11>(instruction); @@ -233,28 +233,28 @@ static const std::array thumb_instruction_table = { { Register Rd = bits<0, 2>(instruction); switch (opcode) { 
case 0: // STR Rd, [Rn, Rm] - v->STR_reg(); + v->STR_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, 0, 0, Rm); break; case 1: // STRH Rd, [Rn, Rm] - v->STRH_reg(); + v->STRH_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, Rm); break; case 2: // STRB Rd, [Rn, Rm] - v->STRB_reg(); + v->STRB_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, 0, 0, Rm); break; case 3: // LDRSB Rd, [Rn, Rm] - v->LDRSB_reg(); + v->LDRSB_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, Rm); break; case 4: // LDR Rd, [Rn, Rm] - v->LDR_reg(); + v->LDR_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, 0, 0, Rm); break; case 5: // LDRH Rd, [Rn, Rm] - v->LDRH_reg(); + v->LDRH_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, Rm); break; case 6: // LDRB Rd, [Rn, Rm] - v->LDRB_reg(); + v->LDRB_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, 0, 0, Rm); break; case 7: // LDRSH Rd, [Rn, Rm] - v->LDRSH_reg(); + v->LDRSH_reg(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, Rm); break; default: UNREACHABLE(); @@ -267,16 +267,16 @@ static const std::array thumb_instruction_table = { { Register Rd = bits<0, 2>(instruction); switch (opc) { case 0: // STR Rd, [Rn, #offset] - v->STR_imm(); + v->STR_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, offset * 4); break; case 1: // LDR Rd, [Rn, #offset] - v->LDR_imm(); + v->LDR_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, offset * 4); break; case 2: // STRB Rd, [Rn, #offset] - v->STRB_imm(); + v->STRB_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, offset); break; case 3: // LDRB Rd, [Rn, #offset] - v->LDRB_imm(); + v->LDRB_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, offset); break; default: UNREACHABLE(); @@ -288,9 +288,9 @@ static const std::array thumb_instruction_table = { { Register Rn = bits<3, 5>(instruction); Register Rd = bits<0, 2>(instruction); if (!L) { // STRH Rd, [Rn, #offset] - v->STRH_imm(); + v->STRH_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, (offset * 2) >> 4, (offset * 2) & 0xF); } else { // LDRH Rd, [Rn, #offset] - v->LDRH_imm(); + v->LDRH_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, Rn, Rd, (offset * 2) >> 4, (offset * 2) & 0xF); } })}, { "load/store stack", MakeMatcher("1001xxxxxxxxxxxx", [](Visitor* v, u32 instruction) { @@ -298,9 +298,9 @@ static const std::array thumb_instruction_table = { { Register Rd = bits<8, 10>(instruction); u32 offset = bits<0, 7>(instruction); if (!L) { // STR Rd, [SP, #offset] - v->STR_imm(); + v->STR_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, 13, Rd, offset * 4); } else { // LDR Rd, [SP, #offset] - v->LDR_imm(); + v->LDR_imm(0xE, /*P=*/1, /*U=*/1, /*W=*/0, 13, Rd, offset * 4); } })}, { "add to sp/pc", MakeMatcher("1010oxxxxxxxxxxx", [](Visitor* v, u32 instruction) { diff --git a/src/core/arm/jit_x64/common.h b/src/core/arm/jit_x64/common.h index 8a0a910a1..4cdbf3079 100644 --- a/src/core/arm/jit_x64/common.h +++ b/src/core/arm/jit_x64/common.h @@ -12,9 +12,11 @@ namespace JitX64 { using ArmReg = ArmDecoder::Register; +using ArmImm4 = ArmDecoder::Imm4; using ArmImm5 = ArmDecoder::Imm5; using ArmImm8 = ArmDecoder::Imm8; using ArmImm11 = ArmDecoder::Imm11; +using ArmImm12 = ArmDecoder::Imm12; using ArmImm24 = ArmDecoder::Imm24; using Cond = ArmDecoder::Cond; using ShiftType = ArmDecoder::ShiftType; diff --git a/src/core/arm/jit_x64/instructions/data_processing.cpp b/src/core/arm/jit_x64/instructions/data_processing.cpp index 79a87fc00..d27b38f5c 100644 --- a/src/core/arm/jit_x64/instructions/data_processing.cpp +++ b/src/core/arm/jit_x64/instructions/data_processing.cpp @@ -61,11 +61,68 @@ void JitX64::CompileDataProcessingHelper_Reverse(ArmReg Rn_index, ArmReg Rd_inde } } -X64Reg 
JitX64::CompileDataProcessingHelper_reg(ArmImm5 imm5, ShiftType shift, ArmReg Rm_index, bool do_shifter_carry_out) { - // Caller must call reg_alloc.UnlockTemp on return value. +void JitX64::CompileShifter_imm(X64Reg dest, ArmImm5 imm5, ShiftType shift, bool do_shifter_carry_out) { + // dest must contain a copy of the value of Rm. // if do_shifter_carry_out, // we output code that calculates and puts shifter_carry_out into MJitStateCFlag(). + switch (shift) { + case 0b00: // Logical shift left by immediate + if (imm5 != 0) { + code->SHL(32, R(dest), Imm8(imm5)); + if (do_shifter_carry_out) { + code->SETcc(CC_C, MJitStateCFlag()); + } + } + return; + case 0b01: // Logical shift right by immediate + if (imm5 == 0) { + if (do_shifter_carry_out) { + code->BT(32, R(dest), Imm8(31)); + code->SETcc(CC_C, MJitStateCFlag()); + } + code->MOV(64, R(dest), Imm32(0)); + } else { + code->SHR(32, R(dest), Imm8(imm5)); + if (do_shifter_carry_out) { + code->SETcc(CC_C, MJitStateCFlag()); + } + } + return; + case 0b10: // Arithmetic shift right by immediate + if (imm5 == 0) { + if (do_shifter_carry_out) { + code->BT(32, R(dest), Imm8(31)); + code->SETcc(CC_C, MJitStateCFlag()); + } + code->SAR(32, R(dest), Imm8(31)); + } else { + code->SAR(32, R(dest), Imm8(imm5)); + if (do_shifter_carry_out) { + code->SETcc(CC_C, MJitStateCFlag()); + } + } + return; + case 0b11: // Rotate right by immediate + if (imm5 == 0) { //RRX + code->BT(8, MJitStateCFlag(), Imm8(0)); + code->RCR(32, R(dest), Imm8(1)); + if (do_shifter_carry_out) { + code->SETcc(CC_C, MJitStateCFlag()); + } + } else { + code->ROR(32, R(dest), Imm8(imm5)); + if (do_shifter_carry_out) { + code->SETcc(CC_C, MJitStateCFlag()); + } + } + return; + } + + UNREACHABLE(); +} + +X64Reg JitX64::CompileDataProcessingHelper_reg(ArmImm5 imm5, ShiftType shift, ArmReg Rm_index, bool do_shifter_carry_out) { X64Reg tmp = reg_alloc.AllocTemp(); if (Rm_index != 15) { @@ -80,60 +137,9 @@ X64Reg JitX64::CompileDataProcessingHelper_reg(ArmImm5 imm5, ShiftType shift, Ar cond_manager.FlagsDirty(); } - switch (shift) { - case 0b00: // Logical shift left by immediate - if (imm5 != 0) { - code->SHL(32, R(tmp), Imm8(imm5)); - if (do_shifter_carry_out) { - code->SETcc(CC_C, MJitStateCFlag()); - } - } - return tmp; - case 0b01: // Logical shift right by immediate - if (imm5 == 0) { - if (do_shifter_carry_out) { - code->BT(32, R(tmp), Imm8(31)); - code->SETcc(CC_C, MJitStateCFlag()); - } - code->MOV(64, R(tmp), Imm32(0)); - } else { - code->SHR(32, R(tmp), Imm8(imm5)); - if (do_shifter_carry_out) { - code->SETcc(CC_C, MJitStateCFlag()); - } - } - return tmp; - case 0b10: // Arithmetic shift right by immediate - if (imm5 == 0) { - if (do_shifter_carry_out) { - code->BT(32, R(tmp), Imm8(31)); - code->SETcc(CC_C, MJitStateCFlag()); - } - code->SAR(32, R(tmp), Imm8(31)); - } else { - code->SAR(32, R(tmp), Imm8(imm5)); - if (do_shifter_carry_out) { - code->SETcc(CC_C, MJitStateCFlag()); - } - } - return tmp; - case 0b11: // Rotate right by immediate - if (imm5 == 0) { //RRX - code->BT(8, MJitStateCFlag(), Imm8(0)); - code->RCR(32, R(tmp), Imm8(1)); - if (do_shifter_carry_out) { - code->SETcc(CC_C, MJitStateCFlag()); - } - } else { - code->ROR(32, R(tmp), Imm8(imm5)); - if (do_shifter_carry_out) { - code->SETcc(CC_C, MJitStateCFlag()); - } - } - return tmp; - } + CompileShifter_imm(tmp, imm5, shift, do_shifter_carry_out); - UNREACHABLE(); + return tmp; } X64Reg JitX64::CompileDataProcessingHelper_rsr(ArmReg Rs_index, ShiftType shift, ArmReg Rm_index, bool do_shifter_carry_out) { diff 
--git a/src/core/arm/jit_x64/instructions/load_store.cpp b/src/core/arm/jit_x64/instructions/load_store.cpp index 6b069fd93..24eab98f0 100644 --- a/src/core/arm/jit_x64/instructions/load_store.cpp +++ b/src/core/arm/jit_x64/instructions/load_store.cpp @@ -2,37 +2,800 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/swap.h" +#include "common/x64/abi.h" + #include "core/arm/jit_x64/jit_x64.h" +#include "core/memory.h" namespace JitX64 { -// Load/Store instructions -void JitX64::LDR_imm() { CompileInterpretInstruction(); } -void JitX64::LDR_reg() { CompileInterpretInstruction(); } -void JitX64::LDRB_imm() { CompileInterpretInstruction(); } -void JitX64::LDRB_reg() { CompileInterpretInstruction(); } +// TODO: Loads from constant memory regions can be turned into immediate constant loads. + +using namespace Gen; + +/// This function assumes that the value of Rn is already in dest except for R15. +void JitX64::LoadAndStoreWordOrUnsignedByte_Immediate_Helper(X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12) { + // address = Rn +/- imm12 + + if (Rn_index == 15) { + u32 address; + if (U) { + address = GetReg15Value_WordAligned() + imm12; + } else { + address = GetReg15Value_WordAligned() - imm12; + } + code->MOV(32, R(dest), Imm32(address)); + } else { + if (U) { + code->ADD(32, R(dest), Imm32(imm12)); + } else { + code->SUB(32, R(dest), Imm32(imm12)); + } + } +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset(X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12) { + if (Rn_index != 15) { + OpArg Rn = reg_alloc.LockArmForRead(Rn_index); + code->MOV(32, R(dest), Rn); + reg_alloc.UnlockArm(Rn_index); + } + + LoadAndStoreWordOrUnsignedByte_Immediate_Helper(dest, U, Rn_index, imm12); +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed(X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12) { + ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE"); + + X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index); + + LoadAndStoreWordOrUnsignedByte_Immediate_Helper(Rn, U, Rn_index, imm12); + + code->MOV(32, R(dest), R(Rn)); + reg_alloc.UnlockArm(Rn_index); +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed(X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12) { + ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE"); + + X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index); + code->MOV(32, R(dest), R(Rn)); + + LoadAndStoreWordOrUnsignedByte_Immediate_Helper(Rn, U, Rn_index, imm12); + + reg_alloc.UnlockArm(Rn_index); +} + +/// This function assumes that the value of Rn is already in dest. +void JitX64::LoadAndStoreWordOrUnsignedByte_Register_Helper(X64Reg dest, bool U, ArmReg Rn_index, ArmReg Rm_index) { + // address = Rn +/- Rm + + ASSERT_MSG(Rm_index != 15, "UNPREDICTABLE"); + + if (Rm_index == Rn_index) { + if (U) { + // address = Rn + Rn + code->SHL(32, R(dest), Imm8(1)); + } else { + // address = Rn - Rn + code->MOV(32, R(dest), Imm32(0)); + } + return; + } + + OpArg Rm = reg_alloc.LockArmForRead(Rm_index); + + if (U) { + // address = Rn + Rm + code->ADD(32, R(dest), Rm); + } else { + // address = Rn - Rm + code->SUB(32, R(dest), Rm); + } + + reg_alloc.UnlockArm(Rm_index); +} + +/// This function assumes that the value of Rn is already in dest. 
+void JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegister_Helper(X64Reg dest, bool U, ArmReg Rn_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + if (imm5 == 0 && shift == 0) { + LoadAndStoreWordOrUnsignedByte_Register_Helper(dest, U, Rn_index, Rm_index); + return; + } + + // index = Rm LSL imm5 / Rm LSR imm5 / Rm ASR imm5 / Rm ROR imm5 / Rm RRX + // address = Rn +/- index + + ASSERT_MSG(Rm_index != 15, "UNPREDICTABLE"); + + // TODO: Optimizations when Rn_index == Rm_index maybe. + + X64Reg index = reg_alloc.AllocTemp(); + if (Rn_index == Rm_index) { + code->MOV(32, R(index), R(dest)); + } else { + OpArg Rm = reg_alloc.LockArmForRead(Rm_index); + code->MOV(32, R(index), Rm); + reg_alloc.UnlockArm(Rm_index); + } + + CompileShifter_imm(index, imm5, shift, false); + + if (U) { + // address = Rn + index + code->ADD(32, R(dest), R(index)); + } else { + // address = Rn - index + code->SUB(32, R(dest), R(index)); + } + + reg_alloc.UnlockTemp(index); +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset(X64Reg dest, bool U, ArmReg Rn_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + if (Rn_index != 15) { + OpArg Rn = reg_alloc.LockArmForRead(Rn_index); + code->MOV(32, R(dest), Rn); + reg_alloc.UnlockArm(Rn_index); + } else { + code->MOV(32, R(dest), Imm32(GetReg15Value_WordAligned())); + } + + LoadAndStoreWordOrUnsignedByte_ScaledRegister_Helper(dest, U, Rn_index, imm5, shift, Rm_index); +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed(X64Reg dest, bool U, ArmReg Rn_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE"); + + X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index); + + LoadAndStoreWordOrUnsignedByte_ScaledRegister_Helper(Rn, U, Rn_index, imm5, shift, Rm_index); + + code->MOV(32, R(dest), R(Rn)); + reg_alloc.UnlockArm(Rn_index); +} + +void JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed(X64Reg dest, bool U, ArmReg Rn_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + ASSERT_MSG(Rn_index != 15, "UNPREDICTABLE"); + + X64Reg Rn = reg_alloc.BindArmForReadWrite(Rn_index); + code->MOV(32, R(dest), R(Rn)); + + LoadAndStoreWordOrUnsignedByte_ScaledRegister_Helper(Rn, U, Rn_index, imm5, shift, Rm_index); + + reg_alloc.UnlockArm(Rn_index); +} + +// TODO: Set up an appropriately mapped region of memory to use instead of compiling CALL instructions. + +static u64 Load64LE(u32 addr) { + // TODO: Improve this. + return Memory::Read32(addr) | (static_cast(Memory::Read32(addr + 4)) << 32); +} + +static u64 Load64BE(u32 addr) { + // TODO: Improve this. 
+ return Common::swap32(Memory::Read32(addr)) | (static_cast(Common::swap32(Memory::Read32(addr + 4))) << 32); +} + +static void Store64LE(u32 addr, u32 v1, u32 v2) { + Memory::Write32(addr, v1); + Memory::Write32(addr + 4, v2); +} + +static void Store64BE(u32 addr, u32 v1, u32 v2) { + Memory::Write32(addr, Common::swap32(v1)); + Memory::Write32(addr+4, Common::swap32(v2)); +} + +static u32 Load32LE(u32 addr) { + return Memory::Read32(addr); +} + +static u32 Load32BE(u32 addr) { + return Common::swap32(Memory::Read32(addr)); +} + +static void Store32LE(u32 addr, u32 value) { + Memory::Write32(addr, value); +} + +static void Store32BE(u32 addr, u32 value) { + Memory::Write32(addr, Common::swap32(value)); +} + +static u16 Load16LE(u32 addr) { + return Memory::Read16(addr); +} + +static u16 Load16BE(u32 addr) { + return Common::swap16(Memory::Read16(addr)); +} + +static void Store16LE(u32 addr, u16 value) { + Memory::Write16(addr, value); +} + +static void Store16BE(u32 addr, u16 value) { + Memory::Write16(addr, Common::swap16(value)); +} + +static u32 Load8(u32 addr) { + return Memory::Read8(addr); +} + +static void Store8(u32 addr, u8 value) { + Memory::Write8(addr, value); +} + +static void GetValueOfRegister(XEmitter* code, RegAlloc& reg_alloc, u32 r15_value, X64Reg x64_reg, ArmReg arm_reg) { + if (arm_reg != 15) { + OpArg Rd = reg_alloc.LockArmForRead(arm_reg); + code->MOV(32, R(x64_reg), Rd); + reg_alloc.UnlockArm(arm_reg); + } else { + // The following is IMPLEMENTATION DEFINED + code->MOV(32, R(x64_reg), Imm32(r15_value)); + } +} + +/** +* This function implements address resolution logic common to all the addressing mode 2 store/load instructions. +* The address is loaded into ABI_PARAM1. +*/ +template +static void LoadStoreCommon_AddrMode2(JitX64* jit, RegAlloc& reg_alloc, bool P, bool W, OffsetFn offset_fn, PreFn pre_fn, PostFn post_fn, Args... args) { + constexpr X64Reg address = ABI_PARAM1; + reg_alloc.FlushX64(address); + reg_alloc.LockX64(address); + + if (P) { + if (!W) { + (jit->*offset_fn)(address, args...); + } else { + (jit->*pre_fn)(address, args...); + } + } else { + if (!W) { + (jit->*post_fn)(address, args...); + } else { + ASSERT_MSG(false, "Translate load/store instructions are unsupported"); + } + } +} + +// Load/Store Instructions: Addressing Mode 2 + +void JitX64::LDR_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm12 imm12) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, imm12); + + CompileCallHost(!current.EFlag ? &Load32LE : &Load32BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + // TODO: Could be optimized as a rebind instead. 
+ X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOV(32, R(Rd), R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDR_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, imm5, shift, Rm_index); + + CompileCallHost(!current.EFlag ? &Load32LE : &Load32BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + // TODO: Could be optimized as a rebind instead. + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOV(32, R(Rd), R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm12 imm12) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, imm12); + + CompileCallHost(&Load8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + // TODO: Could be optimized as a rebind instead. + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVZX(32, 8, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, imm5, shift, Rm_index); + + CompileCallHost(&Load8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + // TODO: Could be optimized as a rebind instead. + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVZX(32, 8, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STR_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm12 imm12) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, imm12); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(!current.EFlag ? 
&Store32LE : &Store32BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STR_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, imm5, shift, Rm_index); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(!current.EFlag ? &Store32LE : &Store32BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm12 imm12) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, imm12); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(&Store8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm5 imm5, ShiftType shift, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode2(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, imm5, shift, Rm_index); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(&Store8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + +/** +* This function implements address resolution logic common to all the addressing mode 3 store/load instructions. +* The address is loaded into ABI_PARAM1. +*/ +template +static void LoadStoreCommon_AddrMode3(JitX64* jit, RegAlloc& reg_alloc, bool P, bool W, OffsetFn offset_fn, PreFn pre_fn, PostFn post_fn, Args... 
args) { + constexpr X64Reg address = ABI_PARAM1; + reg_alloc.FlushX64(address); + reg_alloc.LockX64(address); + + if (P) { + if (!W) { + (jit->*offset_fn)(address, args...); + } else { + (jit->*pre_fn)(address, args...); + } + } else { + if (!W) { + (jit->*post_fn)(address, args...); + } else { + ASSERT_MSG(false, "UNPREDICTABLE"); + } + } +} + +static ArmImm8 CombineImm8ab(ArmImm4 imm8a, ArmImm4 imm8b) { + return (imm8a << 4) | imm8b; +} + +// Load/Store Instructions: Addressing Mode 3 + +void JitX64::LDRD_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + CompileCallHost(!current.EFlag ? &Load64LE : &Load64BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index % 2 == 0 && Rd_index != 14, "UNPREDICTABLE"); + + X64Reg Rd0 = reg_alloc.BindArmForWrite(Rd_index + 0); + X64Reg Rd1 = reg_alloc.BindArmForWrite(Rd_index + 1); + code->MOV(64, R(Rd0), R(ABI_RETURN)); + code->SHR(64, R(ABI_RETURN), Imm8(32)); + code->MOV(32, R(Rd1), R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index + 0); + reg_alloc.UnlockArm(Rd_index + 1); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRD_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + CompileCallHost(!current.EFlag ? &Load64LE : &Load64BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index % 2 == 0 && Rd_index != 14, "UNPREDICTABLE"); + + X64Reg Rd0 = reg_alloc.BindArmForWrite(Rd_index + 0); + X64Reg Rd1 = reg_alloc.BindArmForWrite(Rd_index + 1); + code->MOV(64, R(Rd0), R(ABI_RETURN)); + code->SHR(64, R(ABI_RETURN), Imm8(32)); + code->MOV(32, R(Rd1), R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index + 0); + reg_alloc.UnlockArm(Rd_index + 1); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + CompileCallHost(!current.EFlag ? 
&Load16LE : &Load16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVZX(32, 16, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + CompileCallHost(!current.EFlag ? &Load16LE : &Load16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVZX(32, 16, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRSB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + CompileCallHost(&Load8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVSX(32, 8, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRSB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + CompileCallHost(&Load8); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVSX(32, 8, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRSH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + CompileCallHost(!current.EFlag ? 
&Load16LE : &Load16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVSX(32, 16, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::LDRSH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + CompileCallHost(!current.EFlag ? &Load16LE : &Load16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.LockX64(ABI_RETURN); + + ASSERT_MSG(Rd_index != 15, "UNPREDICTABLE"); + + X64Reg Rd = reg_alloc.BindArmForWrite(Rd_index); + code->MOVSX(32, 16, Rd, R(ABI_RETURN)); + reg_alloc.UnlockArm(Rd_index); + + reg_alloc.UnlockX64(ABI_RETURN); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRD_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + reg_alloc.FlushX64(ABI_PARAM3); + reg_alloc.LockX64(ABI_PARAM3); + + ASSERT_MSG(Rd_index % 2 == 0 && Rd_index != 14, ""); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index + 0); + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM3, Rd_index + 1); + + CompileCallHost(!current.EFlag ? &Store64LE : &Store64BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + reg_alloc.UnlockX64(ABI_PARAM3); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRD_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + reg_alloc.FlushX64(ABI_PARAM3); + reg_alloc.LockX64(ABI_PARAM3); + + ASSERT_MSG(Rd_index % 2 == 0 && Rd_index != 14, ""); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index + 0); + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM3, Rd_index + 1); + + CompileCallHost(!current.EFlag ? 
&Store64LE : &Store64BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + reg_alloc.UnlockX64(ABI_PARAM3); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmImm4 imm8a, ArmImm4 imm8b) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediateOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed, + U, Rn_index, CombineImm8ab(imm8a, imm8b)); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(!current.EFlag ? &Store16LE : &Store16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + +void JitX64::STRH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn_index, ArmReg Rd_index, ArmReg Rm_index) { + cond_manager.CompileCond((ConditionCode)cond); + + LoadStoreCommon_AddrMode3(this, reg_alloc, P, W, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed, + &JitX64::LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed, + U, Rn_index, 0, 0, Rm_index); + + reg_alloc.FlushX64(ABI_PARAM2); + reg_alloc.LockX64(ABI_PARAM2); + + GetValueOfRegister(code, reg_alloc, GetReg15Value(), ABI_PARAM2, Rd_index); + + CompileCallHost(!current.EFlag ? &Store16LE : &Store16BE); + + reg_alloc.UnlockX64(ABI_PARAM1); + reg_alloc.UnlockX64(ABI_PARAM2); + + current.arm_pc += GetInstSize(); +} + void JitX64::LDRBT() { CompileInterpretInstruction(); } -void JitX64::LDRD_imm() { CompileInterpretInstruction(); } -void JitX64::LDRD_reg() { CompileInterpretInstruction(); } -void JitX64::LDRH_imm() { CompileInterpretInstruction(); } -void JitX64::LDRH_reg() { CompileInterpretInstruction(); } void JitX64::LDRHT() { CompileInterpretInstruction(); } -void JitX64::LDRSB_imm() { CompileInterpretInstruction(); } -void JitX64::LDRSB_reg() { CompileInterpretInstruction(); } void JitX64::LDRSBT() { CompileInterpretInstruction(); } -void JitX64::LDRSH_imm() { CompileInterpretInstruction(); } -void JitX64::LDRSH_reg() { CompileInterpretInstruction(); } void JitX64::LDRSHT() { CompileInterpretInstruction(); } void JitX64::LDRT() { CompileInterpretInstruction(); } -void JitX64::STR_imm() { CompileInterpretInstruction(); } -void JitX64::STR_reg() { CompileInterpretInstruction(); } -void JitX64::STRB_imm() { CompileInterpretInstruction(); } -void JitX64::STRB_reg() { CompileInterpretInstruction(); } void JitX64::STRBT() { CompileInterpretInstruction(); } -void JitX64::STRD_imm() { CompileInterpretInstruction(); } -void JitX64::STRD_reg() { CompileInterpretInstruction(); } -void JitX64::STRH_imm() { CompileInterpretInstruction(); } -void JitX64::STRH_reg() { CompileInterpretInstruction(); } void JitX64::STRHT() { CompileInterpretInstruction(); } void JitX64::STRT() { CompileInterpretInstruction(); } diff --git a/src/core/arm/jit_x64/interface.cpp b/src/core/arm/jit_x64/interface.cpp index 46e2f267f..806e23e57 100644 --- a/src/core/arm/jit_x64/interface.cpp +++ b/src/core/arm/jit_x64/interface.cpp @@ -29,6 +29,10 @@ public: run_jit = this->GetCodePtr(); + // This serves two purposes: + // 1. It saves all the registers we as a callee need to save. + // 2. 
It aligns the stack so that the code the JIT emits can assume + // that the stack is appropriately aligned for CALLs. ABI_PushRegistersAndAdjustStack(ABI_ALL_CALLEE_SAVED, 8); MOV(64, MDisp(ABI_PARAM1, offsetof(JitX64::JitState, save_host_RSP)), R(RSP)); diff --git a/src/core/arm/jit_x64/interpret.cpp b/src/core/arm/jit_x64/interpret.cpp index 8ee2d735a..8dd6c42fc 100644 --- a/src/core/arm/jit_x64/interpret.cpp +++ b/src/core/arm/jit_x64/interpret.cpp @@ -44,25 +44,15 @@ void JitX64::CompileInterpretInstruction() { CompileUpdateCycles(); - ASSERT(reg_alloc.JitStateReg() != RSP); - code->MOV(64, R(RSP), MJitStateHostReturnRSP()); - code->MOV(64, R(Gen::ABI_PARAM1), R(reg_alloc.JitStateReg())); - code->MOV(64, R(Gen::ABI_PARAM2), Imm64(current.arm_pc)); - code->MOV(64, R(Gen::ABI_PARAM3), Imm64(current.TFlag)); - code->MOV(64, R(Gen::ABI_PARAM4), Imm64(current.EFlag)); + code->MOV(64, R(ABI_PARAM1), R(reg_alloc.JitStateReg())); + code->MOV(64, R(ABI_PARAM2), Imm64(current.arm_pc)); + code->MOV(64, R(ABI_PARAM3), Imm64(current.TFlag)); + code->MOV(64, R(ABI_PARAM4), Imm64(current.EFlag)); const void *const fn = reinterpret_cast(&CallInterpreter); + CompileCallHost(fn); - const u64 distance = reinterpret_cast(fn) - (reinterpret_cast(code->GetCodePtr()) + 5); - if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { - // Far call - code->MOV(64, R(RAX), ImmPtr(fn)); - code->CALLptr(R(RAX)); - } else { - code->CALL(fn); - } - - code->MOV(64, R(reg_alloc.JitStateReg()), R(Gen::ABI_RETURN)); + code->MOV(64, R(reg_alloc.JitStateReg()), R(ABI_RETURN)); // Return to dispatch code->JMPptr(MJitStateHostReturnRIP()); diff --git a/src/core/arm/jit_x64/jit_x64.cpp b/src/core/arm/jit_x64/jit_x64.cpp index a0b892aa3..a3ae154ef 100644 --- a/src/core/arm/jit_x64/jit_x64.cpp +++ b/src/core/arm/jit_x64/jit_x64.cpp @@ -159,6 +159,24 @@ void JitX64::CompileSingleThumbInstruction() { } } +void JitX64::CompileCallHost(const void* const fn) { + // There is no need to setup the stack as the stored RSP has already been properly aligned. + + reg_alloc.FlushABICallerSaved(); + + ASSERT(reg_alloc.JitStateReg() != RSP); + code->MOV(64, R(RSP), MJitStateHostReturnRSP()); + + const u64 distance = reinterpret_cast(fn) - (reinterpret_cast(code->GetCodePtr()) + 5); + if (distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL) { + // Far call + code->MOV(64, R(RAX), ImmPtr(fn)); + code->CALLptr(R(RAX)); + } else { + code->CALL(fn); + } +} + // Convenience functions: // We static_assert types because anything that calls these functions makes those assumptions. // If the types of the variables are changed please update all code that calls these functions. 
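// A minimal standalone sketch of the rel32 range check that CompileCallHost above relies on:
// a near CALL rel32 only reaches targets whose displacement fits in a signed 32-bit immediate,
// otherwise the emitter falls back to MOV RAX, imm64; CALL RAX. The helper name and signature
// below are illustrative assumptions, not code from this patch.
#include <cstdint>
static bool NeedsFarCall(const void* fn, const uint8_t* call_site) {
    // rel32 in an E8 CALL is measured from the end of the 5-byte instruction.
    const uint64_t distance =
        reinterpret_cast<uint64_t>(fn) - (reinterpret_cast<uint64_t>(call_site) + 5);
    // With wrap-around unsigned arithmetic, displacements outside [-2^31, 2^31) land in this range.
    return distance >= 0x0000000080000000ULL && distance < 0xFFFFFFFF80000000ULL;
}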
diff --git a/src/core/arm/jit_x64/jit_x64.h b/src/core/arm/jit_x64/jit_x64.h index 086e55fb2..66ed3aa26 100644 --- a/src/core/arm/jit_x64/jit_x64.h +++ b/src/core/arm/jit_x64/jit_x64.h @@ -94,7 +94,12 @@ private: Gen::OpArg MJitStateExclusiveTag(); Gen::OpArg MJitStateExclusiveState(); - u32 GetReg15Value() const { return static_cast((current.arm_pc & ~0x1) + GetInstSize() * 2); } + u32 GetReg15Value() const { + return (current.arm_pc & ~0x1) + static_cast(GetInstSize() * 2); + } + u32 GetReg15Value_WordAligned() const { + return (current.arm_pc & ~0x3) + static_cast(GetInstSize() * 2); + } void UpdateFlagsZVCN() { cond_manager.FlagsDirty(); @@ -139,6 +144,9 @@ private: private: void CompileInterpretInstruction(); + void CompileCallHost(const void* const fn); + /// dest must be a temporary that contains a copy of the value of Rm + void CompileShifter_imm(Gen::X64Reg dest, ArmImm5 imm5, ShiftType shift, bool do_shifter_carry_out); // Branch instructions void B(Cond cond, ArmImm24 imm24) override; @@ -158,8 +166,8 @@ private: void STC() override; // Data processing instructions - void CompileDataProcessingHelper(ArmReg Rn_index, ArmReg Rd_index, std::function body); - void CompileDataProcessingHelper_Reverse(ArmReg Rn_index, ArmReg Rd_index, std::function body); + void CompileDataProcessingHelper(ArmReg Rn, ArmReg Rd, std::function body); + void CompileDataProcessingHelper_Reverse(ArmReg Rn, ArmReg Rd, std::function body); Gen::X64Reg CompileDataProcessingHelper_reg(ArmImm5 imm5, ShiftType shift, ArmReg Rm, bool do_shifter_carry_out); Gen::X64Reg CompileDataProcessingHelper_rsr(ArmReg Rs, ShiftType shift, ArmReg Rm, bool do_shifter_carry_out); void ADC_imm(Cond cond, bool S, ArmReg Rn, ArmReg Rd, int rotate, ArmImm8 imm8) override; @@ -238,32 +246,41 @@ private: void YIELD() override; // Load/Store instructions - void LDR_imm() override; - void LDR_reg() override; - void LDRB_imm() override; - void LDRB_reg() override; + void LoadAndStoreWordOrUnsignedByte_Immediate_Helper(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm12 imm12); + void LoadAndStoreWordOrUnsignedByte_Register_Helper(Gen::X64Reg dest, bool U, ArmReg Rn, ArmReg Rm); + void LoadAndStoreWordOrUnsignedByte_ScaledRegister_Helper(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm5 imm5, ShiftType shift, ArmReg Rm); + void LoadAndStoreWordOrUnsignedByte_ImmediateOffset(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm12 imm12); + void LoadAndStoreWordOrUnsignedByte_ImmediatePreIndexed(Gen::X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12); + void LoadAndStoreWordOrUnsignedByte_ImmediatePostIndexed(Gen::X64Reg dest, bool U, ArmReg Rn_index, ArmImm12 imm12); + void LoadAndStoreWordOrUnsignedByte_ScaledRegisterOffset(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm5 imm5, ShiftType shift, ArmReg Rm); + void LoadAndStoreWordOrUnsignedByte_ScaledRegisterPreIndexed(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm5 imm5, ShiftType shift, ArmReg Rm); + void LoadAndStoreWordOrUnsignedByte_ScaledRegisterPostIndexed(Gen::X64Reg dest, bool U, ArmReg Rn, ArmImm5 imm5, ShiftType shift, ArmReg Rm); + void LDR_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm11 imm11) override; + void LDR_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm5 imm5, ShiftType shift, ArmReg Rm) override; + void LDRB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm11 imm11) override; + void LDRB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm5 imm5, ShiftType shift, ArmReg Rm) override; void LDRBT() override; - void 
LDRD_imm() override; - void LDRD_reg() override; - void LDRH_imm() override; - void LDRH_reg() override; + void LDRD_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void LDRD_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; + void LDRH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void LDRH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; void LDRHT() override; - void LDRSB_imm() override; - void LDRSB_reg() override; + void LDRSB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void LDRSB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; void LDRSBT() override; - void LDRSH_imm() override; - void LDRSH_reg() override; + void LDRSH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void LDRSH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; void LDRSHT() override; void LDRT() override; - void STR_imm() override; - void STR_reg() override; - void STRB_imm() override; - void STRB_reg() override; + void STR_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm11 imm11) override; + void STR_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm5 imm5, ShiftType shift, ArmReg Rm) override; + void STRB_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm11 imm11) override; + void STRB_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm5 imm5, ShiftType shift, ArmReg Rm) override; void STRBT() override; - void STRD_imm() override; - void STRD_reg() override; - void STRH_imm() override; - void STRH_reg() override; + void STRD_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void STRD_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; + void STRH_imm(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmImm4 imm8a, ArmImm4 imm8b) override; + void STRH_reg(Cond cond, bool P, bool U, bool W, ArmReg Rn, ArmReg Rd, ArmReg Rm) override; void STRHT() override; void STRT() override; diff --git a/src/core/arm/jit_x64/reg_alloc.cpp b/src/core/arm/jit_x64/reg_alloc.cpp index 556957999..ff3d6fd88 100644 --- a/src/core/arm/jit_x64/reg_alloc.cpp +++ b/src/core/arm/jit_x64/reg_alloc.cpp @@ -6,6 +6,7 @@ #include #include "common/assert.h" +#include "common/x64/abi.h" #include "core/arm/jit_x64/reg_alloc.h" @@ -81,9 +82,14 @@ void RegAlloc::FlushX64(Gen::X64Reg x64_reg) { arm_state.location = MJitStateCpuReg(state.arm_reg); break; } - case X64State::State::DirtyArmReg: + case X64State::State::DirtyArmReg: { + ArmState& arm_state = arm_gpr[state.arm_reg]; + ASSERT(arm_state.location.IsSimpleReg()); + ASSERT(arm_state.location.GetSimpleReg() == x64_reg); FlushArm(state.arm_reg); + ASSERT(state.state == X64State::State::Free); break; + } default: UNREACHABLE(); break; @@ -266,6 +272,23 @@ void RegAlloc::FlushEverything() { } } +void RegAlloc::FlushABICallerSaved() { + for (auto i : x64_reg_to_index) { + if (ABI_ALL_CALLER_SAVED.m_val & (1 << i.first)) { + X64State& x64_state = x64_gpr[i.second]; + if (x64_state.state != X64State::State::UserManuallyLocked) { + ASSERT(!x64_state.locked); + FlushX64(i.first); + ASSERT(x64_state.state == X64State::State::Free); + } else { + ASSERT(x64_state.locked); + } + } + } + + 
ASSERT(!ABI_ALL_CALLER_SAVED[JitStateReg()]); +} + Gen::X64Reg RegAlloc::GetX64For(ArmReg arm_reg) { const ArmState& arm_state = arm_gpr[arm_reg]; diff --git a/src/core/arm/jit_x64/reg_alloc.h b/src/core/arm/jit_x64/reg_alloc.h index 8d5c113a2..ddad18a6b 100644 --- a/src/core/arm/jit_x64/reg_alloc.h +++ b/src/core/arm/jit_x64/reg_alloc.h @@ -168,6 +168,13 @@ public: */ void FlushEverything(); + /** + * Flush only those registers that are caller-saved in the ABI. + * All registers must be unlocked except those locked by LockX64. + * (We assume you know what you're doing if you've manually locked registers.) + */ + void FlushABICallerSaved(); + // Debug: void AssertNoLocked(); diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 71d59f1aa..034dcdfea 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -10,6 +10,7 @@ if(ARCHITECTURE_x86_64) core/arm/jit_x64/fuzz_arm_branch.cpp core/arm/jit_x64/fuzz_arm_common.cpp core/arm/jit_x64/fuzz_arm_data_processing.cpp + core/arm/jit_x64/fuzz_arm_load_store.cpp core/arm/jit_x64/fuzz_thumb.cpp ) diff --git a/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp b/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp index 13c6fb8ff..92df73c62 100644 --- a/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp +++ b/src/tests/core/arm/jit_x64/fuzz_arm_common.cpp @@ -4,6 +4,8 @@ #include #include +#include +#include #include @@ -15,6 +17,7 @@ #include "core/arm/jit_x64/interface.h" #include "core/core.h" #include "core/memory_setup.h" +#include "core/mmio.h" #include "tests/core/arm/jit_x64/fuzz_arm_common.h" #include "tests/core/arm/jit_x64/rand_int.h" @@ -42,16 +45,53 @@ std::pair FromBitString32(const char* str) { return{ bits, mask }; } +class TestMemory final : public Memory::MMIORegion { +public: + static constexpr size_t CODE_MEMORY_SIZE = 4096 * 2; + std::array code_mem{}; + + u8 Read8(VAddr addr) override { return addr; } + u16 Read16(VAddr addr) override { return addr; } + u32 Read32(VAddr addr) override { + if (addr < CODE_MEMORY_SIZE) { + return code_mem[addr/4]; + } else { + return addr; + } + } + u64 Read64(VAddr addr) override { return addr; } + + struct WriteRecord { + WriteRecord(size_t size, VAddr addr, u64 data) : size(size), addr(addr), data(data) {} + size_t size; + VAddr addr; + u64 data; + bool operator==(const WriteRecord& o) const { + return std::tie(size, addr, data) == std::tie(o.size, o.addr, o.data); + } + }; + + std::vector recording; + + void Write8(VAddr addr, u8 data) override { recording.emplace_back(1, addr, data); } + void Write16(VAddr addr, u16 data) override { recording.emplace_back(2, addr, data); } + void Write32(VAddr addr, u32 data) override { recording.emplace_back(4, addr, data); } + void Write64(VAddr addr, u64 data) override { recording.emplace_back(8, addr, data); } +}; + void FuzzJit(const int instruction_count, const int instructions_to_execute_count, const int run_count, const std::function instruction_generator) { // Init core Core::Init(); SCOPE_EXIT({ Core::Shutdown(); }); // Prepare memory - constexpr size_t MEMORY_SIZE = 4096 * 2; - std::array test_mem{}; - Memory::MapMemoryRegion(0, MEMORY_SIZE, test_mem.data()); - SCOPE_EXIT({ Memory::UnmapRegion(0, MEMORY_SIZE); }); + std::shared_ptr test_mem = std::make_unique(); + Memory::MapIoRegion(0x00000000, 0x80000000, test_mem); + Memory::MapIoRegion(0x80000000, 0x80000000, test_mem); + SCOPE_EXIT({ + Memory::UnmapRegion(0x00000000, 0x80000000); + Memory::UnmapRegion(0x80000000, 0x80000000); + }); // Prepare test subjects JitX64::ARM_Jit 
     JitX64::ARM_Jit jit(PrivilegeMode::USER32MODE);
@@ -81,14 +121,18 @@ void FuzzJit(const int instruction_count, const int instructions_to_execute_coun
 
         for (int i = 0; i < instruction_count; i++) {
             u32 inst = instruction_generator();
-
-            Memory::Write32(i * 4, inst);
+            test_mem->code_mem[i] = inst;
         }
 
-        Memory::Write32(instruction_count * 4, 0xEAFFFFFE); // b +#0 // busy wait loop
+        test_mem->code_mem[instruction_count] = 0xEAFFFFFE; // b +#0 // busy wait loop
 
+        test_mem->recording.clear();
         interp.ExecuteInstructions(instructions_to_execute_count);
+        auto interp_mem_recording = test_mem->recording;
+
+        test_mem->recording.clear();
         jit.ExecuteInstructions(instructions_to_execute_count);
+        auto jit_mem_recording = test_mem->recording;
 
         bool pass = true;
 
@@ -96,6 +140,7 @@ void FuzzJit(const int instruction_count, const int instructions_to_execute_coun
         for (int i = 0; i <= 15; i++) {
             if (interp.GetReg(i) != jit.GetReg(i)) pass = false;
         }
+        if (interp_mem_recording != jit_mem_recording) pass = false;
 
         if (!pass) {
            printf("Failed at execution number %i\n", run_number);
@@ -112,6 +157,18 @@ void FuzzJit(const int instruction_count, const int instructions_to_execute_coun
         }
         printf("CPSR: %08x %08x %s\n", interp.GetCPSR(), jit.GetCPSR(), interp.GetCPSR() != jit.GetCPSR() ? "*" : "");
 
+        if (interp_mem_recording != jit_mem_recording) {
+            printf("memory write recording mismatch *\n");
+            size_t i = 0;
+            while (i < interp_mem_recording.size() && i < jit_mem_recording.size()) {
+                if (i < interp_mem_recording.size())
+                    printf("interp: %i %08x %08x\n", interp_mem_recording[i].size, interp_mem_recording[i].addr, interp_mem_recording[i].data);
+                if (i < jit_mem_recording.size())
+                    printf("jit   : %i %08x %08x\n", jit_mem_recording[i].size, jit_mem_recording[i].addr, jit_mem_recording[i].data);
+                i++;
+            }
+        }
+
         printf("\nInterpreter walkthrough:\n");
         interp.ClearCache();
         interp.SetPC(0);
@@ -120,6 +177,7 @@ void FuzzJit(const int instruction_count, const int instructions_to_execute_coun
             interp.SetReg(i, initial_regs[i]);
             printf("%4i: %08x\n", i, interp.GetReg(i));
         }
+        test_mem->recording.clear();
         for (int inst = 0; inst < instruction_count; inst++) {
             printf("%s\n", ARM_Disasm::Disassemble(inst * 4, Memory::Read32(inst * 4)).c_str());
             interp.Step();
diff --git a/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp b/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp
new file mode 100644
index 000000000..e4a24505b
--- /dev/null
+++ b/src/tests/core/arm/jit_x64/fuzz_arm_load_store.cpp
@@ -0,0 +1,96 @@
+// Copyright 2016 Citra Emulator Project
+// Licensed under GPLv2 or any later version
+// Refer to the license.txt file included.
+
+#include
+
+#include
+
+#include "common/common_types.h"
+
+#include "tests/core/arm/jit_x64/rand_int.h"
+#include "tests/core/arm/jit_x64/fuzz_arm_common.h"
+
+TEST_CASE("Fuzz ARM load/store instructions (byte, half-word, word)", "[JitX64]") {
+    const std::array<std::pair<u32, u32>, 16> instructions = {{
+        FromBitString32("cccc010pu0w1nnnnddddvvvvvvvvvvvv"), // LDR_imm
+        FromBitString32("cccc011pu0w1nnnnddddvvvvvrr0mmmm"), // LDR_reg
+        FromBitString32("cccc010pu1w1nnnnddddvvvvvvvvvvvv"), // LDRB_imm
+        FromBitString32("cccc011pu1w1nnnnddddvvvvvrr0mmmm"), // LDRB_reg
+        FromBitString32("cccc010pu0w0nnnnddddvvvvvvvvvvvv"), // STR_imm
+        FromBitString32("cccc011pu0w0nnnnddddvvvvvrr0mmmm"), // STR_reg
+        FromBitString32("cccc010pu1w0nnnnddddvvvvvvvvvvvv"), // STRB_imm
+        FromBitString32("cccc011pu1w0nnnnddddvvvvvrr0mmmm"), // STRB_reg
+        FromBitString32("cccc000pu1w1nnnnddddvvvv1011vvvv"), // LDRH_imm
+        FromBitString32("cccc000pu0w1nnnndddd00001011mmmm"), // LDRH_reg
+        FromBitString32("cccc000pu1w1nnnnddddvvvv1101vvvv"), // LDRSB_imm
+        FromBitString32("cccc000pu0w1nnnndddd00001101mmmm"), // LDRSB_reg
+        FromBitString32("cccc000pu1w1nnnnddddvvvv1111vvvv"), // LDRSH_imm
+        FromBitString32("cccc000pu0w1nnnndddd00001111mmmm"), // LDRSH_reg
+        FromBitString32("cccc000pu1w0nnnnddddvvvv1011vvvv"), // STRH_imm
+        FromBitString32("cccc000pu0w0nnnndddd00001011mmmm"), // STRH_reg
+    }};
+
+    auto instruction_select = [&]() -> u32 {
+        size_t inst_index = RandInt<size_t>(0, instructions.size() - 1);
+
+        u32 cond = 0xE;
+        // Have a one-in-twenty-five chance of actually having a cond.
+        if (RandInt(1, 25) == 1) {
+            cond = RandInt(0x0, 0xD);
+        }
+
+        u32 Rn = RandInt(0, 14);
+        u32 Rd = RandInt(0, 14);
+        u32 W = 0;
+        u32 P = RandInt(0, 1);
+        if (P) W = RandInt(0, 1);
+        u32 U = RandInt(0, 1);
+        u32 rand = RandInt(0, 0xF);
+        u32 Rm = RandInt(0, 14);
+
+        u32 assemble_randoms = (Rm << 0) | (rand << 4) | (Rd << 12) | (Rn << 16) | (W << 21) | (U << 23) | (P << 24) | (cond << 28);
+
+        return instructions[inst_index].first | (assemble_randoms & (~instructions[inst_index].second));
+    };
+
+    SECTION("short blocks") {
+        FuzzJit(1, 2, 5000, instruction_select);
+    }
+}
+
+TEST_CASE("Fuzz ARM load/store instructions (double-word)", "[JitX64]") {
+    const std::array<std::pair<u32, u32>, 4> instructions = {{
+        FromBitString32("cccc000pu1w0nnnnddddvvvv1101vvvv"), // LDRD_imm
+        FromBitString32("cccc000pu0w0nnnndddd00001101mmmm"), // LDRD_reg
+        FromBitString32("cccc000pu1w0nnnnddddvvvv1111vvvv"), // STRD_imm
+        FromBitString32("cccc000pu0w0nnnndddd00001111mmmm"), // STRD_reg
+    }};
+
+    auto instruction_select = [&]() -> u32 {
+        size_t inst_index = RandInt<size_t>(0, instructions.size() - 1);
+
+        u32 cond = 0xE;
+        // Have a one-in-twenty-five chance of actually having a cond.
+        if (RandInt(1, 25) == 1) {
+            cond = RandInt(0x0, 0xD);
+        }
+
+        u32 Rn = RandInt(0, 6) * 2;
+        u32 Rd = RandInt(0, 6) * 2;
+        u32 W = 0;
+        u32 P = RandInt(0, 1);
+        if (P) W = RandInt(0, 1);
+        u32 U = RandInt(0, 1);
+        u32 rand = RandInt(0, 0xF);
+        u32 Rm = RandInt(0, 14);
+
+        u32 assemble_randoms = (Rm << 0) | (rand << 4) | (Rd << 12) | (Rn << 16) | (W << 21) | (U << 23) | (P << 24) | (cond << 28);
+
+        return instructions[inst_index].first | (assemble_randoms & (~instructions[inst_index].second));
+    };
+
+    SECTION("short blocks") {
+        FuzzJit(1, 2, 5000, instruction_select);
+    }
+}
\ No newline at end of file
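
Note (outside the patch itself): both fuzz tests above build random instructions the same way. FromBitString32 turns a pattern such as "cccc010pu0w1nnnnddddvvvvvvvvvvvv" into a (bits, mask) pair, where bits holds the '1' positions and mask marks every fixed bit, and instruction_select then fills the remaining free bits with random field values via `bits | (random & ~mask)`. The following is a minimal standalone sketch of that idea; the helper name BitsAndMaskFromPattern and the use of std::mt19937 are illustrative assumptions, not code from this patch.

// Standalone sketch of the bits/mask technique used by the fuzz tests above.
// BitsAndMaskFromPattern is a hypothetical stand-in for FromBitString32.
#include <cstdint>
#include <cstdio>
#include <random>
#include <utility>

// '1' and '0' fix a bit; any other character (cond/reg/imm placeholders) leaves it free.
static std::pair<std::uint32_t, std::uint32_t> BitsAndMaskFromPattern(const char* str) {
    std::uint32_t bits = 0;
    std::uint32_t mask = 0;
    for (int i = 0; i < 32; i++) {
        const std::uint32_t bit = 1u << (31 - i);
        if (str[i] == '1') {
            bits |= bit;
            mask |= bit;
        } else if (str[i] == '0') {
            mask |= bit;
        }
    }
    return { bits, mask };
}

int main() {
    // LDR (imm): cccc010pu0w1nnnnddddvvvvvvvvvvvv -- cond, P, U, W, Rn, Rd and imm12 are free.
    const auto pattern = BitsAndMaskFromPattern("cccc010pu0w1nnnnddddvvvvvvvvvvvv");

    std::mt19937 rng{12345};
    // Fixed bits come from the pattern; every free bit is drawn from the RNG.
    const std::uint32_t inst = pattern.first | (static_cast<std::uint32_t>(rng()) & ~pattern.second);
    std::printf("random LDR (imm) encoding: %08x\n", inst);
    return 0;
}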