externals: Update Xbyak to 5.96

I made a request on the Xbyak issue tracker to allow some constructors
to be constexpr so that static constructors no longer need to
execute for some of our register constants.

This request was implemented, so this updates Xbyak so that we can make
use of it.
This commit is contained in:
Lioncash 2020-08-30 04:58:45 -04:00 committed by FearlessTobi
parent 045d20e076
commit 9c08409e0e
4 changed files with 63 additions and 56 deletions

2
externals/xbyak vendored

@ -1 +1 @@
Subproject commit 18c9caaa0a3ed5706c39f5aa86cce0db6e65b174 Subproject commit c306b8e5786eeeb87b8925a8af5c3bf057ff5a90

View File

@ -4,14 +4,14 @@
#pragma once #pragma once
#include <bitset>
#include <initializer_list> #include <initializer_list>
#include <xbyak.h> #include <xbyak.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/bit_set.h"
namespace Common::X64 { namespace Common::X64 {
inline std::size_t RegToIndex(const Xbyak::Reg& reg) { constexpr std::size_t RegToIndex(const Xbyak::Reg& reg) {
using Kind = Xbyak::Reg::Kind; using Kind = Xbyak::Reg::Kind;
ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0, ASSERT_MSG((reg.getKind() & (Kind::REG | Kind::XMM)) != 0,
"RegSet only support GPRs and XMM registers."); "RegSet only support GPRs and XMM registers.");
@ -19,17 +19,17 @@ inline std::size_t RegToIndex(const Xbyak::Reg& reg) {
return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16); return reg.getIdx() + (reg.getKind() == Kind::REG ? 0 : 16);
} }
inline Xbyak::Reg64 IndexToReg64(std::size_t reg_index) { constexpr Xbyak::Reg64 IndexToReg64(std::size_t reg_index) {
ASSERT(reg_index < 16); ASSERT(reg_index < 16);
return Xbyak::Reg64(static_cast<int>(reg_index)); return Xbyak::Reg64(static_cast<int>(reg_index));
} }
inline Xbyak::Xmm IndexToXmm(std::size_t reg_index) { constexpr Xbyak::Xmm IndexToXmm(std::size_t reg_index) {
ASSERT(reg_index >= 16 && reg_index < 32); ASSERT(reg_index >= 16 && reg_index < 32);
return Xbyak::Xmm(static_cast<int>(reg_index - 16)); return Xbyak::Xmm(static_cast<int>(reg_index - 16));
} }
inline Xbyak::Reg IndexToReg(std::size_t reg_index) { constexpr Xbyak::Reg IndexToReg(std::size_t reg_index) {
if (reg_index < 16) { if (reg_index < 16) {
return IndexToReg64(reg_index); return IndexToReg64(reg_index);
} else { } else {
@ -37,27 +37,27 @@ inline Xbyak::Reg IndexToReg(std::size_t reg_index) {
} }
} }
inline BitSet32 BuildRegSet(std::initializer_list<Xbyak::Reg> regs) { inline std::bitset<32> BuildRegSet(std::initializer_list<Xbyak::Reg> regs) {
BitSet32 bits; std::bitset<32> bits;
for (const Xbyak::Reg& reg : regs) { for (const Xbyak::Reg& reg : regs) {
bits[RegToIndex(reg)] = true; bits[RegToIndex(reg)] = true;
} }
return bits; return bits;
} }
const BitSet32 ABI_ALL_GPRS(0x0000FFFF); constexpr inline std::bitset<32> ABI_ALL_GPRS(0x0000FFFF);
const BitSet32 ABI_ALL_XMMS(0xFFFF0000); constexpr inline std::bitset<32> ABI_ALL_XMMS(0xFFFF0000);
#ifdef _WIN32 #ifdef _WIN32
// Microsoft x64 ABI // Microsoft x64 ABI
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx; constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rcx;
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rdx;
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::r8;
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::r9;
const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
// GPRs // GPRs
Xbyak::util::rcx, Xbyak::util::rcx,
Xbyak::util::rdx, Xbyak::util::rdx,
@ -74,7 +74,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
Xbyak::util::xmm5, Xbyak::util::xmm5,
}); });
const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
// GPRs // GPRs
Xbyak::util::rbx, Xbyak::util::rbx,
Xbyak::util::rsi, Xbyak::util::rsi,
@ -102,13 +102,13 @@ constexpr std::size_t ABI_SHADOW_SPACE = 0x20;
#else #else
// System V x86-64 ABI // System V x86-64 ABI
const Xbyak::Reg ABI_RETURN = Xbyak::util::rax; constexpr inline Xbyak::Reg ABI_RETURN = Xbyak::util::rax;
const Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi; constexpr inline Xbyak::Reg ABI_PARAM1 = Xbyak::util::rdi;
const Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi; constexpr inline Xbyak::Reg ABI_PARAM2 = Xbyak::util::rsi;
const Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx; constexpr inline Xbyak::Reg ABI_PARAM3 = Xbyak::util::rdx;
const Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx; constexpr inline Xbyak::Reg ABI_PARAM4 = Xbyak::util::rcx;
const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({ const std::bitset<32> ABI_ALL_CALLER_SAVED = BuildRegSet({
// GPRs // GPRs
Xbyak::util::rcx, Xbyak::util::rcx,
Xbyak::util::rdx, Xbyak::util::rdx,
@ -137,7 +137,7 @@ const BitSet32 ABI_ALL_CALLER_SAVED = BuildRegSet({
Xbyak::util::xmm15, Xbyak::util::xmm15,
}); });
const BitSet32 ABI_ALL_CALLEE_SAVED = BuildRegSet({ const std::bitset<32> ABI_ALL_CALLEE_SAVED = BuildRegSet({
// GPRs // GPRs
Xbyak::util::rbx, Xbyak::util::rbx,
Xbyak::util::rbp, Xbyak::util::rbp,
@ -156,12 +156,12 @@ struct ABIFrameInfo {
s32 xmm_offset; s32 xmm_offset;
}; };
inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignment, inline ABIFrameInfo ABI_CalculateFrameSize(std::bitset<32> regs, std::size_t rsp_alignment,
std::size_t needed_frame_size) { std::size_t needed_frame_size) {
int count = (regs & ABI_ALL_GPRS).Count(); int count = (regs & ABI_ALL_GPRS).count();
rsp_alignment -= count * 8; rsp_alignment -= count * 8;
std::size_t subtraction = 0; std::size_t subtraction = 0;
int xmm_count = (regs & ABI_ALL_XMMS).Count(); int xmm_count = (regs & ABI_ALL_XMMS).count();
if (xmm_count) { if (xmm_count) {
// If we have any XMMs to save, we must align the stack here. // If we have any XMMs to save, we must align the stack here.
subtraction = rsp_alignment & 0xF; subtraction = rsp_alignment & 0xF;
@ -178,36 +178,42 @@ inline ABIFrameInfo ABI_CalculateFrameSize(BitSet32 regs, std::size_t rsp_alignm
static_cast<s32>(subtraction - xmm_base_subtraction)}; static_cast<s32>(subtraction - xmm_base_subtraction)};
} }
inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, inline std::size_t ABI_PushRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
std::size_t rsp_alignment, std::size_t rsp_alignment,
std::size_t needed_frame_size = 0) { std::size_t needed_frame_size = 0) {
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
for (int reg_index : (regs & ABI_ALL_GPRS)) { for (std::size_t i = 0; i < regs.size(); ++i) {
code.push(IndexToReg64(reg_index)); if (regs[i] && ABI_ALL_GPRS[i]) {
code.push(IndexToReg64(i));
}
} }
if (frame_info.subtraction != 0) { if (frame_info.subtraction != 0) {
code.sub(code.rsp, frame_info.subtraction); code.sub(code.rsp, frame_info.subtraction);
} }
for (int reg_index : (regs & ABI_ALL_XMMS)) { for (std::size_t i = 0; i < regs.size(); ++i) {
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(reg_index)); if (regs[i] && ABI_ALL_XMMS[i]) {
code.movaps(code.xword[code.rsp + frame_info.xmm_offset], IndexToXmm(i));
frame_info.xmm_offset += 0x10; frame_info.xmm_offset += 0x10;
} }
}
return ABI_SHADOW_SPACE; return ABI_SHADOW_SPACE;
} }
inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, BitSet32 regs, inline void ABI_PopRegistersAndAdjustStack(Xbyak::CodeGenerator& code, std::bitset<32> regs,
std::size_t rsp_alignment, std::size_t rsp_alignment,
std::size_t needed_frame_size = 0) { std::size_t needed_frame_size = 0) {
auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size); auto frame_info = ABI_CalculateFrameSize(regs, rsp_alignment, needed_frame_size);
for (int reg_index : (regs & ABI_ALL_XMMS)) { for (std::size_t i = 0; i < regs.size(); ++i) {
code.movaps(IndexToXmm(reg_index), code.xword[code.rsp + frame_info.xmm_offset]); if (regs[i] && ABI_ALL_XMMS[i]) {
code.movaps(IndexToXmm(i), code.xword[code.rsp + frame_info.xmm_offset]);
frame_info.xmm_offset += 0x10; frame_info.xmm_offset += 0x10;
} }
}
if (frame_info.subtraction != 0) { if (frame_info.subtraction != 0) {
code.add(code.rsp, frame_info.subtraction); code.add(code.rsp, frame_info.subtraction);

View File

@ -102,40 +102,40 @@ const JitFunction instr_table[64] = {
// purposes, as documented below: // purposes, as documented below:
/// Pointer to the uniform memory /// Pointer to the uniform memory
static const Reg64 UNIFORMS = r9; constexpr Reg64 UNIFORMS = r9;
/// The two 32-bit VS address offset registers set by the MOVA instruction /// The two 32-bit VS address offset registers set by the MOVA instruction
static const Reg64 ADDROFFS_REG_0 = r10; constexpr Reg64 ADDROFFS_REG_0 = r10;
static const Reg64 ADDROFFS_REG_1 = r11; constexpr Reg64 ADDROFFS_REG_1 = r11;
/// VS loop count register (Multiplied by 16) /// VS loop count register (Multiplied by 16)
static const Reg32 LOOPCOUNT_REG = r12d; constexpr Reg32 LOOPCOUNT_REG = r12d;
/// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker) /// Current VS loop iteration number (we could probably use LOOPCOUNT_REG, but this is quicker)
static const Reg32 LOOPCOUNT = esi; constexpr Reg32 LOOPCOUNT = esi;
/// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16) /// Number to increment LOOPCOUNT_REG by on each loop iteration (Multiplied by 16)
static const Reg32 LOOPINC = edi; constexpr Reg32 LOOPINC = edi;
/// Result of the previous CMP instruction for the X-component comparison /// Result of the previous CMP instruction for the X-component comparison
static const Reg64 COND0 = r13; constexpr Reg64 COND0 = r13;
/// Result of the previous CMP instruction for the Y-component comparison /// Result of the previous CMP instruction for the Y-component comparison
static const Reg64 COND1 = r14; constexpr Reg64 COND1 = r14;
/// Pointer to the UnitState instance for the current VS unit /// Pointer to the UnitState instance for the current VS unit
static const Reg64 STATE = r15; constexpr Reg64 STATE = r15;
/// SIMD scratch register /// SIMD scratch register
static const Xmm SCRATCH = xmm0; constexpr Xmm SCRATCH = xmm0;
/// Loaded with the first swizzled source register, otherwise can be used as a scratch register /// Loaded with the first swizzled source register, otherwise can be used as a scratch register
static const Xmm SRC1 = xmm1; constexpr Xmm SRC1 = xmm1;
/// Loaded with the second swizzled source register, otherwise can be used as a scratch register /// Loaded with the second swizzled source register, otherwise can be used as a scratch register
static const Xmm SRC2 = xmm2; constexpr Xmm SRC2 = xmm2;
/// Loaded with the third swizzled source register, otherwise can be used as a scratch register /// Loaded with the third swizzled source register, otherwise can be used as a scratch register
static const Xmm SRC3 = xmm3; constexpr Xmm SRC3 = xmm3;
/// Additional scratch register /// Additional scratch register
static const Xmm SCRATCH2 = xmm4; constexpr Xmm SCRATCH2 = xmm4;
/// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one /// Constant vector of [1.0f, 1.0f, 1.0f, 1.0f], used to efficiently set a vector to one
static const Xmm ONE = xmm14; constexpr Xmm ONE = xmm14;
/// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR /// Constant vector of [-0.f, -0.f, -0.f, -0.f], used to efficiently negate a vector with XOR
static const Xmm NEGBIT = xmm15; constexpr Xmm NEGBIT = xmm15;
// State registers that must not be modified by external function calls // State registers that must not be modified by external function calls
// Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed // Scratch registers, e.g., SRC1 and SCRATCH, have to be saved on the side if needed
static const BitSet32 persistent_regs = BuildRegSet({ static const std::bitset<32> persistent_regs = BuildRegSet({
// Pointers to register blocks // Pointers to register blocks
UNIFORMS, UNIFORMS,
STATE, STATE,
@ -356,7 +356,7 @@ void JitShader::Compile_UniformCondition(Instruction instr) {
cmp(byte[UNIFORMS + offset], 0); cmp(byte[UNIFORMS + offset], 0);
} }
BitSet32 JitShader::PersistentCallerSavedRegs() { std::bitset<32> JitShader::PersistentCallerSavedRegs() {
return persistent_regs & ABI_ALL_CALLER_SAVED; return persistent_regs & ABI_ALL_CALLER_SAVED;
} }

View File

@ -5,6 +5,7 @@
#pragma once #pragma once
#include <array> #include <array>
#include <bitset>
#include <cstddef> #include <cstddef>
#include <optional> #include <optional>
#include <utility> #include <utility>
@ -91,7 +92,7 @@ private:
*/ */
void Compile_Return(); void Compile_Return();
BitSet32 PersistentCallerSavedRegs(); std::bitset<32> PersistentCallerSavedRegs();
/** /**
* Assertion evaluated at compile-time, but only triggered if executed at runtime. * Assertion evaluated at compile-time, but only triggered if executed at runtime.