shader/jit: Use Xbyak::util::Cpu for host capabilities (#6643)

Xbyak provides a complete utility class for determining the host processor's
ISA features, such as SSE4.1, AVX, AVX2, AVX512{F,VL,DQ,VBMI,...}, and so on,
which opens the door to further potential optimizations.
This commit is contained in:
Wunk 2023-07-01 19:35:07 -07:00 committed by GitHub
parent 3d0a3c2c45
commit 71aea7e571
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -10,6 +10,7 @@
#include <cstdint> #include <cstdint>
#include <nihstro/shader_bytecode.h> #include <nihstro/shader_bytecode.h>
#include <smmintrin.h> #include <smmintrin.h>
#include <xbyak/xbyak_util.h>
#include <xmmintrin.h> #include <xmmintrin.h>
#include "common/assert.h" #include "common/assert.h"
#include "common/logging/log.h" #include "common/logging/log.h"
@ -32,6 +33,8 @@ using Xbyak::Xmm;
using nihstro::DestRegister; using nihstro::DestRegister;
using nihstro::RegisterType; using nihstro::RegisterType;
// Host CPU feature information provided by Xbyak. The JIT queries it (e.g.
// host_caps.has(Cpu::tSSE41)) to choose between emitting SSE4.1 instructions
// and their fallback instruction sequences.
static const Xbyak::util::Cpu host_caps;
namespace Pica::Shader { namespace Pica::Shader {
typedef void (JitShader::*JitFunction)(Instruction instr); typedef void (JitShader::*JitFunction)(Instruction instr);
@ -306,7 +309,7 @@ void JitShader::Compile_DestEnable(Instruction instr, Xmm src) {
// register... // register...
movaps(SCRATCH, xword[STATE + dest_offset_disp]); movaps(SCRATCH, xword[STATE + dest_offset_disp]);
if (Common::GetCPUCaps().sse4_1) { if (host_caps.has(Cpu::tSSE41)) {
u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) | u8 mask = ((swiz.dest_mask & 1) << 3) | ((swiz.dest_mask & 8) >> 3) |
((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1); ((swiz.dest_mask & 2) << 1) | ((swiz.dest_mask & 4) >> 1);
blendps(SCRATCH, src, mask); blendps(SCRATCH, src, mask);
@ -437,7 +440,7 @@ void JitShader::Compile_DPH(Instruction instr) {
Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2); Compile_SwizzleSrc(instr, 2, instr.common.src2, SRC2);
} }
if (Common::GetCPUCaps().sse4_1) { if (host_caps.has(Cpu::tSSE41)) {
// Set 4th component to 1.0 // Set 4th component to 1.0
blendps(SRC1, ONE, 0b1000); blendps(SRC1, ONE, 0b1000);
} else { } else {
@ -507,7 +510,7 @@ void JitShader::Compile_SLT(Instruction instr) {
void JitShader::Compile_FLR(Instruction instr) { void JitShader::Compile_FLR(Instruction instr) {
Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1); Compile_SwizzleSrc(instr, 1, instr.common.src1, SRC1);
if (Common::GetCPUCaps().sse4_1) { if (host_caps.has(Cpu::tSSE41)) {
roundps(SRC1, SRC1, _MM_FROUND_FLOOR); roundps(SRC1, SRC1, _MM_FROUND_FLOOR);
} else { } else {
cvttps2dq(SRC1, SRC1); cvttps2dq(SRC1, SRC1);