From 9dd09cb60dfd861f54aab948652cc06ab92da351 Mon Sep 17 00:00:00 2001 From: 1 <1> Date: Wed, 4 Oct 2017 14:25:02 +0100 Subject: [PATCH] AVX --- src/common/CMakeLists.txt | 1 + src/common/avx_utils.h | 22 ++++++++++++++++++++++ src/core/arm/dynarmic/arm_dynarmic.cpp | 2 ++ src/video_core/shader/shader_jit_x64.cpp | 2 ++ 4 files changed, 27 insertions(+) create mode 100644 src/common/avx_utils.h diff --git a/src/common/CMakeLists.txt b/src/common/CMakeLists.txt index 7e83e64b0..1126f4f29 100644 --- a/src/common/CMakeLists.txt +++ b/src/common/CMakeLists.txt @@ -45,6 +45,7 @@ set(SRCS set(HEADERS alignment.h assert.h + avx_utils.h bit_field.h bit_set.h break_points.h diff --git a/src/common/avx_utils.h b/src/common/avx_utils.h new file mode 100644 index 000000000..6dfcf3ca0 --- /dev/null +++ b/src/common/avx_utils.h @@ -0,0 +1,22 @@ + +#pragma once + +#if defined(_MSC_VER) +/* Microsoft C/C++-compatible compiler */ +#include <intrin.h> +#elif defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) +/* GCC-compatible compiler, targeting x86/x86-64 */ +#include <x86intrin.h> +#endif + +// It seems a lot of folks out there compile citra with AVX. This little function +// avoids the penalty incurred by mixing SSE instructions with AVX. For further info: +// https://software.intel.com/en-us/articles/avoiding-avx-sse-transition-penalties +// Call this function before running dynamically generated code. 
+inline void ZeroUpperAVX() { +#ifdef __AVX__ + _mm256_zeroupper(); +#else +// Do Nothing +#endif +} diff --git a/src/core/arm/dynarmic/arm_dynarmic.cpp b/src/core/arm/dynarmic/arm_dynarmic.cpp index 6e238009f..375eec39c 100644 --- a/src/core/arm/dynarmic/arm_dynarmic.cpp +++ b/src/core/arm/dynarmic/arm_dynarmic.cpp @@ -5,6 +5,7 @@ #include #include #include "common/assert.h" +#include "common/avx_utils.h" #include "common/microprofile.h" #include "core/arm/dynarmic/arm_dynarmic.h" #include "core/arm/dynarmic/arm_dynarmic_cp15.h" @@ -130,6 +131,7 @@ void ARM_Dynarmic::ExecuteInstructions(int num_instructions) { ASSERT(Memory::GetCurrentPageTable() == current_page_table); MICROPROFILE_SCOPE(ARM_Jit); + ZeroUpperAVX(); std::size_t ticks_executed = jit->Run(static_cast(num_instructions)); CoreTiming::AddTicks(4000); diff --git a/src/video_core/shader/shader_jit_x64.cpp b/src/video_core/shader/shader_jit_x64.cpp index 73c21871c..33b061d9f 100644 --- a/src/video_core/shader/shader_jit_x64.cpp +++ b/src/video_core/shader/shader_jit_x64.cpp @@ -2,6 +2,7 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include "common/avx_utils.h" #include "common/hash.h" #include "common/microprofile.h" #include "video_core/shader/shader.h" @@ -41,6 +42,7 @@ void JitX64Engine::Run(const ShaderSetup& setup, UnitState& state) const { MICROPROFILE_SCOPE(GPU_Shader); const JitShader* shader = static_cast(setup.engine_data.cached_shader); + ZeroUpperAVX(); shader->Run(setup, state, setup.engine_data.entry_point); }