OpenGL: Add Local Memory warmup shader
This commit is contained in:
		@@ -33,6 +33,7 @@ set(SHADER_FILES
 | 
			
		||||
    opengl_fidelityfx_fsr.frag
 | 
			
		||||
    opengl_fidelityfx_fsr_easu.frag
 | 
			
		||||
    opengl_fidelityfx_fsr_rcas.frag
 | 
			
		||||
    opengl_lmem_warmup.comp
 | 
			
		||||
    opengl_present.frag
 | 
			
		||||
    opengl_present.vert
 | 
			
		||||
    opengl_present_scaleforce.frag
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										47
									
								
								src/video_core/host_shaders/opengl_lmem_warmup.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										47
									
								
								src/video_core/host_shaders/opengl_lmem_warmup.comp
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,47 @@
 | 
			
		||||
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
 | 
			
		||||
// SPDX-License-Identifier: GPL-2.0-or-later
 | 
			
		||||
 | 
			
		||||
// This shader is a workaround for a quirk in NVIDIA OpenGL drivers.
 | 
			
		||||
// Shaders using local memory see a great performance benefit if a shader that was dispatched
 | 
			
		||||
// before it had more local memory allocated.
 | 
			
		||||
// This shader allocates the maximum local memory allowed on NVIDIA drivers to ensure that
 | 
			
		||||
// subsequent shaders see the performance boost.
 | 
			
		||||
 | 
			
		||||
// NOTE: This shader does no actual meaningful work and returns immediately,
 | 
			
		||||
// it is simply a means to have the driver expect a shader using lots of local memory.
 | 
			
		||||
 | 
			
		||||
#version 450
 | 
			
		||||
 | 
			
		||||
// Work-group size of 1x1x1: each work group runs a single invocation.
layout(local_size_x = 1, local_size_y = 1, local_size_z = 1) in;
 | 
			
		||||
 | 
			
		||||
// Read by main() as the loop bound and as the multiplier for the lmem_0 indices.
layout(location = 0) uniform uint uniform_data;
 | 
			
		||||
 | 
			
		||||
// Write-only image targeted by the imageStore() at the end of main().
layout(binding = 0, rgba8) uniform writeonly restrict image2DArray dest_image;
 | 
			
		||||
 | 
			
		||||
#define MAX_LMEM_SIZE 4080 // Size chosen to avoid errors in Nvidia's GLSL compiler
 | 
			
		||||
// Number of uvec4 entries in constant_values; also subtracted from MAX_LMEM_SIZE by ARRAY_SIZE.
#define NUM_LMEM_CONSTANTS 1
 | 
			
		||||
// Element count of lmem_0. Parenthesized so the macro expands as a single value
// when it is used inside a larger expression (e.g. ARRAY_SIZE * 2).
#define ARRAY_SIZE (MAX_LMEM_SIZE - NUM_LMEM_CONSTANTS)
 | 
			
		||||
 | 
			
		||||
// Array of ARRAY_SIZE uints that main() writes and then reads back at uniform-derived indices.
// NOTE(review): presumably this is what the driver places in local memory - confirm.
uint lmem_0[ARRAY_SIZE];
 | 
			
		||||
// Constant table of NUM_LMEM_CONSTANTS uvec4s (all components zero), read component-wise by main().
const uvec4 constant_values[NUM_LMEM_CONSTANTS] = uvec4[](uvec4(0));
 | 
			
		||||
 | 
			
		||||
// Entry point. Returns early when gl_GlobalInvocationID.x <= 128; otherwise it writes and
// reads lmem_0 at uniform-derived indices and stores the result to dest_image.
void main() {
 | 
			
		||||
    const uint global_id = gl_GlobalInvocationID.x;
 | 
			
		||||
    if (global_id <= 128) {
 | 
			
		||||
        // The host dispatches this shader as 1x1x1 and the local size is 1x1x1,
 | 
			
		||||
        // so this branch should always be taken and the code below will not actually execute.
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    // Write lmem_0 at indices derived from the uniform.
    // NOTE(review): presumably uniform-dependent indexing keeps the compiler from resolving
    // the accesses statically - confirm against driver behavior.
    for (uint t = 0; t < uniform_data; t++) {
 | 
			
		||||
        const uint offset = (t * uniform_data);
 | 
			
		||||
        lmem_0[offset] = t;
 | 
			
		||||
    }
 | 
			
		||||
    const uint offset = (gl_GlobalInvocationID.y * uniform_data + gl_GlobalInvocationID.x);
 | 
			
		||||
    const uint value = lmem_0[offset];
 | 
			
		||||
    // constant_values holds uvec4s: offset / 4 selects the vector, offset % 4 the component.
    const uint const_value = constant_values[offset / 4][offset % 4];
 | 
			
		||||
    const uvec4 color = uvec4(value + const_value);
 | 
			
		||||
 | 
			
		||||
    // A visible side effect is needed so the variables above don't get optimized out,
 | 
			
		||||
    // but this should never execute, so previously bound state should not be clobbered.
 | 
			
		||||
    imageStore(dest_image, ivec3(gl_GlobalInvocationID), color);
 | 
			
		||||
}
 | 
			
		||||
@@ -222,6 +222,7 @@ void RasterizerOpenGL::PrepareDraw(bool is_indexed, Func&& draw_func) {
 | 
			
		||||
    gpu.TickWork();
 | 
			
		||||
 | 
			
		||||
    std::scoped_lock lock{buffer_cache.mutex, texture_cache.mutex};
 | 
			
		||||
    program_manager.LocalMemoryWarmup();
 | 
			
		||||
    pipeline->SetEngine(maxwell3d, gpu_memory);
 | 
			
		||||
    pipeline->Configure(is_indexed);
 | 
			
		||||
 | 
			
		||||
@@ -371,6 +372,7 @@ void RasterizerOpenGL::DispatchCompute() {
 | 
			
		||||
    if (!pipeline) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    program_manager.LocalMemoryWarmup();
 | 
			
		||||
    pipeline->SetEngine(kepler_compute, gpu_memory);
 | 
			
		||||
    pipeline->Configure();
 | 
			
		||||
    const auto& qmd{kepler_compute->launch_description};
 | 
			
		||||
 
 | 
			
		||||
@@ -3,7 +3,9 @@
 | 
			
		||||
 | 
			
		||||
#include <glad/glad.h>
 | 
			
		||||
 | 
			
		||||
#include "video_core/host_shaders/opengl_lmem_warmup_comp.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_manager.h"
 | 
			
		||||
#include "video_core/renderer_opengl/gl_shader_util.h"
 | 
			
		||||
 | 
			
		||||
namespace OpenGL {
 | 
			
		||||
 | 
			
		||||
@@ -12,7 +14,8 @@ static constexpr std::array ASSEMBLY_PROGRAM_ENUMS{
 | 
			
		||||
    GL_GEOMETRY_PROGRAM_NV, GL_FRAGMENT_PROGRAM_NV,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
ProgramManager::ProgramManager(const Device& device) {
 | 
			
		||||
ProgramManager::ProgramManager(const Device& device)
 | 
			
		||||
    : lmem_warmup_program(CreateProgram(HostShaders::OPENGL_LMEM_WARMUP_COMP, GL_COMPUTE_SHADER)) {
 | 
			
		||||
    glCreateProgramPipelines(1, &pipeline.handle);
 | 
			
		||||
    if (device.UseAssemblyShaders()) {
 | 
			
		||||
        glEnable(GL_COMPUTE_PROGRAM_NV);
 | 
			
		||||
@@ -98,6 +101,11 @@ void ProgramManager::BindAssemblyPrograms(std::span<const OGLAssemblyProgram, NU
 | 
			
		||||
 | 
			
		||||
void ProgramManager::RestoreGuestCompute() {}
 | 
			
		||||
 | 
			
		||||
/// Binds the local-memory warmup compute program and dispatches a single 1x1x1 work group.
void ProgramManager::LocalMemoryWarmup() {
    // Take the handle first so the bind call reads as a plain "bind this program".
    const auto warmup_handle = lmem_warmup_program.handle;
    BindComputeProgram(warmup_handle);

    // One work group per axis.
    glDispatchCompute(1, 1, 1);
}
 | 
			
		||||
 | 
			
		||||
void ProgramManager::BindPipeline() {
 | 
			
		||||
    if (!is_pipeline_bound) {
 | 
			
		||||
        is_pipeline_bound = true;
 | 
			
		||||
 
 | 
			
		||||
@@ -30,6 +30,8 @@ public:
 | 
			
		||||
 | 
			
		||||
    void RestoreGuestCompute();
 | 
			
		||||
 | 
			
		||||
    void LocalMemoryWarmup();
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
    void BindPipeline();
 | 
			
		||||
 | 
			
		||||
@@ -44,6 +46,7 @@ private:
 | 
			
		||||
    u32 current_stage_mask = 0;
 | 
			
		||||
    std::array<GLuint, NUM_STAGES> current_programs{};
 | 
			
		||||
    GLuint current_assembly_compute_program = 0;
 | 
			
		||||
    OGLProgram lmem_warmup_program;
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
} // namespace OpenGL
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user