shader: Optimize NVN Fallthrough
This commit is contained in:
		@@ -840,6 +840,9 @@ void EmitContext::DefineGlobalMemoryFunctions(const Info& info) {
 | 
			
		||||
        AddLabel();
 | 
			
		||||
        const size_t num_buffers{info.storage_buffers_descriptors.size()};
 | 
			
		||||
        for (size_t index = 0; index < num_buffers; ++index) {
 | 
			
		||||
            if (!info.nvn_buffer_used[index]) {
 | 
			
		||||
                continue;
 | 
			
		||||
            }
 | 
			
		||||
            const auto& ssbo{info.storage_buffers_descriptors[index]};
 | 
			
		||||
            const Id ssbo_addr_cbuf_offset{Const(ssbo.cbuf_offset / 8)};
 | 
			
		||||
            const Id ssbo_size_cbuf_offset{Const(ssbo.cbuf_offset / 4 + 2)};
 | 
			
		||||
 
 | 
			
		||||
@@ -88,17 +88,20 @@ void AddNVNStorageBuffers(IR::Program& program) {
 | 
			
		||||
    }()};
 | 
			
		||||
    auto& descs{program.info.storage_buffers_descriptors};
 | 
			
		||||
    for (u32 index = 0; index < num_buffers; ++index) {
 | 
			
		||||
        if (!program.info.nvn_buffer_used[index]) {
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        const u32 offset{base + index * descriptor_size};
 | 
			
		||||
        const auto it{std::ranges::find(descs, offset, &StorageBufferDescriptor::cbuf_offset)};
 | 
			
		||||
        if (it != descs.end()) {
 | 
			
		||||
            it->is_written |= program.info.stores_global_memory;
 | 
			
		||||
            continue;
 | 
			
		||||
        }
 | 
			
		||||
        // Assume these are written for now
 | 
			
		||||
        descs.push_back({
 | 
			
		||||
            .cbuf_index = driver_cbuf,
 | 
			
		||||
            .cbuf_offset = offset,
 | 
			
		||||
            .count = 1,
 | 
			
		||||
            .is_written = true,
 | 
			
		||||
            .is_written = program.info.stores_global_memory,
 | 
			
		||||
        });
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -132,6 +132,30 @@ void SetPatch(Info& info, IR::Patch patch) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void CheckCBufNVN(Info& info, IR::Inst& inst) {
 | 
			
		||||
    const IR::Value cbuf_index{inst.Arg(0)};
 | 
			
		||||
    if (!cbuf_index.IsImmediate()) {
 | 
			
		||||
        info.nvn_buffer_used.set();
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    const u32 index{cbuf_index.U32()};
 | 
			
		||||
    if (index != 0) {
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    const IR::Value cbuf_offset{inst.Arg(1)};
 | 
			
		||||
    if (!cbuf_offset.IsImmediate()) {
 | 
			
		||||
        info.nvn_buffer_used.set();
 | 
			
		||||
        return;
 | 
			
		||||
    }
 | 
			
		||||
    const u32 offset{cbuf_offset.U32()};
 | 
			
		||||
    const u32 descriptor_size{0x10};
 | 
			
		||||
    const u32 upper_limit{info.nvn_buffer_base + descriptor_size * 16};
 | 
			
		||||
    if (offset >= info.nvn_buffer_base && offset < upper_limit) {
 | 
			
		||||
        const std::size_t nvn_index{(offset - info.nvn_buffer_base) / descriptor_size};
 | 
			
		||||
        info.nvn_buffer_used.set(nvn_index, true);
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VisitUsages(Info& info, IR::Inst& inst) {
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::CompositeConstructF16x2:
 | 
			
		||||
@@ -382,13 +406,6 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::LoadGlobalU8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalU16:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS16:
 | 
			
		||||
    case IR::Opcode::LoadGlobal32:
 | 
			
		||||
    case IR::Opcode::LoadGlobal64:
 | 
			
		||||
    case IR::Opcode::LoadGlobal128:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalS8:
 | 
			
		||||
    case IR::Opcode::WriteGlobalU16:
 | 
			
		||||
@@ -423,6 +440,15 @@ void VisitUsages(Info& info, IR::Inst& inst) {
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMinF32x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMaxF16x2:
 | 
			
		||||
    case IR::Opcode::GlobalAtomicMaxF32x2:
 | 
			
		||||
        info.stores_global_memory = true;
 | 
			
		||||
        [[fallthrough]];
 | 
			
		||||
    case IR::Opcode::LoadGlobalU8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS8:
 | 
			
		||||
    case IR::Opcode::LoadGlobalU16:
 | 
			
		||||
    case IR::Opcode::LoadGlobalS16:
 | 
			
		||||
    case IR::Opcode::LoadGlobal32:
 | 
			
		||||
    case IR::Opcode::LoadGlobal64:
 | 
			
		||||
    case IR::Opcode::LoadGlobal128:
 | 
			
		||||
        info.uses_int64 = true;
 | 
			
		||||
        info.uses_global_memory = true;
 | 
			
		||||
        info.used_constant_buffer_types |= IR::Type::U32 | IR::Type::U32x2;
 | 
			
		||||
@@ -800,9 +826,27 @@ void VisitFpModifiers(Info& info, IR::Inst& inst) {
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void VisitCbufs(Info& info, IR::Inst& inst) {
 | 
			
		||||
    switch (inst.GetOpcode()) {
 | 
			
		||||
    case IR::Opcode::GetCbufU8:
 | 
			
		||||
    case IR::Opcode::GetCbufS8:
 | 
			
		||||
    case IR::Opcode::GetCbufU16:
 | 
			
		||||
    case IR::Opcode::GetCbufS16:
 | 
			
		||||
    case IR::Opcode::GetCbufU32:
 | 
			
		||||
    case IR::Opcode::GetCbufF32:
 | 
			
		||||
    case IR::Opcode::GetCbufU32x2: {
 | 
			
		||||
        CheckCBufNVN(info, inst);
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    default:
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void Visit(Info& info, IR::Inst& inst) {
 | 
			
		||||
    VisitUsages(info, inst);
 | 
			
		||||
    VisitFpModifiers(info, inst);
 | 
			
		||||
    VisitCbufs(info, inst);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void GatherInfoFromHeader(Environment& env, Info& info) {
 | 
			
		||||
@@ -839,6 +883,26 @@ void GatherInfoFromHeader(Environment& env, Info& info) {
 | 
			
		||||
 | 
			
		||||
void CollectShaderInfoPass(Environment& env, IR::Program& program) {
 | 
			
		||||
    Info& info{program.info};
 | 
			
		||||
    const u32 base{[&] {
 | 
			
		||||
        switch (program.stage) {
 | 
			
		||||
        case Stage::VertexA:
 | 
			
		||||
        case Stage::VertexB:
 | 
			
		||||
            return 0x110u;
 | 
			
		||||
        case Stage::TessellationControl:
 | 
			
		||||
            return 0x210u;
 | 
			
		||||
        case Stage::TessellationEval:
 | 
			
		||||
            return 0x310u;
 | 
			
		||||
        case Stage::Geometry:
 | 
			
		||||
            return 0x410u;
 | 
			
		||||
        case Stage::Fragment:
 | 
			
		||||
            return 0x510u;
 | 
			
		||||
        case Stage::Compute:
 | 
			
		||||
            return 0x310u;
 | 
			
		||||
        }
 | 
			
		||||
        throw InvalidArgument("Invalid stage {}", program.stage);
 | 
			
		||||
    }()};
 | 
			
		||||
    info.nvn_buffer_base = base;
 | 
			
		||||
 | 
			
		||||
    for (IR::Block* const block : program.post_order_blocks) {
 | 
			
		||||
        for (IR::Inst& inst : block->Instructions()) {
 | 
			
		||||
            Visit(info, inst);
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@
 | 
			
		||||
#pragma once
 | 
			
		||||
 | 
			
		||||
#include <array>
 | 
			
		||||
#include <bitset>
 | 
			
		||||
 | 
			
		||||
#include "common/common_types.h"
 | 
			
		||||
#include "shader_recompiler/frontend/ir/type.h"
 | 
			
		||||
@@ -140,6 +141,7 @@ struct Info {
 | 
			
		||||
    bool stores_tess_level_outer{};
 | 
			
		||||
    bool stores_tess_level_inner{};
 | 
			
		||||
    bool stores_indexed_attributes{};
 | 
			
		||||
    bool stores_global_memory{};
 | 
			
		||||
 | 
			
		||||
    bool uses_fp16{};
 | 
			
		||||
    bool uses_fp64{};
 | 
			
		||||
@@ -180,6 +182,8 @@ struct Info {
 | 
			
		||||
    IR::Type used_storage_buffer_types{};
 | 
			
		||||
 | 
			
		||||
    u32 constant_buffer_mask{};
 | 
			
		||||
    u32 nvn_buffer_base{};
 | 
			
		||||
    std::bitset<16> nvn_buffer_used{};
 | 
			
		||||
 | 
			
		||||
    boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
 | 
			
		||||
        constant_buffer_descriptors;
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user