Merge pull request #3312 from ReinUsesLisp/atoms-u32
shader/memory: Implement ATOMS.ADD.U32
This commit is contained in:
		@@ -215,6 +215,18 @@ enum class F2fRoundingOp : u64 {
 | 
			
		||||
    Trunc = 11,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Operation selector for atomic memory instructions.
///
/// Stored in the 4-bit `operation` field of `Instruction::atoms`. The numeric values are part
/// of the instruction encoding and must not be renumbered.
enum class AtomicOp : u64 {
    Add = 0,  ///< The only operation the ATOMS decoder currently accepts
    Min = 1,
    Max = 2,
    Inc = 3,
    Dec = 4,
    And = 5,
    Or = 6,
    Xor = 7,
    Exch = 8,
};
 | 
			
		||||
 | 
			
		||||
enum class UniformType : u64 {
 | 
			
		||||
    UnsignedByte = 0,
 | 
			
		||||
    SignedByte = 1,
 | 
			
		||||
@@ -236,6 +248,13 @@ enum class StoreType : u64 {
 | 
			
		||||
    Bits128 = 6,
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
/// Operand type selector for atomic memory instructions.
///
/// Stored in the 2-bit `type` field of `Instruction::atoms`. The numeric values are part of
/// the instruction encoding and must not be renumbered.
enum class AtomicType : u64 {
    U32 = 0,  ///< The only type the ATOMS decoder currently accepts
    S32 = 1,
    U64 = 2,
    S64 = 3,
};
 | 
			
		||||
 | 
			
		||||
enum class IMinMaxExchange : u64 {
 | 
			
		||||
    None = 0,
 | 
			
		||||
    XLo = 1,
 | 
			
		||||
@@ -938,6 +957,16 @@ union Instruction {
 | 
			
		||||
        BitField<46, 2, u64> cache_mode;
 | 
			
		||||
    } stg;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<52, 4, AtomicOp> operation;
 | 
			
		||||
        BitField<28, 2, AtomicType> type;
 | 
			
		||||
        BitField<30, 22, s64> offset;
 | 
			
		||||
 | 
			
		||||
        s32 GetImmediateOffset() const {
 | 
			
		||||
            return static_cast<s32>(offset << 2);
 | 
			
		||||
        }
 | 
			
		||||
    } atoms;
 | 
			
		||||
 | 
			
		||||
    union {
 | 
			
		||||
        BitField<32, 1, PhysicalAttributeDirection> direction;
 | 
			
		||||
        BitField<47, 3, AttributeSize> size;
 | 
			
		||||
@@ -1659,9 +1688,10 @@ public:
 | 
			
		||||
        ST_A,
 | 
			
		||||
        ST_L,
 | 
			
		||||
        ST_S,
 | 
			
		||||
        ST,   // Store in generic memory
 | 
			
		||||
        STG,  // Store in global memory
 | 
			
		||||
        AL2P, // Transforms attribute memory into physical memory
 | 
			
		||||
        ST,    // Store in generic memory
 | 
			
		||||
        STG,   // Store in global memory
 | 
			
		||||
        ATOMS, // Atomic operation on shared memory
 | 
			
		||||
        AL2P,  // Transforms attribute memory into physical memory
 | 
			
		||||
        TEX,
 | 
			
		||||
        TEX_B,  // Texture Load Bindless
 | 
			
		||||
        TXQ,    // Texture Query
 | 
			
		||||
@@ -1964,6 +1994,7 @@ private:
 | 
			
		||||
            INST("1110111101010---", Id::ST_L, Type::Memory, "ST_L"),
 | 
			
		||||
            INST("101-------------", Id::ST, Type::Memory, "ST"),
 | 
			
		||||
            INST("1110111011011---", Id::STG, Type::Memory, "STG"),
 | 
			
		||||
            INST("11101100--------", Id::ATOMS, Type::Memory, "ATOMS"),
 | 
			
		||||
            INST("1110111110100---", Id::AL2P, Type::Memory, "AL2P"),
 | 
			
		||||
            INST("110000----111---", Id::TEX, Type::Texture, "TEX"),
 | 
			
		||||
            INST("1101111010111---", Id::TEX_B, Type::Texture, "TEX_B"),
 | 
			
		||||
 
 | 
			
		||||
@@ -1856,6 +1856,16 @@ private:
 | 
			
		||||
                Type::Uint};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    template <const std::string_view& opname, Type type>
 | 
			
		||||
    Expression Atomic(Operation operation) {
 | 
			
		||||
        ASSERT(stage == ShaderType::Compute);
 | 
			
		||||
        auto& smem = std::get<SmemNode>(*operation[0]);
 | 
			
		||||
 | 
			
		||||
        return {fmt::format("atomic{}(smem[{} >> 2], {})", opname, Visit(smem.GetAddress()).AsInt(),
 | 
			
		||||
                            Visit(operation[1]).As(type)),
 | 
			
		||||
                type};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    Expression Branch(Operation operation) {
 | 
			
		||||
        const auto target = std::get_if<ImmediateNode>(&*operation[0]);
 | 
			
		||||
        UNIMPLEMENTED_IF(!target);
 | 
			
		||||
@@ -2194,6 +2204,8 @@ private:
 | 
			
		||||
        &GLSLDecompiler::AtomicImage<Func::Xor>,
 | 
			
		||||
        &GLSLDecompiler::AtomicImage<Func::Exchange>,
 | 
			
		||||
 | 
			
		||||
        &GLSLDecompiler::Atomic<Func::Add, Type::Uint>,
 | 
			
		||||
 | 
			
		||||
        &GLSLDecompiler::Branch,
 | 
			
		||||
        &GLSLDecompiler::BranchIndirect,
 | 
			
		||||
        &GLSLDecompiler::PushFlowStack,
 | 
			
		||||
 
 | 
			
		||||
@@ -1796,6 +1796,11 @@ private:
 | 
			
		||||
        return {};
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    /// Handler for OperationCode::UAtomicAdd in the SPIR-V decompiler.
    ///
    /// Not implemented yet: reports the missing implementation and returns a
    /// default-constructed expression. The operation argument is ignored.
    Expression UAtomicAdd(Operation) {
        UNIMPLEMENTED();
        return Expression{};
    }
 | 
			
		||||
 | 
			
		||||
    Expression Branch(Operation operation) {
 | 
			
		||||
        const auto& target = std::get<ImmediateNode>(*operation[0]);
 | 
			
		||||
        OpStore(jmp_to, Constant(t_uint, target.GetValue()));
 | 
			
		||||
@@ -2373,6 +2378,8 @@ private:
 | 
			
		||||
        &SPIRVDecompiler::AtomicImageXor,
 | 
			
		||||
        &SPIRVDecompiler::AtomicImageExchange,
 | 
			
		||||
 | 
			
		||||
        &SPIRVDecompiler::UAtomicAdd,
 | 
			
		||||
 | 
			
		||||
        &SPIRVDecompiler::Branch,
 | 
			
		||||
        &SPIRVDecompiler::BranchIndirect,
 | 
			
		||||
        &SPIRVDecompiler::PushFlowStack,
 | 
			
		||||
 
 | 
			
		||||
@@ -16,6 +16,8 @@
 | 
			
		||||
 | 
			
		||||
namespace VideoCommon::Shader {
 | 
			
		||||
 | 
			
		||||
using Tegra::Shader::AtomicOp;
 | 
			
		||||
using Tegra::Shader::AtomicType;
 | 
			
		||||
using Tegra::Shader::Attribute;
 | 
			
		||||
using Tegra::Shader::Instruction;
 | 
			
		||||
using Tegra::Shader::OpCode;
 | 
			
		||||
@@ -333,6 +335,23 @@ u32 ShaderIR::DecodeMemory(NodeBlock& bb, u32 pc) {
 | 
			
		||||
        }
 | 
			
		||||
        break;
 | 
			
		||||
    }
 | 
			
		||||
    case OpCode::Id::ATOMS: {
        // Only the unsigned 32-bit ADD form is decoded; any other operation or operand
        // type is reported as unimplemented.
        UNIMPLEMENTED_IF_MSG(instr.atoms.operation != AtomicOp::Add, "operation={}",
                             static_cast<int>(instr.atoms.operation.Value()));
        UNIMPLEMENTED_IF_MSG(instr.atoms.type != AtomicType::U32, "type={}",
                             static_cast<int>(instr.atoms.type.Value()));

        // Effective address: contents of gpr8 plus the instruction's immediate offset.
        Node base = GetRegister(instr.gpr8);
        Node address = Operation(OperationCode::IAdd, std::move(base),
                                 Immediate(instr.atoms.GetImmediateOffset()));

        // Operands of the atomic: the shared memory location and the value held in gpr20.
        Node target = GetSharedMemory(std::move(address));
        Node operand = GetRegister(instr.gpr20);

        // The value produced by the atomic operation is written to gpr0.
        SetRegister(bb, instr.gpr0,
                    Operation(OperationCode::UAtomicAdd, std::move(target), std::move(operand)));
        break;
    }
 | 
			
		||||
    case OpCode::Id::AL2P: {
 | 
			
		||||
        // Ignore al2p.direction since we don't care about it.
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -162,6 +162,8 @@ enum class OperationCode {
 | 
			
		||||
    AtomicImageXor,      /// (MetaImage, int[N] coords) -> void
 | 
			
		||||
    AtomicImageExchange, /// (MetaImage, int[N] coords) -> void
 | 
			
		||||
 | 
			
		||||
    UAtomicAdd, /// (smem, uint) -> uint
 | 
			
		||||
 | 
			
		||||
    Branch,         /// (uint branch_target) -> void
 | 
			
		||||
    BranchIndirect, /// (uint branch_target) -> void
 | 
			
		||||
    PushFlowStack,  /// (uint branch_target) -> void
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user