mirror of
https://github.com/yuzu-emu/yuzu.git
synced 2025-01-23 22:20:04 +00:00
shader: Initial support for textures and TEX
This commit is contained in:
parent
7d6ba5b984
commit
ab46371247
@ -9,6 +9,7 @@ add_library(shader_recompiler STATIC
|
||||
backend/spirv/emit_spirv_control_flow.cpp
|
||||
backend/spirv/emit_spirv_convert.cpp
|
||||
backend/spirv/emit_spirv_floating_point.cpp
|
||||
backend/spirv/emit_spirv_image.cpp
|
||||
backend/spirv/emit_spirv_integer.cpp
|
||||
backend/spirv/emit_spirv_logical.cpp
|
||||
backend/spirv/emit_spirv_memory.cpp
|
||||
@ -100,6 +101,7 @@ add_library(shader_recompiler STATIC
|
||||
frontend/maxwell/translate/impl/predicate_set_predicate.cpp
|
||||
frontend/maxwell/translate/impl/predicate_set_register.cpp
|
||||
frontend/maxwell/translate/impl/select_source_with_predicate.cpp
|
||||
frontend/maxwell/translate/impl/texture_sample.cpp
|
||||
frontend/maxwell/translate/translate.cpp
|
||||
frontend/maxwell/translate/translate.h
|
||||
ir_opt/collect_shader_info_pass.cpp
|
||||
@ -110,6 +112,7 @@ add_library(shader_recompiler STATIC
|
||||
ir_opt/lower_fp16_to_fp32.cpp
|
||||
ir_opt/passes.h
|
||||
ir_opt/ssa_rewrite_pass.cpp
|
||||
ir_opt/texture_pass.cpp
|
||||
ir_opt/verification_pass.cpp
|
||||
object_pool.h
|
||||
profile.h
|
||||
|
@ -12,6 +12,43 @@
|
||||
#include "shader_recompiler/backend/spirv/emit_context.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
Id ImageType(EmitContext& ctx, const TextureDescriptor& desc) {
|
||||
const spv::ImageFormat format{spv::ImageFormat::Unknown};
|
||||
const Id type{ctx.F32[1]};
|
||||
switch (desc.type) {
|
||||
case TextureType::Color1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, false, false, false, 1, format);
|
||||
case TextureType::ColorArray1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, false, true, false, 1, format);
|
||||
case TextureType::Color2D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, false, false, false, 1, format);
|
||||
case TextureType::ColorArray2D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, false, true, false, 1, format);
|
||||
case TextureType::Color3D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim3D, false, false, false, 1, format);
|
||||
case TextureType::ColorCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, false, false, false, 1, format);
|
||||
case TextureType::ColorArrayCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
|
||||
case TextureType::Shadow1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, true, false, false, 1, format);
|
||||
case TextureType::ShadowArray1D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim1D, true, true, false, 1, format);
|
||||
case TextureType::Shadow2D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, true, false, false, 1, format);
|
||||
case TextureType::ShadowArray2D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim2D, true, true, false, 1, format);
|
||||
case TextureType::Shadow3D:
|
||||
return ctx.TypeImage(type, spv::Dim::Dim3D, true, false, false, 1, format);
|
||||
case TextureType::ShadowCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, true, false, false, 1, format);
|
||||
case TextureType::ShadowArrayCube:
|
||||
return ctx.TypeImage(type, spv::Dim::Cube, false, true, false, 1, format);
|
||||
}
|
||||
throw InvalidArgument("Invalid texture type {}", desc.type);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void VectorTypes::Define(Sirit::Module& sirit_ctx, Id base_type, std::string_view name) {
|
||||
defs[0] = sirit_ctx.Name(base_type, name);
|
||||
@ -35,6 +72,7 @@ EmitContext::EmitContext(const Profile& profile_, IR::Program& program)
|
||||
u32 binding{};
|
||||
DefineConstantBuffers(program.info, binding);
|
||||
DefineStorageBuffers(program.info, binding);
|
||||
DefineTextures(program.info, binding);
|
||||
|
||||
DefineLabels(program);
|
||||
}
|
||||
@ -46,6 +84,10 @@ Id EmitContext::Def(const IR::Value& value) {
|
||||
return value.Inst()->Definition<Id>();
|
||||
}
|
||||
switch (value.Type()) {
|
||||
case IR::Type::Void:
|
||||
// Void instructions are used for optional arguments (e.g. texture offsets)
|
||||
// They are not meant to be used in the SPIR-V module
|
||||
return Id{};
|
||||
case IR::Type::U1:
|
||||
return value.U1() ? true_value : false_value;
|
||||
case IR::Type::U32:
|
||||
@ -122,7 +164,7 @@ void EmitContext::DefineConstantBuffers(const Info& info, u32& binding) {
|
||||
uniform_u32 = TypePointer(spv::StorageClass::Uniform, U32[1]);
|
||||
|
||||
u32 index{};
|
||||
for (const Info::ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
|
||||
for (const ConstantBufferDescriptor& desc : info.constant_buffer_descriptors) {
|
||||
const Id id{AddGlobalVariable(uniform_type, spv::StorageClass::Uniform)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
@ -152,7 +194,7 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
||||
storage_u32 = TypePointer(spv::StorageClass::StorageBuffer, U32[1]);
|
||||
|
||||
u32 index{};
|
||||
for (const Info::StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
|
||||
for (const StorageBufferDescriptor& desc : info.storage_buffers_descriptors) {
|
||||
const Id id{AddGlobalVariable(storage_type, spv::StorageClass::StorageBuffer)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
@ -163,6 +205,29 @@ void EmitContext::DefineStorageBuffers(const Info& info, u32& binding) {
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineTextures(const Info& info, u32& binding) {
|
||||
textures.reserve(info.texture_descriptors.size());
|
||||
for (const TextureDescriptor& desc : info.texture_descriptors) {
|
||||
if (desc.count != 1) {
|
||||
throw NotImplementedException("Array of textures");
|
||||
}
|
||||
const Id type{TypeSampledImage(ImageType(*this, desc))};
|
||||
const Id pointer_type{TypePointer(spv::StorageClass::UniformConstant, type)};
|
||||
const Id id{AddGlobalVariable(pointer_type, spv::StorageClass::UniformConstant)};
|
||||
Decorate(id, spv::Decoration::Binding, binding);
|
||||
Decorate(id, spv::Decoration::DescriptorSet, 0U);
|
||||
Name(id, fmt::format("tex{}_{:02x}", desc.cbuf_index, desc.cbuf_offset));
|
||||
for (u32 index = 0; index < desc.count; ++index) {
|
||||
// TODO: Pass count info
|
||||
textures.push_back(TextureDefinition{
|
||||
.id{id},
|
||||
.type{type},
|
||||
});
|
||||
}
|
||||
binding += desc.count;
|
||||
}
|
||||
}
|
||||
|
||||
void EmitContext::DefineLabels(IR::Program& program) {
|
||||
for (const IR::Function& function : program.functions) {
|
||||
for (IR::Block* const block : function.blocks) {
|
||||
|
@ -29,6 +29,11 @@ private:
|
||||
std::array<Id, 4> defs{};
|
||||
};
|
||||
|
||||
struct TextureDefinition {
|
||||
Id id;
|
||||
Id type;
|
||||
};
|
||||
|
||||
class EmitContext final : public Sirit::Module {
|
||||
public:
|
||||
explicit EmitContext(const Profile& profile, IR::Program& program);
|
||||
@ -56,6 +61,7 @@ public:
|
||||
|
||||
std::array<Id, Info::MAX_CBUFS> cbufs{};
|
||||
std::array<Id, Info::MAX_SSBOS> ssbos{};
|
||||
std::vector<TextureDefinition> textures;
|
||||
|
||||
Id workgroup_id{};
|
||||
Id local_invocation_id{};
|
||||
@ -66,6 +72,7 @@ private:
|
||||
void DefineSpecialVariables(const Info& info);
|
||||
void DefineConstantBuffers(const Info& info, u32& binding);
|
||||
void DefineStorageBuffers(const Info& info, u32& binding);
|
||||
void DefineTextures(const Info& info, u32& binding);
|
||||
void DefineLabels(IR::Program& program);
|
||||
};
|
||||
|
||||
|
@ -221,6 +221,14 @@ std::vector<u32> EmitSPIRV(const Profile& profile, Environment& env, IR::Program
|
||||
workgroup_size[2]);
|
||||
|
||||
SetupDenormControl(profile, program, ctx, func);
|
||||
if (info.uses_sampled_1d) {
|
||||
ctx.AddCapability(spv::Capability::Sampled1D);
|
||||
}
|
||||
if (info.uses_sparse_residency) {
|
||||
ctx.AddCapability(spv::Capability::SparseResidency);
|
||||
}
|
||||
// TODO: Track this usage
|
||||
ctx.AddCapability(spv::Capability::ImageGatherExtended);
|
||||
|
||||
return ctx.Assemble();
|
||||
}
|
||||
@ -259,4 +267,8 @@ void EmitGetOverflowFromOp(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
void EmitGetSparseFromOp(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -83,7 +83,8 @@ void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Va
|
||||
Id value);
|
||||
void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
void EmitWriteStorage128(EmitContext& ctx);
|
||||
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value);
|
||||
Id EmitCompositeConstructU32x2(EmitContext& ctx, Id e1, Id e2);
|
||||
Id EmitCompositeConstructU32x3(EmitContext& ctx, Id e1, Id e2, Id e3);
|
||||
Id EmitCompositeConstructU32x4(EmitContext& ctx, Id e1, Id e2, Id e3, Id e4);
|
||||
@ -145,6 +146,7 @@ void EmitGetZeroFromOp(EmitContext& ctx);
|
||||
void EmitGetSignFromOp(EmitContext& ctx);
|
||||
void EmitGetCarryFromOp(EmitContext& ctx);
|
||||
void EmitGetOverflowFromOp(EmitContext& ctx);
|
||||
void EmitGetSparseFromOp(EmitContext& ctx);
|
||||
Id EmitFPAbs16(EmitContext& ctx, Id value);
|
||||
Id EmitFPAbs32(EmitContext& ctx, Id value);
|
||||
Id EmitFPAbs64(EmitContext& ctx, Id value);
|
||||
@ -291,5 +293,33 @@ Id EmitConvertF16F32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32F16(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32F64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64F32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF16S32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF16S64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF16U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF16U64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32S32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32S64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF32U64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64S32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64S64(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U32(EmitContext& ctx, Id value);
|
||||
Id EmitConvertF64U64(EmitContext& ctx, Id value);
|
||||
Id EmitBindlessImageSampleImplicitLod(EmitContext&);
|
||||
Id EmitBindlessImageSampleExplicitLod(EmitContext&);
|
||||
Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&);
|
||||
Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&);
|
||||
Id EmitBoundImageSampleImplicitLod(EmitContext&);
|
||||
Id EmitBoundImageSampleExplicitLod(EmitContext&);
|
||||
Id EmitBoundImageSampleDrefImplicitLod(EmitContext&);
|
||||
Id EmitBoundImageSampleDrefExplicitLod(EmitContext&);
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id bias_lc, Id offset);
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id lod_lc, Id offset);
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id bias_lc, Id offset);
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id lod_lc, Id offset);
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -102,4 +102,52 @@ Id EmitConvertF64F32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpFConvert(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF16S32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F16[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF16S64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F16[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF16U32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F16[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF16U64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F16[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF32S32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF32S64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF32U32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF32U64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F32[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF64S32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF64S64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertSToF(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF64U32(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
Id EmitConvertF64U64(EmitContext& ctx, Id value) {
|
||||
return ctx.OpConvertUToF(ctx.F64[1], value);
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
146
src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
Normal file
146
src/shader_recompiler/backend/spirv/emit_spirv_image.cpp
Normal file
@ -0,0 +1,146 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/backend/spirv/emit_spirv.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
|
||||
namespace Shader::Backend::SPIRV {
|
||||
namespace {
|
||||
class ImageOperands {
|
||||
public:
|
||||
explicit ImageOperands(EmitContext& ctx, bool has_bias, bool has_lod, bool has_lod_clamp,
|
||||
Id lod, Id offset) {
|
||||
if (has_bias) {
|
||||
const Id bias{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
|
||||
Add(spv::ImageOperandsMask::Bias, bias);
|
||||
}
|
||||
if (has_lod) {
|
||||
const Id lod_value{has_lod_clamp ? ctx.OpCompositeExtract(ctx.F32[1], lod, 0) : lod};
|
||||
Add(spv::ImageOperandsMask::Lod, lod_value);
|
||||
}
|
||||
if (Sirit::ValidId(offset)) {
|
||||
Add(spv::ImageOperandsMask::Offset, offset);
|
||||
}
|
||||
if (has_lod_clamp) {
|
||||
const Id lod_clamp{has_bias ? ctx.OpCompositeExtract(ctx.F32[1], lod, 1) : lod};
|
||||
Add(spv::ImageOperandsMask::MinLod, lod_clamp);
|
||||
}
|
||||
}
|
||||
|
||||
void Add(spv::ImageOperandsMask new_mask, Id value) {
|
||||
mask = static_cast<spv::ImageOperandsMask>(static_cast<unsigned>(mask) |
|
||||
static_cast<unsigned>(new_mask));
|
||||
operands.push_back(value);
|
||||
}
|
||||
|
||||
std::span<const Id> Span() const noexcept {
|
||||
return std::span{operands.data(), operands.size()};
|
||||
}
|
||||
|
||||
spv::ImageOperandsMask Mask() const noexcept {
|
||||
return mask;
|
||||
}
|
||||
|
||||
private:
|
||||
boost::container::static_vector<Id, 3> operands;
|
||||
spv::ImageOperandsMask mask{};
|
||||
};
|
||||
|
||||
Id Texture(EmitContext& ctx, const IR::Value& index) {
|
||||
if (index.IsImmediate()) {
|
||||
const TextureDefinition def{ctx.textures.at(index.U32())};
|
||||
return ctx.OpLoad(def.type, def.id);
|
||||
}
|
||||
throw NotImplementedException("Indirect texture sample");
|
||||
}
|
||||
|
||||
template <typename MethodPtrType, typename... Args>
|
||||
Id Emit(MethodPtrType sparse_ptr, MethodPtrType non_sparse_ptr, EmitContext& ctx, IR::Inst* inst,
|
||||
Id result_type, Args&&... args) {
|
||||
IR::Inst* const sparse{inst->GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp)};
|
||||
if (!sparse) {
|
||||
return (ctx.*non_sparse_ptr)(result_type, std::forward<Args>(args)...);
|
||||
}
|
||||
const Id struct_type{ctx.TypeStruct(ctx.U32[1], result_type)};
|
||||
const Id sample{(ctx.*sparse_ptr)(struct_type, std::forward<Args>(args)...)};
|
||||
const Id resident_code{ctx.OpCompositeExtract(ctx.U32[1], sample, 0U)};
|
||||
sparse->SetDefinition(ctx.OpImageSparseTexelsResident(ctx.U1, resident_code));
|
||||
sparse->Invalidate();
|
||||
return ctx.OpCompositeExtract(result_type, sample, 1U);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Id EmitBindlessImageSampleImplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBindlessImageSampleExplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBindlessImageSampleDrefImplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBindlessImageSampleDrefExplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBoundImageSampleImplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBoundImageSampleExplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBoundImageSampleDrefImplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitBoundImageSampleDrefExplicitLod(EmitContext&) {
|
||||
throw LogicError("Unreachable instruction");
|
||||
}
|
||||
|
||||
Id EmitImageSampleImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id bias_lc, Id offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
|
||||
offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleImplicitLod,
|
||||
&EmitContext::OpImageSampleImplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
|
||||
coords, operands.Mask(), operands.Span());
|
||||
}
|
||||
|
||||
Id EmitImageSampleExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index, Id coords,
|
||||
Id lod_lc, Id offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleExplicitLod,
|
||||
&EmitContext::OpImageSampleExplicitLod, ctx, inst, ctx.F32[4], Texture(ctx, index),
|
||||
coords, operands.Mask(), operands.Span());
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefImplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id bias_lc, Id offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const ImageOperands operands(ctx, info.has_bias != 0, false, info.has_lod_clamp != 0, bias_lc,
|
||||
offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleDrefImplicitLod,
|
||||
&EmitContext::OpImageSampleDrefImplicitLod, ctx, inst, ctx.F32[1],
|
||||
Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
||||
}
|
||||
|
||||
Id EmitImageSampleDrefExplicitLod(EmitContext& ctx, IR::Inst* inst, const IR::Value& index,
|
||||
Id coords, Id dref, Id lod_lc, Id offset) {
|
||||
const auto info{inst->Flags<IR::TextureInstInfo>()};
|
||||
const ImageOperands operands(ctx, false, true, info.has_lod_clamp != 0, lod_lc, offset);
|
||||
return Emit(&EmitContext::OpImageSparseSampleDrefExplicitLod,
|
||||
&EmitContext::OpImageSampleDrefExplicitLod, ctx, inst, ctx.F32[1],
|
||||
Texture(ctx, index), coords, dref, operands.Mask(), operands.Span());
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
@ -154,8 +154,22 @@ void EmitWriteStorage64(EmitContext& ctx, const IR::Value& binding, const IR::Va
|
||||
ctx.OpStore(high_pointer, ctx.OpCompositeExtract(ctx.U32[1], value, 1U));
|
||||
}
|
||||
|
||||
void EmitWriteStorage128(EmitContext&) {
|
||||
throw NotImplementedException("SPIR-V Instruction");
|
||||
void EmitWriteStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset,
|
||||
Id value) {
|
||||
if (!binding.IsImmediate()) {
|
||||
throw NotImplementedException("Dynamic storage buffer indexing");
|
||||
}
|
||||
// TODO: Support reinterpreting bindings, guaranteed to be aligned
|
||||
const Id ssbo{ctx.ssbos[binding.U32()]};
|
||||
const Id base_index{StorageIndex(ctx, offset, sizeof(u32))};
|
||||
for (u32 element = 0; element < 4; ++element) {
|
||||
Id index = base_index;
|
||||
if (element > 0) {
|
||||
index = ctx.OpIAdd(ctx.U32[1], base_index, ctx.Constant(ctx.U32[1], element));
|
||||
}
|
||||
const Id pointer{ctx.OpAccessChain(ctx.storage_u32, ssbo, ctx.u32_zero_value, index)};
|
||||
ctx.OpStore(pointer, ctx.OpCompositeExtract(ctx.U32[1], value, element));
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Backend::SPIRV
|
||||
|
@ -12,6 +12,8 @@ public:
|
||||
|
||||
[[nodiscard]] virtual u64 ReadInstruction(u32 address) = 0;
|
||||
|
||||
[[nodiscard]] virtual u32 TextureBoundBuffer() = 0;
|
||||
|
||||
[[nodiscard]] virtual std::array<u32, 3> WorkgroupSize() = 0;
|
||||
};
|
||||
|
||||
|
@ -39,6 +39,10 @@ u64 FileEnvironment::ReadInstruction(u32 offset) {
|
||||
return data[offset / 8];
|
||||
}
|
||||
|
||||
u32 FileEnvironment::TextureBoundBuffer() {
|
||||
throw NotImplementedException("Texture bound buffer serialization");
|
||||
}
|
||||
|
||||
std::array<u32, 3> FileEnvironment::WorkgroupSize() {
|
||||
return {1, 1, 1};
|
||||
}
|
||||
|
@ -3,7 +3,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "environment.h"
|
||||
#include "shader_recompiler/environment.h"
|
||||
|
||||
namespace Shader {
|
||||
|
||||
@ -14,6 +14,8 @@ public:
|
||||
|
||||
u64 ReadInstruction(u32 offset) override;
|
||||
|
||||
u32 TextureBoundBuffer() override;
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() override;
|
||||
|
||||
private:
|
||||
|
@ -7,11 +7,24 @@
|
||||
#include "shader_recompiler/frontend/ir/value.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
[[noreturn]] static void ThrowInvalidType(Type type) {
|
||||
namespace {
|
||||
[[noreturn]] void ThrowInvalidType(Type type) {
|
||||
throw InvalidArgument("Invalid type {}", type);
|
||||
}
|
||||
|
||||
Value MakeLodClampPair(IREmitter& ir, const F32& bias_lod, const F32& lod_clamp) {
|
||||
if (!bias_lod.IsEmpty() && !lod_clamp.IsEmpty()) {
|
||||
return ir.CompositeConstruct(bias_lod, lod_clamp);
|
||||
} else if (!bias_lod.IsEmpty()) {
|
||||
return bias_lod;
|
||||
} else if (!lod_clamp.IsEmpty()) {
|
||||
return lod_clamp;
|
||||
} else {
|
||||
return Value{};
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
U1 IREmitter::Imm1(bool value) const {
|
||||
return U1{Value{value}};
|
||||
}
|
||||
@ -261,6 +274,10 @@ U1 IREmitter::GetOverflowFromOp(const Value& op) {
|
||||
return Inst<U1>(Opcode::GetOverflowFromOp, op);
|
||||
}
|
||||
|
||||
U1 IREmitter::GetSparseFromOp(const Value& op) {
|
||||
return Inst<U1>(Opcode::GetSparseFromOp, op);
|
||||
}
|
||||
|
||||
F16F32F64 IREmitter::FPAdd(const F16F32F64& a, const F16F32F64& b, FpControl control) {
|
||||
if (a.Type() != a.Type()) {
|
||||
throw InvalidArgument("Mismatching types {} and {}", a.Type(), b.Type());
|
||||
@ -1035,6 +1052,82 @@ U32U64 IREmitter::ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& v
|
||||
}
|
||||
}
|
||||
|
||||
F16F32F64 IREmitter::ConvertSToF(size_t bitsize, const U32U64& value) {
|
||||
switch (bitsize) {
|
||||
case 16:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F16>(Opcode::ConvertF16S32, value);
|
||||
case Type::U64:
|
||||
return Inst<F16>(Opcode::ConvertF16S64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
case 32:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F32>(Opcode::ConvertF32S32, value);
|
||||
case Type::U64:
|
||||
return Inst<F32>(Opcode::ConvertF32S64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
case 64:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F16>(Opcode::ConvertF64S32, value);
|
||||
case Type::U64:
|
||||
return Inst<F16>(Opcode::ConvertF64S64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
default:
|
||||
throw InvalidArgument("Invalid destination bitsize {}", bitsize);
|
||||
}
|
||||
}
|
||||
|
||||
F16F32F64 IREmitter::ConvertUToF(size_t bitsize, const U32U64& value) {
|
||||
switch (bitsize) {
|
||||
case 16:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F16>(Opcode::ConvertF16U32, value);
|
||||
case Type::U64:
|
||||
return Inst<F16>(Opcode::ConvertF16U64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
case 32:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F32>(Opcode::ConvertF32U32, value);
|
||||
case Type::U64:
|
||||
return Inst<F32>(Opcode::ConvertF32U64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
case 64:
|
||||
switch (value.Type()) {
|
||||
case Type::U32:
|
||||
return Inst<F16>(Opcode::ConvertF64U32, value);
|
||||
case Type::U64:
|
||||
return Inst<F16>(Opcode::ConvertF64U64, value);
|
||||
default:
|
||||
ThrowInvalidType(value.Type());
|
||||
}
|
||||
default:
|
||||
throw InvalidArgument("Invalid destination bitsize {}", bitsize);
|
||||
}
|
||||
}
|
||||
|
||||
F16F32F64 IREmitter::ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value) {
|
||||
if (is_signed) {
|
||||
return ConvertSToF(bitsize, value);
|
||||
} else {
|
||||
return ConvertUToF(bitsize, value);
|
||||
}
|
||||
}
|
||||
|
||||
U32U64 IREmitter::UConvert(size_t result_bitsize, const U32U64& value) {
|
||||
switch (result_bitsize) {
|
||||
case 32:
|
||||
@ -1107,4 +1200,40 @@ F16F32F64 IREmitter::FPConvert(size_t result_bitsize, const F16F32F64& value) {
|
||||
throw NotImplementedException("Conversion from {} to {} bits", value.Type(), result_bitsize);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleImplicitLod(const Value& handle, const Value& coords, const F32& bias,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info) {
|
||||
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleImplicitLod
|
||||
: Opcode::BindlessImageSampleImplicitLod};
|
||||
return Inst(op, Flags{info}, handle, coords, bias_lc, offset);
|
||||
}
|
||||
|
||||
Value IREmitter::ImageSampleExplicitLod(const Value& handle, const Value& coords, const F32& lod,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info) {
|
||||
const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleExplicitLod
|
||||
: Opcode::BindlessImageSampleExplicitLod};
|
||||
return Inst(op, Flags{info}, handle, coords, lod_lc, offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ImageSampleDrefImplicitLod(const Value& handle, const Value& coords, const F32& dref,
|
||||
const F32& bias, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info) {
|
||||
const Value bias_lc{MakeLodClampPair(*this, bias, lod_clamp)};
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefImplicitLod
|
||||
: Opcode::BindlessImageSampleDrefImplicitLod};
|
||||
return Inst<F32>(op, Flags{info}, handle, coords, dref, bias_lc, offset);
|
||||
}
|
||||
|
||||
F32 IREmitter::ImageSampleDrefExplicitLod(const Value& handle, const Value& coords, const F32& dref,
|
||||
const F32& lod, const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info) {
|
||||
const Value lod_lc{MakeLodClampPair(*this, lod, lod_clamp)};
|
||||
const Opcode op{handle.IsImmediate() ? Opcode::BoundImageSampleDrefExplicitLod
|
||||
: Opcode::BindlessImageSampleDrefExplicitLod};
|
||||
return Inst<F32>(op, Flags{info}, handle, coords, dref, lod_lc, offset);
|
||||
}
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
@ -91,6 +91,7 @@ public:
|
||||
[[nodiscard]] U1 GetSignFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetCarryFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetOverflowFromOp(const Value& op);
|
||||
[[nodiscard]] U1 GetSparseFromOp(const Value& op);
|
||||
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2);
|
||||
[[nodiscard]] Value CompositeConstruct(const Value& e1, const Value& e2, const Value& e3);
|
||||
@ -159,7 +160,7 @@ public:
|
||||
[[nodiscard]] U32 BitFieldInsert(const U32& base, const U32& insert, const U32& offset,
|
||||
const U32& count);
|
||||
[[nodiscard]] U32 BitFieldExtract(const U32& base, const U32& offset, const U32& count,
|
||||
bool is_signed);
|
||||
bool is_signed = false);
|
||||
[[nodiscard]] U32 BitReverse(const U32& value);
|
||||
[[nodiscard]] U32 BitCount(const U32& value);
|
||||
[[nodiscard]] U32 BitwiseNot(const U32& value);
|
||||
@ -186,10 +187,28 @@ public:
|
||||
[[nodiscard]] U32U64 ConvertFToS(size_t bitsize, const F16F32F64& value);
|
||||
[[nodiscard]] U32U64 ConvertFToU(size_t bitsize, const F16F32F64& value);
|
||||
[[nodiscard]] U32U64 ConvertFToI(size_t bitsize, bool is_signed, const F16F32F64& value);
|
||||
[[nodiscard]] F16F32F64 ConvertSToF(size_t bitsize, const U32U64& value);
|
||||
[[nodiscard]] F16F32F64 ConvertUToF(size_t bitsize, const U32U64& value);
|
||||
[[nodiscard]] F16F32F64 ConvertIToF(size_t bitsize, bool is_signed, const U32U64& value);
|
||||
|
||||
[[nodiscard]] U32U64 UConvert(size_t result_bitsize, const U32U64& value);
|
||||
[[nodiscard]] F16F32F64 FPConvert(size_t result_bitsize, const F16F32F64& value);
|
||||
|
||||
[[nodiscard]] Value ImageSampleImplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& bias, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info);
|
||||
[[nodiscard]] Value ImageSampleExplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& lod, const Value& offset,
|
||||
const F32& lod_clamp, TextureInstInfo info);
|
||||
[[nodiscard]] F32 ImageSampleDrefImplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& dref, const F32& bias,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info);
|
||||
[[nodiscard]] F32 ImageSampleDrefExplicitLod(const Value& handle, const Value& coords,
|
||||
const F32& dref, const F32& lod,
|
||||
const Value& offset, const F32& lod_clamp,
|
||||
TextureInstInfo info);
|
||||
|
||||
private:
|
||||
IR::Block::iterator insertion_point;
|
||||
|
||||
|
@ -10,26 +10,27 @@
|
||||
#include "shader_recompiler/frontend/ir/type.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
static void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
|
||||
namespace {
|
||||
void CheckPseudoInstruction(IR::Inst* inst, IR::Opcode opcode) {
|
||||
if (inst && inst->Opcode() != opcode) {
|
||||
throw LogicError("Invalid pseudo-instruction");
|
||||
}
|
||||
}
|
||||
|
||||
static void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
|
||||
void SetPseudoInstruction(IR::Inst*& dest_inst, IR::Inst* pseudo_inst) {
|
||||
if (dest_inst) {
|
||||
throw LogicError("Only one of each type of pseudo-op allowed");
|
||||
}
|
||||
dest_inst = pseudo_inst;
|
||||
}
|
||||
|
||||
static void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
|
||||
void RemovePseudoInstruction(IR::Inst*& inst, IR::Opcode expected_opcode) {
|
||||
if (inst->Opcode() != expected_opcode) {
|
||||
throw LogicError("Undoing use of invalid pseudo-op");
|
||||
}
|
||||
inst = nullptr;
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
Inst::Inst(IR::Opcode op_, u32 flags_) noexcept : op{op_}, flags{flags_} {
|
||||
if (op == Opcode::Phi) {
|
||||
@ -82,6 +83,7 @@ bool Inst::IsPseudoInstruction() const noexcept {
|
||||
case Opcode::GetSignFromOp:
|
||||
case Opcode::GetCarryFromOp:
|
||||
case Opcode::GetOverflowFromOp:
|
||||
case Opcode::GetSparseFromOp:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
@ -96,25 +98,26 @@ bool Inst::AreAllArgsImmediates() const {
|
||||
[](const IR::Value& value) { return value.IsImmediate(); });
|
||||
}
|
||||
|
||||
bool Inst::HasAssociatedPseudoOperation() const noexcept {
|
||||
return zero_inst || sign_inst || carry_inst || overflow_inst;
|
||||
}
|
||||
|
||||
Inst* Inst::GetAssociatedPseudoOperation(IR::Opcode opcode) {
|
||||
// This is faster than doing a search through the block.
|
||||
if (!associated_insts) {
|
||||
return nullptr;
|
||||
}
|
||||
switch (opcode) {
|
||||
case Opcode::GetZeroFromOp:
|
||||
CheckPseudoInstruction(zero_inst, Opcode::GetZeroFromOp);
|
||||
return zero_inst;
|
||||
CheckPseudoInstruction(associated_insts->zero_inst, Opcode::GetZeroFromOp);
|
||||
return associated_insts->zero_inst;
|
||||
case Opcode::GetSignFromOp:
|
||||
CheckPseudoInstruction(sign_inst, Opcode::GetSignFromOp);
|
||||
return sign_inst;
|
||||
CheckPseudoInstruction(associated_insts->sign_inst, Opcode::GetSignFromOp);
|
||||
return associated_insts->sign_inst;
|
||||
case Opcode::GetCarryFromOp:
|
||||
CheckPseudoInstruction(carry_inst, Opcode::GetCarryFromOp);
|
||||
return carry_inst;
|
||||
CheckPseudoInstruction(associated_insts->carry_inst, Opcode::GetCarryFromOp);
|
||||
return associated_insts->carry_inst;
|
||||
case Opcode::GetOverflowFromOp:
|
||||
CheckPseudoInstruction(overflow_inst, Opcode::GetOverflowFromOp);
|
||||
return overflow_inst;
|
||||
CheckPseudoInstruction(associated_insts->overflow_inst, Opcode::GetOverflowFromOp);
|
||||
return associated_insts->overflow_inst;
|
||||
case Opcode::GetSparseFromOp:
|
||||
CheckPseudoInstruction(associated_insts->sparse_inst, Opcode::GetSparseFromOp);
|
||||
return associated_insts->sparse_inst;
|
||||
default:
|
||||
throw InvalidArgument("{} is not a pseudo-instruction", opcode);
|
||||
}
|
||||
@ -220,22 +223,37 @@ void Inst::ReplaceOpcode(IR::Opcode opcode) {
|
||||
op = opcode;
|
||||
}
|
||||
|
||||
void AllocAssociatedInsts(std::unique_ptr<AssociatedInsts>& associated_insts) {
|
||||
if (!associated_insts) {
|
||||
associated_insts = std::make_unique<AssociatedInsts>();
|
||||
}
|
||||
}
|
||||
|
||||
void Inst::Use(const Value& value) {
|
||||
Inst* const inst{value.Inst()};
|
||||
++inst->use_count;
|
||||
|
||||
std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
|
||||
switch (op) {
|
||||
case Opcode::GetZeroFromOp:
|
||||
SetPseudoInstruction(inst->zero_inst, this);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
SetPseudoInstruction(assoc_inst->zero_inst, this);
|
||||
break;
|
||||
case Opcode::GetSignFromOp:
|
||||
SetPseudoInstruction(inst->sign_inst, this);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
SetPseudoInstruction(assoc_inst->sign_inst, this);
|
||||
break;
|
||||
case Opcode::GetCarryFromOp:
|
||||
SetPseudoInstruction(inst->carry_inst, this);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
SetPseudoInstruction(assoc_inst->carry_inst, this);
|
||||
break;
|
||||
case Opcode::GetOverflowFromOp:
|
||||
SetPseudoInstruction(inst->overflow_inst, this);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
SetPseudoInstruction(assoc_inst->overflow_inst, this);
|
||||
break;
|
||||
case Opcode::GetSparseFromOp:
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
SetPseudoInstruction(assoc_inst->sparse_inst, this);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -246,18 +264,23 @@ void Inst::UndoUse(const Value& value) {
|
||||
Inst* const inst{value.Inst()};
|
||||
--inst->use_count;
|
||||
|
||||
std::unique_ptr<AssociatedInsts>& assoc_inst{inst->associated_insts};
|
||||
switch (op) {
|
||||
case Opcode::GetZeroFromOp:
|
||||
RemovePseudoInstruction(inst->zero_inst, Opcode::GetZeroFromOp);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
RemovePseudoInstruction(assoc_inst->zero_inst, Opcode::GetZeroFromOp);
|
||||
break;
|
||||
case Opcode::GetSignFromOp:
|
||||
RemovePseudoInstruction(inst->sign_inst, Opcode::GetSignFromOp);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
RemovePseudoInstruction(assoc_inst->sign_inst, Opcode::GetSignFromOp);
|
||||
break;
|
||||
case Opcode::GetCarryFromOp:
|
||||
RemovePseudoInstruction(inst->carry_inst, Opcode::GetCarryFromOp);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
RemovePseudoInstruction(assoc_inst->carry_inst, Opcode::GetCarryFromOp);
|
||||
break;
|
||||
case Opcode::GetOverflowFromOp:
|
||||
RemovePseudoInstruction(inst->overflow_inst, Opcode::GetOverflowFromOp);
|
||||
AllocAssociatedInsts(assoc_inst);
|
||||
RemovePseudoInstruction(assoc_inst->overflow_inst, Opcode::GetOverflowFromOp);
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -22,7 +22,7 @@ namespace Shader::IR {
|
||||
|
||||
class Block;
|
||||
|
||||
constexpr size_t MAX_ARG_COUNT = 4;
|
||||
struct AssociatedInsts;
|
||||
|
||||
class Inst : public boost::intrusive::list_base_hook<> {
|
||||
public:
|
||||
@ -50,6 +50,11 @@ public:
|
||||
return op;
|
||||
}
|
||||
|
||||
/// Determines if there is a pseudo-operation associated with this instruction.
|
||||
[[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept {
|
||||
return associated_insts != nullptr;
|
||||
}
|
||||
|
||||
/// Determines whether or not this instruction may have side effects.
|
||||
[[nodiscard]] bool MayHaveSideEffects() const noexcept;
|
||||
|
||||
@ -60,8 +65,6 @@ public:
|
||||
/// Determines if all arguments of this instruction are immediates.
|
||||
[[nodiscard]] bool AreAllArgsImmediates() const;
|
||||
|
||||
/// Determines if there is a pseudo-operation associated with this instruction.
|
||||
[[nodiscard]] bool HasAssociatedPseudoOperation() const noexcept;
|
||||
/// Gets a pseudo-operation associated with this instruction
|
||||
[[nodiscard]] Inst* GetAssociatedPseudoOperation(IR::Opcode opcode);
|
||||
|
||||
@ -122,14 +125,21 @@ private:
|
||||
u32 definition{};
|
||||
union {
|
||||
NonTriviallyDummy dummy{};
|
||||
std::array<Value, MAX_ARG_COUNT> args;
|
||||
std::vector<std::pair<Block*, Value>> phi_args;
|
||||
std::array<Value, 5> args;
|
||||
};
|
||||
std::unique_ptr<AssociatedInsts> associated_insts;
|
||||
};
|
||||
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased");
|
||||
|
||||
struct AssociatedInsts {
|
||||
union {
|
||||
Inst* sparse_inst;
|
||||
Inst* zero_inst{};
|
||||
};
|
||||
Inst* zero_inst{};
|
||||
Inst* sign_inst{};
|
||||
Inst* carry_inst{};
|
||||
Inst* overflow_inst{};
|
||||
};
|
||||
static_assert(sizeof(Inst) <= 128, "Inst size unintentionally increased its size");
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
@ -4,7 +4,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
|
||||
namespace Shader::IR {
|
||||
|
||||
@ -30,4 +32,12 @@ struct FpControl {
|
||||
};
|
||||
static_assert(sizeof(FpControl) <= sizeof(u32));
|
||||
|
||||
union TextureInstInfo {
|
||||
u32 raw;
|
||||
BitField<0, 8, TextureType> type;
|
||||
BitField<8, 1, u32> has_bias;
|
||||
BitField<16, 1, u32> has_lod_clamp;
|
||||
};
|
||||
static_assert(sizeof(TextureInstInfo) <= sizeof(u32));
|
||||
|
||||
} // namespace Shader::IR
|
||||
|
@ -14,7 +14,7 @@ namespace {
|
||||
struct OpcodeMeta {
|
||||
std::string_view name;
|
||||
Type type;
|
||||
std::array<Type, 4> arg_types;
|
||||
std::array<Type, 5> arg_types;
|
||||
};
|
||||
|
||||
using enum Type;
|
||||
|
@ -2,301 +2,330 @@
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, ...
|
||||
OPCODE(Phi, Opaque, )
|
||||
OPCODE(Identity, Opaque, Opaque, )
|
||||
OPCODE(Void, Void, )
|
||||
// opcode name, return type, arg1 type, arg2 type, arg3 type, arg4 type, arg4 type, ...
|
||||
OPCODE(Phi, Opaque, )
|
||||
OPCODE(Identity, Opaque, Opaque, )
|
||||
OPCODE(Void, Void, )
|
||||
|
||||
// Control flow
|
||||
OPCODE(Branch, Void, Label, )
|
||||
OPCODE(BranchConditional, Void, U1, Label, Label, )
|
||||
OPCODE(LoopMerge, Void, Label, Label, )
|
||||
OPCODE(SelectionMerge, Void, Label, )
|
||||
OPCODE(Return, Void, )
|
||||
OPCODE(Branch, Void, Label, )
|
||||
OPCODE(BranchConditional, Void, U1, Label, Label, )
|
||||
OPCODE(LoopMerge, Void, Label, Label, )
|
||||
OPCODE(SelectionMerge, Void, Label, )
|
||||
OPCODE(Return, Void, )
|
||||
|
||||
// Context getters/setters
|
||||
OPCODE(GetRegister, U32, Reg, )
|
||||
OPCODE(SetRegister, Void, Reg, U32, )
|
||||
OPCODE(GetPred, U1, Pred, )
|
||||
OPCODE(SetPred, Void, Pred, U1, )
|
||||
OPCODE(GetGotoVariable, U1, U32, )
|
||||
OPCODE(SetGotoVariable, Void, U32, U1, )
|
||||
OPCODE(GetCbuf, U32, U32, U32, )
|
||||
OPCODE(GetAttribute, U32, Attribute, )
|
||||
OPCODE(SetAttribute, Void, Attribute, U32, )
|
||||
OPCODE(GetAttributeIndexed, U32, U32, )
|
||||
OPCODE(SetAttributeIndexed, Void, U32, U32, )
|
||||
OPCODE(GetZFlag, U1, Void, )
|
||||
OPCODE(GetSFlag, U1, Void, )
|
||||
OPCODE(GetCFlag, U1, Void, )
|
||||
OPCODE(GetOFlag, U1, Void, )
|
||||
OPCODE(SetZFlag, Void, U1, )
|
||||
OPCODE(SetSFlag, Void, U1, )
|
||||
OPCODE(SetCFlag, Void, U1, )
|
||||
OPCODE(SetOFlag, Void, U1, )
|
||||
OPCODE(WorkgroupId, U32x3, )
|
||||
OPCODE(LocalInvocationId, U32x3, )
|
||||
OPCODE(GetRegister, U32, Reg, )
|
||||
OPCODE(SetRegister, Void, Reg, U32, )
|
||||
OPCODE(GetPred, U1, Pred, )
|
||||
OPCODE(SetPred, Void, Pred, U1, )
|
||||
OPCODE(GetGotoVariable, U1, U32, )
|
||||
OPCODE(SetGotoVariable, Void, U32, U1, )
|
||||
OPCODE(GetCbuf, U32, U32, U32, )
|
||||
OPCODE(GetAttribute, U32, Attribute, )
|
||||
OPCODE(SetAttribute, Void, Attribute, U32, )
|
||||
OPCODE(GetAttributeIndexed, U32, U32, )
|
||||
OPCODE(SetAttributeIndexed, Void, U32, U32, )
|
||||
OPCODE(GetZFlag, U1, Void, )
|
||||
OPCODE(GetSFlag, U1, Void, )
|
||||
OPCODE(GetCFlag, U1, Void, )
|
||||
OPCODE(GetOFlag, U1, Void, )
|
||||
OPCODE(SetZFlag, Void, U1, )
|
||||
OPCODE(SetSFlag, Void, U1, )
|
||||
OPCODE(SetCFlag, Void, U1, )
|
||||
OPCODE(SetOFlag, Void, U1, )
|
||||
OPCODE(WorkgroupId, U32x3, )
|
||||
OPCODE(LocalInvocationId, U32x3, )
|
||||
|
||||
// Undefined
|
||||
OPCODE(UndefU1, U1, )
|
||||
OPCODE(UndefU8, U8, )
|
||||
OPCODE(UndefU16, U16, )
|
||||
OPCODE(UndefU32, U32, )
|
||||
OPCODE(UndefU64, U64, )
|
||||
OPCODE(UndefU1, U1, )
|
||||
OPCODE(UndefU8, U8, )
|
||||
OPCODE(UndefU16, U16, )
|
||||
OPCODE(UndefU32, U32, )
|
||||
OPCODE(UndefU64, U64, )
|
||||
|
||||
// Memory operations
|
||||
OPCODE(LoadGlobalU8, U32, U64, )
|
||||
OPCODE(LoadGlobalS8, U32, U64, )
|
||||
OPCODE(LoadGlobalU16, U32, U64, )
|
||||
OPCODE(LoadGlobalS16, U32, U64, )
|
||||
OPCODE(LoadGlobal32, U32, U64, )
|
||||
OPCODE(LoadGlobal64, U32x2, U64, )
|
||||
OPCODE(LoadGlobal128, U32x4, U64, )
|
||||
OPCODE(WriteGlobalU8, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalS8, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalU16, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalS16, Void, U64, U32, )
|
||||
OPCODE(WriteGlobal32, Void, U64, U32, )
|
||||
OPCODE(WriteGlobal64, Void, U64, U32x2, )
|
||||
OPCODE(WriteGlobal128, Void, U64, U32x4, )
|
||||
OPCODE(LoadGlobalU8, U32, U64, )
|
||||
OPCODE(LoadGlobalS8, U32, U64, )
|
||||
OPCODE(LoadGlobalU16, U32, U64, )
|
||||
OPCODE(LoadGlobalS16, U32, U64, )
|
||||
OPCODE(LoadGlobal32, U32, U64, )
|
||||
OPCODE(LoadGlobal64, U32x2, U64, )
|
||||
OPCODE(LoadGlobal128, U32x4, U64, )
|
||||
OPCODE(WriteGlobalU8, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalS8, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalU16, Void, U64, U32, )
|
||||
OPCODE(WriteGlobalS16, Void, U64, U32, )
|
||||
OPCODE(WriteGlobal32, Void, U64, U32, )
|
||||
OPCODE(WriteGlobal64, Void, U64, U32x2, )
|
||||
OPCODE(WriteGlobal128, Void, U64, U32x4, )
|
||||
|
||||
// Storage buffer operations
|
||||
OPCODE(LoadStorageU8, U32, U32, U32, )
|
||||
OPCODE(LoadStorageS8, U32, U32, U32, )
|
||||
OPCODE(LoadStorageU16, U32, U32, U32, )
|
||||
OPCODE(LoadStorageS16, U32, U32, U32, )
|
||||
OPCODE(LoadStorage32, U32, U32, U32, )
|
||||
OPCODE(LoadStorage64, U32x2, U32, U32, )
|
||||
OPCODE(LoadStorage128, U32x4, U32, U32, )
|
||||
OPCODE(WriteStorageU8, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageS8, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageU16, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageS16, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorage32, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
|
||||
OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
|
||||
OPCODE(LoadStorageU8, U32, U32, U32, )
|
||||
OPCODE(LoadStorageS8, U32, U32, U32, )
|
||||
OPCODE(LoadStorageU16, U32, U32, U32, )
|
||||
OPCODE(LoadStorageS16, U32, U32, U32, )
|
||||
OPCODE(LoadStorage32, U32, U32, U32, )
|
||||
OPCODE(LoadStorage64, U32x2, U32, U32, )
|
||||
OPCODE(LoadStorage128, U32x4, U32, U32, )
|
||||
OPCODE(WriteStorageU8, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageS8, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageU16, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorageS16, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorage32, Void, U32, U32, U32, )
|
||||
OPCODE(WriteStorage64, Void, U32, U32, U32x2, )
|
||||
OPCODE(WriteStorage128, Void, U32, U32, U32x4, )
|
||||
|
||||
// Vector utility
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
|
||||
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
|
||||
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
|
||||
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
|
||||
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
|
||||
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
|
||||
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
|
||||
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
|
||||
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
|
||||
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
|
||||
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
|
||||
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
|
||||
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
|
||||
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
|
||||
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
|
||||
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
|
||||
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
|
||||
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
|
||||
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
|
||||
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
|
||||
OPCODE(CompositeConstructU32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x3, U32x3, U32, U32, U32, )
|
||||
OPCODE(CompositeConstructU32x4, U32x4, U32, U32, U32, U32, )
|
||||
OPCODE(CompositeExtractU32x2, U32, U32x2, U32, )
|
||||
OPCODE(CompositeExtractU32x3, U32, U32x3, U32, )
|
||||
OPCODE(CompositeExtractU32x4, U32, U32x4, U32, )
|
||||
OPCODE(CompositeInsertU32x2, U32x2, U32x2, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x3, U32x3, U32x3, U32, U32, )
|
||||
OPCODE(CompositeInsertU32x4, U32x4, U32x4, U32, U32, )
|
||||
OPCODE(CompositeConstructF16x2, F16x2, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x3, F16x3, F16, F16, F16, )
|
||||
OPCODE(CompositeConstructF16x4, F16x4, F16, F16, F16, F16, )
|
||||
OPCODE(CompositeExtractF16x2, F16, F16x2, U32, )
|
||||
OPCODE(CompositeExtractF16x3, F16, F16x3, U32, )
|
||||
OPCODE(CompositeExtractF16x4, F16, F16x4, U32, )
|
||||
OPCODE(CompositeInsertF16x2, F16x2, F16x2, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x3, F16x3, F16x3, F16, U32, )
|
||||
OPCODE(CompositeInsertF16x4, F16x4, F16x4, F16, U32, )
|
||||
OPCODE(CompositeConstructF32x2, F32x2, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x3, F32x3, F32, F32, F32, )
|
||||
OPCODE(CompositeConstructF32x4, F32x4, F32, F32, F32, F32, )
|
||||
OPCODE(CompositeExtractF32x2, F32, F32x2, U32, )
|
||||
OPCODE(CompositeExtractF32x3, F32, F32x3, U32, )
|
||||
OPCODE(CompositeExtractF32x4, F32, F32x4, U32, )
|
||||
OPCODE(CompositeInsertF32x2, F32x2, F32x2, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x3, F32x3, F32x3, F32, U32, )
|
||||
OPCODE(CompositeInsertF32x4, F32x4, F32x4, F32, U32, )
|
||||
OPCODE(CompositeConstructF64x2, F64x2, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x3, F64x3, F64, F64, F64, )
|
||||
OPCODE(CompositeConstructF64x4, F64x4, F64, F64, F64, F64, )
|
||||
OPCODE(CompositeExtractF64x2, F64, F64x2, U32, )
|
||||
OPCODE(CompositeExtractF64x3, F64, F64x3, U32, )
|
||||
OPCODE(CompositeExtractF64x4, F64, F64x4, U32, )
|
||||
OPCODE(CompositeInsertF64x2, F64x2, F64x2, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x3, F64x3, F64x3, F64, U32, )
|
||||
OPCODE(CompositeInsertF64x4, F64x4, F64x4, F64, U32, )
|
||||
|
||||
// Select operations
|
||||
OPCODE(SelectU1, U1, U1, U1, U1, )
|
||||
OPCODE(SelectU8, U8, U1, U8, U8, )
|
||||
OPCODE(SelectU16, U16, U1, U16, U16, )
|
||||
OPCODE(SelectU32, U32, U1, U32, U32, )
|
||||
OPCODE(SelectU64, U64, U1, U64, U64, )
|
||||
OPCODE(SelectF16, F16, U1, F16, F16, )
|
||||
OPCODE(SelectF32, F32, U1, F32, F32, )
|
||||
OPCODE(SelectU1, U1, U1, U1, U1, )
|
||||
OPCODE(SelectU8, U8, U1, U8, U8, )
|
||||
OPCODE(SelectU16, U16, U1, U16, U16, )
|
||||
OPCODE(SelectU32, U32, U1, U32, U32, )
|
||||
OPCODE(SelectU64, U64, U1, U64, U64, )
|
||||
OPCODE(SelectF16, F16, U1, F16, F16, )
|
||||
OPCODE(SelectF32, F32, U1, F32, F32, )
|
||||
|
||||
// Bitwise conversions
|
||||
OPCODE(BitCastU16F16, U16, F16, )
|
||||
OPCODE(BitCastU32F32, U32, F32, )
|
||||
OPCODE(BitCastU64F64, U64, F64, )
|
||||
OPCODE(BitCastF16U16, F16, U16, )
|
||||
OPCODE(BitCastF32U32, F32, U32, )
|
||||
OPCODE(BitCastF64U64, F64, U64, )
|
||||
OPCODE(PackUint2x32, U64, U32x2, )
|
||||
OPCODE(UnpackUint2x32, U32x2, U64, )
|
||||
OPCODE(PackFloat2x16, U32, F16x2, )
|
||||
OPCODE(UnpackFloat2x16, F16x2, U32, )
|
||||
OPCODE(PackHalf2x16, U32, F32x2, )
|
||||
OPCODE(UnpackHalf2x16, F32x2, U32, )
|
||||
OPCODE(PackDouble2x32, F64, U32x2, )
|
||||
OPCODE(UnpackDouble2x32, U32x2, F64, )
|
||||
OPCODE(BitCastU16F16, U16, F16, )
|
||||
OPCODE(BitCastU32F32, U32, F32, )
|
||||
OPCODE(BitCastU64F64, U64, F64, )
|
||||
OPCODE(BitCastF16U16, F16, U16, )
|
||||
OPCODE(BitCastF32U32, F32, U32, )
|
||||
OPCODE(BitCastF64U64, F64, U64, )
|
||||
OPCODE(PackUint2x32, U64, U32x2, )
|
||||
OPCODE(UnpackUint2x32, U32x2, U64, )
|
||||
OPCODE(PackFloat2x16, U32, F16x2, )
|
||||
OPCODE(UnpackFloat2x16, F16x2, U32, )
|
||||
OPCODE(PackHalf2x16, U32, F32x2, )
|
||||
OPCODE(UnpackHalf2x16, F32x2, U32, )
|
||||
OPCODE(PackDouble2x32, F64, U32x2, )
|
||||
OPCODE(UnpackDouble2x32, U32x2, F64, )
|
||||
|
||||
// Pseudo-operation, handled specially at final emit
|
||||
OPCODE(GetZeroFromOp, U1, Opaque, )
|
||||
OPCODE(GetSignFromOp, U1, Opaque, )
|
||||
OPCODE(GetCarryFromOp, U1, Opaque, )
|
||||
OPCODE(GetOverflowFromOp, U1, Opaque, )
|
||||
OPCODE(GetZeroFromOp, U1, Opaque, )
|
||||
OPCODE(GetSignFromOp, U1, Opaque, )
|
||||
OPCODE(GetCarryFromOp, U1, Opaque, )
|
||||
OPCODE(GetOverflowFromOp, U1, Opaque, )
|
||||
OPCODE(GetSparseFromOp, U1, Opaque, )
|
||||
|
||||
// Floating-point operations
|
||||
OPCODE(FPAbs16, F16, F16, )
|
||||
OPCODE(FPAbs32, F32, F32, )
|
||||
OPCODE(FPAbs64, F64, F64, )
|
||||
OPCODE(FPAdd16, F16, F16, F16, )
|
||||
OPCODE(FPAdd32, F32, F32, F32, )
|
||||
OPCODE(FPAdd64, F64, F64, F64, )
|
||||
OPCODE(FPFma16, F16, F16, F16, F16, )
|
||||
OPCODE(FPFma32, F32, F32, F32, F32, )
|
||||
OPCODE(FPFma64, F64, F64, F64, F64, )
|
||||
OPCODE(FPMax32, F32, F32, F32, )
|
||||
OPCODE(FPMax64, F64, F64, F64, )
|
||||
OPCODE(FPMin32, F32, F32, F32, )
|
||||
OPCODE(FPMin64, F64, F64, F64, )
|
||||
OPCODE(FPMul16, F16, F16, F16, )
|
||||
OPCODE(FPMul32, F32, F32, F32, )
|
||||
OPCODE(FPMul64, F64, F64, F64, )
|
||||
OPCODE(FPNeg16, F16, F16, )
|
||||
OPCODE(FPNeg32, F32, F32, )
|
||||
OPCODE(FPNeg64, F64, F64, )
|
||||
OPCODE(FPRecip32, F32, F32, )
|
||||
OPCODE(FPRecip64, F64, F64, )
|
||||
OPCODE(FPRecipSqrt32, F32, F32, )
|
||||
OPCODE(FPRecipSqrt64, F64, F64, )
|
||||
OPCODE(FPSqrt, F32, F32, )
|
||||
OPCODE(FPSin, F32, F32, )
|
||||
OPCODE(FPExp2, F32, F32, )
|
||||
OPCODE(FPCos, F32, F32, )
|
||||
OPCODE(FPLog2, F32, F32, )
|
||||
OPCODE(FPSaturate16, F16, F16, )
|
||||
OPCODE(FPSaturate32, F32, F32, )
|
||||
OPCODE(FPSaturate64, F64, F64, )
|
||||
OPCODE(FPRoundEven16, F16, F16, )
|
||||
OPCODE(FPRoundEven32, F32, F32, )
|
||||
OPCODE(FPRoundEven64, F64, F64, )
|
||||
OPCODE(FPFloor16, F16, F16, )
|
||||
OPCODE(FPFloor32, F32, F32, )
|
||||
OPCODE(FPFloor64, F64, F64, )
|
||||
OPCODE(FPCeil16, F16, F16, )
|
||||
OPCODE(FPCeil32, F32, F32, )
|
||||
OPCODE(FPCeil64, F64, F64, )
|
||||
OPCODE(FPTrunc16, F16, F16, )
|
||||
OPCODE(FPTrunc32, F32, F32, )
|
||||
OPCODE(FPTrunc64, F64, F64, )
|
||||
OPCODE(FPAbs16, F16, F16, )
|
||||
OPCODE(FPAbs32, F32, F32, )
|
||||
OPCODE(FPAbs64, F64, F64, )
|
||||
OPCODE(FPAdd16, F16, F16, F16, )
|
||||
OPCODE(FPAdd32, F32, F32, F32, )
|
||||
OPCODE(FPAdd64, F64, F64, F64, )
|
||||
OPCODE(FPFma16, F16, F16, F16, F16, )
|
||||
OPCODE(FPFma32, F32, F32, F32, F32, )
|
||||
OPCODE(FPFma64, F64, F64, F64, F64, )
|
||||
OPCODE(FPMax32, F32, F32, F32, )
|
||||
OPCODE(FPMax64, F64, F64, F64, )
|
||||
OPCODE(FPMin32, F32, F32, F32, )
|
||||
OPCODE(FPMin64, F64, F64, F64, )
|
||||
OPCODE(FPMul16, F16, F16, F16, )
|
||||
OPCODE(FPMul32, F32, F32, F32, )
|
||||
OPCODE(FPMul64, F64, F64, F64, )
|
||||
OPCODE(FPNeg16, F16, F16, )
|
||||
OPCODE(FPNeg32, F32, F32, )
|
||||
OPCODE(FPNeg64, F64, F64, )
|
||||
OPCODE(FPRecip32, F32, F32, )
|
||||
OPCODE(FPRecip64, F64, F64, )
|
||||
OPCODE(FPRecipSqrt32, F32, F32, )
|
||||
OPCODE(FPRecipSqrt64, F64, F64, )
|
||||
OPCODE(FPSqrt, F32, F32, )
|
||||
OPCODE(FPSin, F32, F32, )
|
||||
OPCODE(FPExp2, F32, F32, )
|
||||
OPCODE(FPCos, F32, F32, )
|
||||
OPCODE(FPLog2, F32, F32, )
|
||||
OPCODE(FPSaturate16, F16, F16, )
|
||||
OPCODE(FPSaturate32, F32, F32, )
|
||||
OPCODE(FPSaturate64, F64, F64, )
|
||||
OPCODE(FPRoundEven16, F16, F16, )
|
||||
OPCODE(FPRoundEven32, F32, F32, )
|
||||
OPCODE(FPRoundEven64, F64, F64, )
|
||||
OPCODE(FPFloor16, F16, F16, )
|
||||
OPCODE(FPFloor32, F32, F32, )
|
||||
OPCODE(FPFloor64, F64, F64, )
|
||||
OPCODE(FPCeil16, F16, F16, )
|
||||
OPCODE(FPCeil32, F32, F32, )
|
||||
OPCODE(FPCeil64, F64, F64, )
|
||||
OPCODE(FPTrunc16, F16, F16, )
|
||||
OPCODE(FPTrunc32, F32, F32, )
|
||||
OPCODE(FPTrunc64, F64, F64, )
|
||||
|
||||
OPCODE(FPOrdEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdNotEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdNotEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdNotEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordNotEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordNotEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordNotEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdLessThan16, U1, F16, F16, )
|
||||
OPCODE(FPOrdLessThan32, U1, F32, F32, )
|
||||
OPCODE(FPOrdLessThan64, U1, F64, F64, )
|
||||
OPCODE(FPUnordLessThan16, U1, F16, F16, )
|
||||
OPCODE(FPUnordLessThan32, U1, F32, F32, )
|
||||
OPCODE(FPUnordLessThan64, U1, F64, F64, )
|
||||
OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
|
||||
OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
|
||||
OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
|
||||
OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
|
||||
OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
|
||||
OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
|
||||
OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdNotEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdNotEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdNotEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordNotEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordNotEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordNotEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdLessThan16, U1, F16, F16, )
|
||||
OPCODE(FPOrdLessThan32, U1, F32, F32, )
|
||||
OPCODE(FPOrdLessThan64, U1, F64, F64, )
|
||||
OPCODE(FPUnordLessThan16, U1, F16, F16, )
|
||||
OPCODE(FPUnordLessThan32, U1, F32, F32, )
|
||||
OPCODE(FPUnordLessThan64, U1, F64, F64, )
|
||||
OPCODE(FPOrdGreaterThan16, U1, F16, F16, )
|
||||
OPCODE(FPOrdGreaterThan32, U1, F32, F32, )
|
||||
OPCODE(FPOrdGreaterThan64, U1, F64, F64, )
|
||||
OPCODE(FPUnordGreaterThan16, U1, F16, F16, )
|
||||
OPCODE(FPUnordGreaterThan32, U1, F32, F32, )
|
||||
OPCODE(FPUnordGreaterThan64, U1, F64, F64, )
|
||||
OPCODE(FPOrdLessThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdLessThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdLessThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordLessThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordLessThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordLessThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPOrdGreaterThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPOrdGreaterThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPOrdGreaterThanEqual64, U1, F64, F64, )
|
||||
OPCODE(FPUnordGreaterThanEqual16, U1, F16, F16, )
|
||||
OPCODE(FPUnordGreaterThanEqual32, U1, F32, F32, )
|
||||
OPCODE(FPUnordGreaterThanEqual64, U1, F64, F64, )
|
||||
|
||||
// Integer operations
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
OPCODE(IAdd64, U64, U64, U64, )
|
||||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
OPCODE(INeg32, U32, U32, )
|
||||
OPCODE(INeg64, U64, U64, )
|
||||
OPCODE(IAbs32, U32, U32, )
|
||||
OPCODE(ShiftLeftLogical32, U32, U32, U32, )
|
||||
OPCODE(ShiftLeftLogical64, U64, U64, U32, )
|
||||
OPCODE(ShiftRightLogical32, U32, U32, U32, )
|
||||
OPCODE(ShiftRightLogical64, U64, U64, U32, )
|
||||
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
|
||||
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
|
||||
OPCODE(BitwiseAnd32, U32, U32, U32, )
|
||||
OPCODE(BitwiseOr32, U32, U32, U32, )
|
||||
OPCODE(BitwiseXor32, U32, U32, U32, )
|
||||
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
|
||||
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
|
||||
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
|
||||
OPCODE(BitReverse32, U32, U32, )
|
||||
OPCODE(BitCount32, U32, U32, )
|
||||
OPCODE(BitwiseNot32, U32, U32, )
|
||||
OPCODE(IAdd32, U32, U32, U32, )
|
||||
OPCODE(IAdd64, U64, U64, U64, )
|
||||
OPCODE(ISub32, U32, U32, U32, )
|
||||
OPCODE(ISub64, U64, U64, U64, )
|
||||
OPCODE(IMul32, U32, U32, U32, )
|
||||
OPCODE(INeg32, U32, U32, )
|
||||
OPCODE(INeg64, U64, U64, )
|
||||
OPCODE(IAbs32, U32, U32, )
|
||||
OPCODE(ShiftLeftLogical32, U32, U32, U32, )
|
||||
OPCODE(ShiftLeftLogical64, U64, U64, U32, )
|
||||
OPCODE(ShiftRightLogical32, U32, U32, U32, )
|
||||
OPCODE(ShiftRightLogical64, U64, U64, U32, )
|
||||
OPCODE(ShiftRightArithmetic32, U32, U32, U32, )
|
||||
OPCODE(ShiftRightArithmetic64, U64, U64, U32, )
|
||||
OPCODE(BitwiseAnd32, U32, U32, U32, )
|
||||
OPCODE(BitwiseOr32, U32, U32, U32, )
|
||||
OPCODE(BitwiseXor32, U32, U32, U32, )
|
||||
OPCODE(BitFieldInsert, U32, U32, U32, U32, U32, )
|
||||
OPCODE(BitFieldSExtract, U32, U32, U32, U32, )
|
||||
OPCODE(BitFieldUExtract, U32, U32, U32, U32, )
|
||||
OPCODE(BitReverse32, U32, U32, )
|
||||
OPCODE(BitCount32, U32, U32, )
|
||||
OPCODE(BitwiseNot32, U32, U32, )
|
||||
|
||||
OPCODE(FindSMsb32, U32, U32, )
|
||||
OPCODE(FindUMsb32, U32, U32, )
|
||||
OPCODE(SMin32, U32, U32, U32, )
|
||||
OPCODE(UMin32, U32, U32, U32, )
|
||||
OPCODE(SMax32, U32, U32, U32, )
|
||||
OPCODE(UMax32, U32, U32, U32, )
|
||||
OPCODE(SLessThan, U1, U32, U32, )
|
||||
OPCODE(ULessThan, U1, U32, U32, )
|
||||
OPCODE(IEqual, U1, U32, U32, )
|
||||
OPCODE(SLessThanEqual, U1, U32, U32, )
|
||||
OPCODE(ULessThanEqual, U1, U32, U32, )
|
||||
OPCODE(SGreaterThan, U1, U32, U32, )
|
||||
OPCODE(UGreaterThan, U1, U32, U32, )
|
||||
OPCODE(INotEqual, U1, U32, U32, )
|
||||
OPCODE(SGreaterThanEqual, U1, U32, U32, )
|
||||
OPCODE(UGreaterThanEqual, U1, U32, U32, )
|
||||
OPCODE(FindSMsb32, U32, U32, )
|
||||
OPCODE(FindUMsb32, U32, U32, )
|
||||
OPCODE(SMin32, U32, U32, U32, )
|
||||
OPCODE(UMin32, U32, U32, U32, )
|
||||
OPCODE(SMax32, U32, U32, U32, )
|
||||
OPCODE(UMax32, U32, U32, U32, )
|
||||
OPCODE(SLessThan, U1, U32, U32, )
|
||||
OPCODE(ULessThan, U1, U32, U32, )
|
||||
OPCODE(IEqual, U1, U32, U32, )
|
||||
OPCODE(SLessThanEqual, U1, U32, U32, )
|
||||
OPCODE(ULessThanEqual, U1, U32, U32, )
|
||||
OPCODE(SGreaterThan, U1, U32, U32, )
|
||||
OPCODE(UGreaterThan, U1, U32, U32, )
|
||||
OPCODE(INotEqual, U1, U32, U32, )
|
||||
OPCODE(SGreaterThanEqual, U1, U32, U32, )
|
||||
OPCODE(UGreaterThanEqual, U1, U32, U32, )
|
||||
|
||||
// Logical operations
|
||||
OPCODE(LogicalOr, U1, U1, U1, )
|
||||
OPCODE(LogicalAnd, U1, U1, U1, )
|
||||
OPCODE(LogicalXor, U1, U1, U1, )
|
||||
OPCODE(LogicalNot, U1, U1, )
|
||||
OPCODE(LogicalOr, U1, U1, U1, )
|
||||
OPCODE(LogicalAnd, U1, U1, U1, )
|
||||
OPCODE(LogicalXor, U1, U1, U1, )
|
||||
OPCODE(LogicalNot, U1, U1, )
|
||||
|
||||
// Conversion operations
|
||||
OPCODE(ConvertS16F16, U32, F16, )
|
||||
OPCODE(ConvertS16F32, U32, F32, )
|
||||
OPCODE(ConvertS16F64, U32, F64, )
|
||||
OPCODE(ConvertS32F16, U32, F16, )
|
||||
OPCODE(ConvertS32F32, U32, F32, )
|
||||
OPCODE(ConvertS32F64, U32, F64, )
|
||||
OPCODE(ConvertS64F16, U64, F16, )
|
||||
OPCODE(ConvertS64F32, U64, F32, )
|
||||
OPCODE(ConvertS64F64, U64, F64, )
|
||||
OPCODE(ConvertU16F16, U32, F16, )
|
||||
OPCODE(ConvertU16F32, U32, F32, )
|
||||
OPCODE(ConvertU16F64, U32, F64, )
|
||||
OPCODE(ConvertU32F16, U32, F16, )
|
||||
OPCODE(ConvertU32F32, U32, F32, )
|
||||
OPCODE(ConvertU32F64, U32, F64, )
|
||||
OPCODE(ConvertU64F16, U64, F16, )
|
||||
OPCODE(ConvertU64F32, U64, F32, )
|
||||
OPCODE(ConvertU64F64, U64, F64, )
|
||||
OPCODE(ConvertU64U32, U64, U32, )
|
||||
OPCODE(ConvertU32U64, U32, U64, )
|
||||
OPCODE(ConvertF16F32, F16, F32, )
|
||||
OPCODE(ConvertF32F16, F32, F16, )
|
||||
OPCODE(ConvertF32F64, F32, F64, )
|
||||
OPCODE(ConvertF64F32, F64, F32, )
|
||||
OPCODE(ConvertS16F16, U32, F16, )
|
||||
OPCODE(ConvertS16F32, U32, F32, )
|
||||
OPCODE(ConvertS16F64, U32, F64, )
|
||||
OPCODE(ConvertS32F16, U32, F16, )
|
||||
OPCODE(ConvertS32F32, U32, F32, )
|
||||
OPCODE(ConvertS32F64, U32, F64, )
|
||||
OPCODE(ConvertS64F16, U64, F16, )
|
||||
OPCODE(ConvertS64F32, U64, F32, )
|
||||
OPCODE(ConvertS64F64, U64, F64, )
|
||||
OPCODE(ConvertU16F16, U32, F16, )
|
||||
OPCODE(ConvertU16F32, U32, F32, )
|
||||
OPCODE(ConvertU16F64, U32, F64, )
|
||||
OPCODE(ConvertU32F16, U32, F16, )
|
||||
OPCODE(ConvertU32F32, U32, F32, )
|
||||
OPCODE(ConvertU32F64, U32, F64, )
|
||||
OPCODE(ConvertU64F16, U64, F16, )
|
||||
OPCODE(ConvertU64F32, U64, F32, )
|
||||
OPCODE(ConvertU64F64, U64, F64, )
|
||||
OPCODE(ConvertU64U32, U64, U32, )
|
||||
OPCODE(ConvertU32U64, U32, U64, )
|
||||
OPCODE(ConvertF16F32, F16, F32, )
|
||||
OPCODE(ConvertF32F16, F32, F16, )
|
||||
OPCODE(ConvertF32F64, F32, F64, )
|
||||
OPCODE(ConvertF64F32, F64, F32, )
|
||||
OPCODE(ConvertF16S32, F16, U32, )
|
||||
OPCODE(ConvertF16S64, F16, U64, )
|
||||
OPCODE(ConvertF16U32, F16, U32, )
|
||||
OPCODE(ConvertF16U64, F16, U64, )
|
||||
OPCODE(ConvertF32S32, F32, U32, )
|
||||
OPCODE(ConvertF32S64, F32, U64, )
|
||||
OPCODE(ConvertF32U32, F32, U32, )
|
||||
OPCODE(ConvertF32U64, F32, U64, )
|
||||
OPCODE(ConvertF64S32, F64, U32, )
|
||||
OPCODE(ConvertF64S64, F64, U64, )
|
||||
OPCODE(ConvertF64U32, F64, U32, )
|
||||
OPCODE(ConvertF64U64, F64, U64, )
|
||||
|
||||
// Image operations
|
||||
OPCODE(BindlessImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(BindlessImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(BindlessImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(BindlessImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
|
||||
OPCODE(BoundImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(BoundImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(BoundImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(BoundImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
|
||||
OPCODE(ImageSampleImplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleExplicitLod, F32x4, U32, Opaque, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefImplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
OPCODE(ImageSampleDrefExplicitLod, F32, U32, Opaque, F32, Opaque, Opaque, )
|
||||
|
@ -293,6 +293,17 @@ constexpr size_t NUM_REGS = 256;
|
||||
return reg + (-num);
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Reg operator++(Reg& reg) {
|
||||
reg = reg + 1;
|
||||
return reg;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr Reg operator++(Reg& reg, int) {
|
||||
const Reg copy{reg};
|
||||
reg = reg + 1;
|
||||
return copy;
|
||||
}
|
||||
|
||||
[[nodiscard]] constexpr size_t RegIndex(Reg reg) noexcept {
|
||||
return static_cast<size_t>(reg);
|
||||
}
|
||||
|
@ -75,6 +75,7 @@ private:
|
||||
f64 imm_f64;
|
||||
};
|
||||
};
|
||||
static_assert(std::is_trivially_copyable_v<Value>);
|
||||
|
||||
template <IR::Type type_>
|
||||
class TypedValue : public Value {
|
||||
|
@ -249,8 +249,8 @@ INST(SULD, "SULD", "1110 1011 000- ----")
|
||||
INST(SURED, "SURED", "1110 1011 010- ----")
|
||||
INST(SUST, "SUST", "1110 1011 001- ----")
|
||||
INST(SYNC, "SYNC", "1111 0000 1111 1---")
|
||||
INST(TEX, "TEX", "1100 00-- --11 1---")
|
||||
INST(TEX_b, "TEX (b)", "1101 1110 1011 1---")
|
||||
INST(TEX, "TEX", "1100 0--- ---- ----")
|
||||
INST(TEX_b, "TEX (b)", "1101 1110 10-- ----")
|
||||
INST(TEXS, "TEXS", "1101 -00- ---- ----")
|
||||
INST(TLD, "TLD", "1101 1100 --11 1---")
|
||||
INST(TLD_b, "TLD (b)", "1101 1101 --11 1---")
|
||||
|
@ -62,6 +62,7 @@ IR::Program TranslateProgram(ObjectPool<IR::Inst>& inst_pool, ObjectPool<IR::Blo
|
||||
Optimization::SsaRewritePass(function.post_order_blocks);
|
||||
}
|
||||
Optimization::GlobalMemoryToStorageBufferPass(program);
|
||||
Optimization::TexturePass(env, program);
|
||||
for (IR::Function& function : functions) {
|
||||
Optimization::PostOrderInvoke(Optimization::ConstantPropagationPass, function);
|
||||
Optimization::PostOrderInvoke(Optimization::DeadCodeEliminationPass, function);
|
||||
|
@ -585,14 +585,6 @@ void TranslatorVisitor::SYNC(u64) {
|
||||
ThrowNotImplemented(Opcode::SYNC);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::TEX(u64) {
|
||||
ThrowNotImplemented(Opcode::TEX);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::TEX_b(u64) {
|
||||
ThrowNotImplemented(Opcode::TEX_b);
|
||||
}
|
||||
|
||||
void TranslatorVisitor::TEXS(u64) {
|
||||
ThrowNotImplemented(Opcode::TEXS);
|
||||
}
|
||||
|
@ -0,0 +1,232 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include "common/bit_field.h"
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/frontend/ir/modifiers.h"
|
||||
#include "shader_recompiler/frontend/maxwell/translate/impl/impl.h"
|
||||
|
||||
namespace Shader::Maxwell {
|
||||
namespace {
|
||||
enum class Blod : u64 {
|
||||
None,
|
||||
LZ,
|
||||
LB,
|
||||
LL,
|
||||
INVALIDBLOD4,
|
||||
INVALIDBLOD5,
|
||||
LBA,
|
||||
LLA,
|
||||
};
|
||||
|
||||
enum class TextureType : u64 {
|
||||
_1D,
|
||||
ARRAY_1D,
|
||||
_2D,
|
||||
ARRAY_2D,
|
||||
_3D,
|
||||
ARRAY_3D,
|
||||
CUBE,
|
||||
ARRAY_CUBE,
|
||||
};
|
||||
|
||||
Shader::TextureType GetType(TextureType type, bool dc) {
|
||||
switch (type) {
|
||||
case TextureType::_1D:
|
||||
return dc ? Shader::TextureType::Shadow1D : Shader::TextureType::Color1D;
|
||||
case TextureType::ARRAY_1D:
|
||||
return dc ? Shader::TextureType::ShadowArray1D : Shader::TextureType::ColorArray1D;
|
||||
case TextureType::_2D:
|
||||
return dc ? Shader::TextureType::Shadow2D : Shader::TextureType::Color2D;
|
||||
case TextureType::ARRAY_2D:
|
||||
return dc ? Shader::TextureType::ShadowArray2D : Shader::TextureType::ColorArray2D;
|
||||
case TextureType::_3D:
|
||||
return dc ? Shader::TextureType::Shadow3D : Shader::TextureType::Color3D;
|
||||
case TextureType::ARRAY_3D:
|
||||
throw NotImplementedException("3D array texture type");
|
||||
case TextureType::CUBE:
|
||||
return dc ? Shader::TextureType::ShadowCube : Shader::TextureType::ColorCube;
|
||||
case TextureType::ARRAY_CUBE:
|
||||
return dc ? Shader::TextureType::ShadowArrayCube : Shader::TextureType::ColorArrayCube;
|
||||
}
|
||||
throw NotImplementedException("Invalid texture type {}", type);
|
||||
}
|
||||
|
||||
IR::Value MakeCoords(TranslatorVisitor& v, IR::Reg reg, TextureType type) {
|
||||
const auto read_array{[&]() -> IR::F32 { return v.ir.ConvertUToF(32, v.X(reg)); }};
|
||||
switch (type) {
|
||||
case TextureType::_1D:
|
||||
return v.F(reg);
|
||||
case TextureType::ARRAY_1D:
|
||||
return v.ir.CompositeConstruct(read_array(), v.F(reg + 1));
|
||||
case TextureType::_2D:
|
||||
return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1));
|
||||
case TextureType::ARRAY_2D:
|
||||
return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2));
|
||||
case TextureType::_3D:
|
||||
return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
|
||||
case TextureType::ARRAY_3D:
|
||||
throw NotImplementedException("3D array texture type");
|
||||
case TextureType::CUBE:
|
||||
return v.ir.CompositeConstruct(v.F(reg), v.F(reg + 1), v.F(reg + 2));
|
||||
case TextureType::ARRAY_CUBE:
|
||||
return v.ir.CompositeConstruct(read_array(), v.F(reg + 1), v.F(reg + 2), v.F(reg + 3));
|
||||
}
|
||||
throw NotImplementedException("Invalid texture type {}", type);
|
||||
}
|
||||
|
||||
IR::F32 MakeLod(TranslatorVisitor& v, IR::Reg& reg, Blod blod) {
|
||||
switch (blod) {
|
||||
case Blod::None:
|
||||
return v.ir.Imm32(0.0f);
|
||||
case Blod::LZ:
|
||||
return v.ir.Imm32(0.0f);
|
||||
case Blod::LB:
|
||||
case Blod::LL:
|
||||
case Blod::LBA:
|
||||
case Blod::LLA:
|
||||
return v.F(reg++);
|
||||
case Blod::INVALIDBLOD4:
|
||||
case Blod::INVALIDBLOD5:
|
||||
break;
|
||||
}
|
||||
throw NotImplementedException("Invalid blod {}", blod);
|
||||
}
|
||||
|
||||
IR::Value MakeOffset(TranslatorVisitor& v, IR::Reg& reg, TextureType type) {
|
||||
const IR::U32 value{v.X(reg++)};
|
||||
switch (type) {
|
||||
case TextureType::_1D:
|
||||
case TextureType::ARRAY_1D:
|
||||
return v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4));
|
||||
case TextureType::_2D:
|
||||
case TextureType::ARRAY_2D:
|
||||
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
|
||||
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)));
|
||||
case TextureType::_3D:
|
||||
case TextureType::ARRAY_3D:
|
||||
return v.ir.CompositeConstruct(v.ir.BitFieldExtract(value, v.ir.Imm32(0), v.ir.Imm32(4)),
|
||||
v.ir.BitFieldExtract(value, v.ir.Imm32(4), v.ir.Imm32(4)),
|
||||
v.ir.BitFieldExtract(value, v.ir.Imm32(8), v.ir.Imm32(4)));
|
||||
case TextureType::CUBE:
|
||||
case TextureType::ARRAY_CUBE:
|
||||
throw NotImplementedException("Illegal offset on CUBE sample");
|
||||
}
|
||||
throw NotImplementedException("Invalid texture type {}", type);
|
||||
}
|
||||
|
||||
bool HasExplicitLod(Blod blod) {
|
||||
switch (blod) {
|
||||
case Blod::LL:
|
||||
case Blod::LLA:
|
||||
case Blod::LZ:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
void Impl(TranslatorVisitor& v, u64 insn, bool aoffi, Blod blod, bool lc,
|
||||
std::optional<u32> cbuf_offset) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<35, 1, u64> ndv;
|
||||
BitField<49, 1, u64> nodep;
|
||||
BitField<50, 1, u64> dc;
|
||||
BitField<51, 3, IR::Pred> sparse_pred;
|
||||
BitField<0, 8, IR::Reg> dest_reg;
|
||||
BitField<8, 8, IR::Reg> coord_reg;
|
||||
BitField<20, 8, IR::Reg> meta_reg;
|
||||
BitField<28, 3, TextureType> type;
|
||||
BitField<31, 4, u64> mask;
|
||||
} const tex{insn};
|
||||
|
||||
if (lc) {
|
||||
throw NotImplementedException("LC");
|
||||
}
|
||||
const IR::Value coords{MakeCoords(v, tex.coord_reg, tex.type)};
|
||||
|
||||
IR::Reg meta_reg{tex.meta_reg};
|
||||
IR::Value handle;
|
||||
IR::Value offset;
|
||||
IR::F32 dref;
|
||||
IR::F32 lod_clamp;
|
||||
if (cbuf_offset) {
|
||||
handle = v.ir.Imm32(*cbuf_offset);
|
||||
} else {
|
||||
handle = v.X(meta_reg++);
|
||||
}
|
||||
const IR::F32 lod{MakeLod(v, meta_reg, blod)};
|
||||
if (aoffi) {
|
||||
offset = MakeOffset(v, meta_reg, tex.type);
|
||||
}
|
||||
if (tex.dc != 0) {
|
||||
dref = v.F(meta_reg++);
|
||||
}
|
||||
IR::TextureInstInfo info{};
|
||||
info.type.Assign(GetType(tex.type, tex.dc != 0));
|
||||
info.has_bias.Assign(blod == Blod::LB || blod == Blod::LBA ? 1 : 0);
|
||||
info.has_lod_clamp.Assign(lc ? 1 : 0);
|
||||
|
||||
const IR::Value sample{[&]() -> IR::Value {
|
||||
if (tex.dc == 0) {
|
||||
if (HasExplicitLod(blod)) {
|
||||
return v.ir.ImageSampleExplicitLod(handle, coords, lod, offset, lod_clamp, info);
|
||||
} else {
|
||||
return v.ir.ImageSampleImplicitLod(handle, coords, lod, offset, lod_clamp, info);
|
||||
}
|
||||
}
|
||||
if (HasExplicitLod(blod)) {
|
||||
return v.ir.ImageSampleDrefExplicitLod(handle, coords, dref, lod, offset, lod_clamp,
|
||||
info);
|
||||
} else {
|
||||
return v.ir.ImageSampleDrefImplicitLod(handle, coords, dref, lod, offset, lod_clamp,
|
||||
info);
|
||||
}
|
||||
}()};
|
||||
|
||||
for (int element = 0; element < 4; ++element) {
|
||||
if (((tex.mask >> element) & 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
IR::F32 value;
|
||||
if (tex.dc != 0) {
|
||||
value = element < 3 ? IR::F32{sample} : v.ir.Imm32(1.0f);
|
||||
} else {
|
||||
value = IR::F32{v.ir.CompositeExtract(sample, element)};
|
||||
}
|
||||
v.F(tex.dest_reg + element, value);
|
||||
}
|
||||
if (tex.sparse_pred != IR::Pred::PT) {
|
||||
v.ir.SetPred(tex.sparse_pred, v.ir.LogicalNot(v.ir.GetSparseFromOp(sample)));
|
||||
}
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
void TranslatorVisitor::TEX(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<54, 1, u64> aoffi;
|
||||
BitField<55, 3, Blod> blod;
|
||||
BitField<58, 1, u64> lc;
|
||||
BitField<36, 13, u64> cbuf_offset;
|
||||
} const tex{insn};
|
||||
|
||||
Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, static_cast<u32>(tex.cbuf_offset));
|
||||
}
|
||||
|
||||
void TranslatorVisitor::TEX_b(u64 insn) {
|
||||
union {
|
||||
u64 raw;
|
||||
BitField<36, 1, u64> aoffi;
|
||||
BitField<37, 3, Blod> blod;
|
||||
BitField<40, 1, u64> lc;
|
||||
} const tex{insn};
|
||||
|
||||
Impl(*this, insn, tex.aoffi != 0, tex.blod, tex.lc != 0, std::nullopt);
|
||||
}
|
||||
|
||||
} // namespace Shader::Maxwell
|
@ -82,6 +82,25 @@ void VisitUsages(Info& info, IR::Inst& inst) {
|
||||
throw NotImplementedException("Constant buffer with non-immediate index");
|
||||
}
|
||||
break;
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleExplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::BoundImageSampleImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleExplicitLod:
|
||||
case IR::Opcode::BoundImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::ImageSampleImplicitLod:
|
||||
case IR::Opcode::ImageSampleExplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::ImageSampleDrefExplicitLod: {
|
||||
const TextureType type{inst.Flags<IR::TextureInstInfo>().type};
|
||||
info.uses_sampled_1d |= type == TextureType::Color1D || type == TextureType::ColorArray1D ||
|
||||
type == TextureType::Shadow1D || type == TextureType::ShadowArray1D;
|
||||
info.uses_sparse_residency |=
|
||||
inst.GetAssociatedPseudoOperation(IR::Opcode::GetSparseFromOp) != nullptr;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -226,6 +226,7 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
|
||||
}
|
||||
// Reversed loops are more likely to find the right result
|
||||
for (size_t arg = inst->NumArgs(); arg--;) {
|
||||
IR::Block* inst_block{block};
|
||||
if (inst->Opcode() == IR::Opcode::Phi) {
|
||||
// If we are going through a phi node, mark the current block as visited
|
||||
visited.insert(block);
|
||||
@ -235,15 +236,11 @@ std::optional<StorageBufferAddr> Track(IR::Block* block, const IR::Value& value,
|
||||
// Already visited, skip
|
||||
continue;
|
||||
}
|
||||
const std::optional storage_buffer{Track(phi_block, inst->Arg(arg), bias, visited)};
|
||||
if (storage_buffer) {
|
||||
return *storage_buffer;
|
||||
}
|
||||
} else {
|
||||
const std::optional storage_buffer{Track(block, inst->Arg(arg), bias, visited)};
|
||||
if (storage_buffer) {
|
||||
return *storage_buffer;
|
||||
}
|
||||
inst_block = phi_block;
|
||||
}
|
||||
const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), bias, visited)};
|
||||
if (storage_buffer) {
|
||||
return *storage_buffer;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include <span>
|
||||
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/function.h"
|
||||
#include "shader_recompiler/frontend/ir/program.h"
|
||||
@ -26,6 +27,7 @@ void GlobalMemoryToStorageBufferPass(IR::Program& program);
|
||||
void IdentityRemovalPass(IR::Function& function);
|
||||
void LowerFp16ToFp32(IR::Program& program);
|
||||
void SsaRewritePass(std::span<IR::Block* const> post_order_blocks);
|
||||
void TexturePass(Environment& env, IR::Program& program);
|
||||
void VerificationPass(const IR::Function& function);
|
||||
|
||||
} // namespace Shader::Optimization
|
||||
|
199
src/shader_recompiler/ir_opt/texture_pass.cpp
Normal file
199
src/shader_recompiler/ir_opt/texture_pass.cpp
Normal file
@ -0,0 +1,199 @@
|
||||
// Copyright 2021 yuzu Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#include <optional>
|
||||
|
||||
#include <boost/container/flat_set.hpp>
|
||||
#include <boost/container/small_vector.hpp>
|
||||
|
||||
#include "shader_recompiler/environment.h"
|
||||
#include "shader_recompiler/frontend/ir/basic_block.h"
|
||||
#include "shader_recompiler/frontend/ir/ir_emitter.h"
|
||||
#include "shader_recompiler/ir_opt/passes.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
|
||||
namespace Shader::Optimization {
|
||||
namespace {
|
||||
struct ConstBufferAddr {
|
||||
u32 index;
|
||||
u32 offset;
|
||||
};
|
||||
|
||||
struct TextureInst {
|
||||
ConstBufferAddr cbuf;
|
||||
IR::Inst* inst;
|
||||
IR::Block* block;
|
||||
};
|
||||
|
||||
using TextureInstVector = boost::container::small_vector<TextureInst, 24>;
|
||||
|
||||
using VisitedBlocks = boost::container::flat_set<IR::Block*, std::less<IR::Block*>,
|
||||
boost::container::small_vector<IR::Block*, 2>>;
|
||||
|
||||
IR::Opcode IndexedInstruction(const IR::Inst& inst) {
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleImplicitLod:
|
||||
return IR::Opcode::ImageSampleImplicitLod;
|
||||
case IR::Opcode::BoundImageSampleExplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleExplicitLod:
|
||||
return IR::Opcode::ImageSampleExplicitLod;
|
||||
case IR::Opcode::BoundImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
||||
return IR::Opcode::ImageSampleDrefImplicitLod;
|
||||
case IR::Opcode::BoundImageSampleDrefExplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefExplicitLod:
|
||||
return IR::Opcode::ImageSampleDrefExplicitLod;
|
||||
default:
|
||||
return IR::Opcode::Void;
|
||||
}
|
||||
}
|
||||
|
||||
bool IsBindless(const IR::Inst& inst) {
|
||||
switch (inst.Opcode()) {
|
||||
case IR::Opcode::BindlessImageSampleImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleExplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::BindlessImageSampleDrefExplicitLod:
|
||||
return true;
|
||||
case IR::Opcode::BoundImageSampleImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleExplicitLod:
|
||||
case IR::Opcode::BoundImageSampleDrefImplicitLod:
|
||||
case IR::Opcode::BoundImageSampleDrefExplicitLod:
|
||||
return false;
|
||||
default:
|
||||
throw InvalidArgument("Invalid opcode {}", inst.Opcode());
|
||||
}
|
||||
}
|
||||
|
||||
bool IsTextureInstruction(const IR::Inst& inst) {
|
||||
return IndexedInstruction(inst) != IR::Opcode::Void;
|
||||
}
|
||||
|
||||
std::optional<ConstBufferAddr> Track(IR::Block* block, const IR::Value& value,
|
||||
VisitedBlocks& visited) {
|
||||
if (value.IsImmediate()) {
|
||||
// Immediates can't be a storage buffer
|
||||
return std::nullopt;
|
||||
}
|
||||
const IR::Inst* const inst{value.InstRecursive()};
|
||||
if (inst->Opcode() == IR::Opcode::GetCbuf) {
|
||||
const IR::Value index{inst->Arg(0)};
|
||||
const IR::Value offset{inst->Arg(1)};
|
||||
if (!index.IsImmediate()) {
|
||||
// Reading a bindless texture from variable indices is valid
|
||||
// but not supported here at the moment
|
||||
return std::nullopt;
|
||||
}
|
||||
if (!offset.IsImmediate()) {
|
||||
// TODO: Support arrays of textures
|
||||
return std::nullopt;
|
||||
}
|
||||
return ConstBufferAddr{
|
||||
.index{index.U32()},
|
||||
.offset{offset.U32()},
|
||||
};
|
||||
}
|
||||
// Reversed loops are more likely to find the right result
|
||||
for (size_t arg = inst->NumArgs(); arg--;) {
|
||||
IR::Block* inst_block{block};
|
||||
if (inst->Opcode() == IR::Opcode::Phi) {
|
||||
// If we are going through a phi node, mark the current block as visited
|
||||
visited.insert(block);
|
||||
// and skip already visited blocks to avoid looping forever
|
||||
IR::Block* const phi_block{inst->PhiBlock(arg)};
|
||||
if (visited.contains(phi_block)) {
|
||||
// Already visited, skip
|
||||
continue;
|
||||
}
|
||||
inst_block = phi_block;
|
||||
}
|
||||
const std::optional storage_buffer{Track(inst_block, inst->Arg(arg), visited)};
|
||||
if (storage_buffer) {
|
||||
return *storage_buffer;
|
||||
}
|
||||
}
|
||||
return std::nullopt;
|
||||
}
|
||||
|
||||
TextureInst MakeInst(Environment& env, IR::Block* block, IR::Inst& inst) {
|
||||
ConstBufferAddr addr;
|
||||
if (IsBindless(inst)) {
|
||||
VisitedBlocks visited;
|
||||
const std::optional<ConstBufferAddr> track_addr{Track(block, IR::Value{&inst}, visited)};
|
||||
if (!track_addr) {
|
||||
throw NotImplementedException("Failed to track bindless texture constant buffer");
|
||||
}
|
||||
addr = *track_addr;
|
||||
} else {
|
||||
addr = ConstBufferAddr{
|
||||
.index{env.TextureBoundBuffer()},
|
||||
.offset{inst.Arg(0).U32()},
|
||||
};
|
||||
}
|
||||
return TextureInst{
|
||||
.cbuf{addr},
|
||||
.inst{&inst},
|
||||
.block{block},
|
||||
};
|
||||
}
|
||||
|
||||
class Descriptors {
|
||||
public:
|
||||
explicit Descriptors(TextureDescriptors& descriptors_) : descriptors{descriptors_} {}
|
||||
|
||||
u32 Add(const TextureDescriptor& descriptor) {
|
||||
// TODO: Handle arrays
|
||||
auto it{std::ranges::find_if(descriptors, [&descriptor](const TextureDescriptor& existing) {
|
||||
return descriptor.cbuf_index == existing.cbuf_index &&
|
||||
descriptor.cbuf_offset == existing.cbuf_offset &&
|
||||
descriptor.type == existing.type;
|
||||
})};
|
||||
if (it != descriptors.end()) {
|
||||
return static_cast<u32>(std::distance(descriptors.begin(), it));
|
||||
}
|
||||
descriptors.push_back(descriptor);
|
||||
return static_cast<u32>(descriptors.size()) - 1;
|
||||
}
|
||||
|
||||
private:
|
||||
TextureDescriptors& descriptors;
|
||||
};
|
||||
} // Anonymous namespace
|
||||
|
||||
void TexturePass(Environment& env, IR::Program& program) {
|
||||
TextureInstVector to_replace;
|
||||
for (IR::Function& function : program.functions) {
|
||||
for (IR::Block* const block : function.post_order_blocks) {
|
||||
for (IR::Inst& inst : block->Instructions()) {
|
||||
if (!IsTextureInstruction(inst)) {
|
||||
continue;
|
||||
}
|
||||
to_replace.push_back(MakeInst(env, block, inst));
|
||||
}
|
||||
}
|
||||
}
|
||||
// Sort instructions to visit textures by constant buffer index, then by offset
|
||||
std::ranges::sort(to_replace, [](const auto& lhs, const auto& rhs) {
|
||||
return lhs.cbuf.offset < rhs.cbuf.offset;
|
||||
});
|
||||
std::stable_sort(to_replace.begin(), to_replace.end(), [](const auto& lhs, const auto& rhs) {
|
||||
return lhs.cbuf.index < rhs.cbuf.index;
|
||||
});
|
||||
Descriptors descriptors{program.info.texture_descriptors};
|
||||
for (TextureInst& texture_inst : to_replace) {
|
||||
// TODO: Handle arrays
|
||||
IR::Inst* const inst{texture_inst.inst};
|
||||
const u32 index{descriptors.Add(TextureDescriptor{
|
||||
.type{inst->Flags<IR::TextureInstInfo>().type},
|
||||
.cbuf_index{texture_inst.cbuf.index},
|
||||
.cbuf_offset{texture_inst.cbuf.offset},
|
||||
.count{1},
|
||||
})};
|
||||
inst->ReplaceOpcode(IndexedInstruction(*inst));
|
||||
inst->SetArg(0, IR::Value{index});
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace Shader::Optimization
|
@ -8,25 +8,51 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
#include <boost/container/small_vector.hpp>
|
||||
#include <boost/container/static_vector.hpp>
|
||||
|
||||
namespace Shader {
|
||||
|
||||
enum class TextureType : u32 {
|
||||
Color1D,
|
||||
ColorArray1D,
|
||||
Color2D,
|
||||
ColorArray2D,
|
||||
Color3D,
|
||||
ColorCube,
|
||||
ColorArrayCube,
|
||||
Shadow1D,
|
||||
ShadowArray1D,
|
||||
Shadow2D,
|
||||
ShadowArray2D,
|
||||
Shadow3D,
|
||||
ShadowCube,
|
||||
ShadowArrayCube,
|
||||
};
|
||||
|
||||
struct TextureDescriptor {
|
||||
TextureType type;
|
||||
u32 cbuf_index;
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
};
|
||||
using TextureDescriptors = boost::container::small_vector<TextureDescriptor, 12>;
|
||||
|
||||
struct ConstantBufferDescriptor {
|
||||
u32 index;
|
||||
u32 count;
|
||||
};
|
||||
|
||||
struct StorageBufferDescriptor {
|
||||
u32 cbuf_index;
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
};
|
||||
|
||||
struct Info {
|
||||
static constexpr size_t MAX_CBUFS{18};
|
||||
static constexpr size_t MAX_SSBOS{16};
|
||||
|
||||
struct ConstantBufferDescriptor {
|
||||
u32 index;
|
||||
u32 count;
|
||||
};
|
||||
|
||||
struct StorageBufferDescriptor {
|
||||
u32 cbuf_index;
|
||||
u32 cbuf_offset;
|
||||
u32 count;
|
||||
};
|
||||
|
||||
bool uses_workgroup_id{};
|
||||
bool uses_local_invocation_id{};
|
||||
bool uses_fp16{};
|
||||
@ -35,12 +61,16 @@ struct Info {
|
||||
bool uses_fp16_denorms_preserve{};
|
||||
bool uses_fp32_denorms_flush{};
|
||||
bool uses_fp32_denorms_preserve{};
|
||||
bool uses_image_1d{};
|
||||
bool uses_sampled_1d{};
|
||||
bool uses_sparse_residency{};
|
||||
|
||||
u32 constant_buffer_mask{};
|
||||
|
||||
boost::container::static_vector<ConstantBufferDescriptor, MAX_CBUFS>
|
||||
constant_buffer_descriptors;
|
||||
boost::container::static_vector<StorageBufferDescriptor, MAX_SSBOS> storage_buffers_descriptors;
|
||||
TextureDescriptors texture_descriptors;
|
||||
};
|
||||
|
||||
} // namespace Shader
|
||||
|
@ -40,6 +40,16 @@ vk::DescriptorSetLayout CreateDescriptorSetLayout(const Device& device, const Sh
|
||||
});
|
||||
++binding;
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
bindings.push_back({
|
||||
.binding = binding,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.descriptorCount = 1,
|
||||
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.pImmutableSamplers = nullptr,
|
||||
});
|
||||
++binding;
|
||||
}
|
||||
return device.GetLogical().CreateDescriptorSetLayout({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
@ -79,6 +89,18 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
|
||||
++binding;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
entries.push_back({
|
||||
.dstBinding = binding,
|
||||
.dstArrayElement = 0,
|
||||
.descriptorCount = 1,
|
||||
.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
|
||||
.offset = offset,
|
||||
.stride = sizeof(DescriptorUpdateEntry),
|
||||
});
|
||||
++binding;
|
||||
offset += sizeof(DescriptorUpdateEntry);
|
||||
}
|
||||
return device.GetLogical().CreateDescriptorUpdateTemplateKHR({
|
||||
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_CREATE_INFO,
|
||||
.pNext = nullptr,
|
||||
@ -92,6 +114,44 @@ vk::DescriptorUpdateTemplateKHR CreateDescriptorUpdateTemplate(
|
||||
.set = 0,
|
||||
});
|
||||
}
|
||||
|
||||
struct TextureHandle {
|
||||
explicit TextureHandle(u32 data, bool via_header_index) {
|
||||
const Tegra::Texture::TextureHandle handle{data};
|
||||
image = handle.tic_id;
|
||||
sampler = via_header_index ? image : handle.tsc_id.Value();
|
||||
}
|
||||
|
||||
u32 image;
|
||||
u32 sampler;
|
||||
};
|
||||
|
||||
VideoCommon::ImageViewType CastType(Shader::TextureType type) {
|
||||
switch (type) {
|
||||
case Shader::TextureType::Color1D:
|
||||
case Shader::TextureType::Shadow1D:
|
||||
return VideoCommon::ImageViewType::e1D;
|
||||
case Shader::TextureType::ColorArray1D:
|
||||
case Shader::TextureType::ShadowArray1D:
|
||||
return VideoCommon::ImageViewType::e1DArray;
|
||||
case Shader::TextureType::Color2D:
|
||||
case Shader::TextureType::Shadow2D:
|
||||
return VideoCommon::ImageViewType::e2D;
|
||||
case Shader::TextureType::ColorArray2D:
|
||||
case Shader::TextureType::ShadowArray2D:
|
||||
return VideoCommon::ImageViewType::e2DArray;
|
||||
case Shader::TextureType::Color3D:
|
||||
case Shader::TextureType::Shadow3D:
|
||||
return VideoCommon::ImageViewType::e3D;
|
||||
case Shader::TextureType::ColorCube:
|
||||
case Shader::TextureType::ShadowCube:
|
||||
return VideoCommon::ImageViewType::Cube;
|
||||
case Shader::TextureType::ColorArrayCube:
|
||||
case Shader::TextureType::ShadowArrayCube:
|
||||
return VideoCommon::ImageViewType::CubeArray;
|
||||
}
|
||||
UNREACHABLE_MSG("Invalid texture type {}", type);
|
||||
}
|
||||
} // Anonymous namespace
|
||||
|
||||
ComputePipeline::ComputePipeline(const Device& device, VKDescriptorPool& descriptor_pool,
|
||||
@ -143,6 +203,47 @@ void ComputePipeline::ConfigureBufferCache(BufferCache& buffer_cache) {
|
||||
buffer_cache.BindHostComputeBuffers();
|
||||
}
|
||||
|
||||
void ComputePipeline::ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory,
|
||||
TextureCache& texture_cache) {
|
||||
texture_cache.SynchronizeComputeDescriptors();
|
||||
|
||||
static constexpr size_t max_elements = 64;
|
||||
std::array<ImageId, max_elements> image_view_ids;
|
||||
boost::container::static_vector<u32, max_elements> image_view_indices;
|
||||
boost::container::static_vector<VkSampler, max_elements> sampler_handles;
|
||||
|
||||
const auto& launch_desc{kepler_compute.launch_description};
|
||||
const auto& cbufs{launch_desc.const_buffer_config};
|
||||
const bool via_header_index{launch_desc.linked_tsc};
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
const u32 cbuf_index{desc.cbuf_index};
|
||||
const u32 cbuf_offset{desc.cbuf_offset};
|
||||
ASSERT(((launch_desc.const_buffer_enable_mask >> cbuf_index) & 1) != 0);
|
||||
|
||||
const GPUVAddr addr{cbufs[cbuf_index].Address() + cbuf_offset};
|
||||
const u32 raw_handle{gpu_memory.Read<u32>(addr)};
|
||||
|
||||
const TextureHandle handle(raw_handle, via_header_index);
|
||||
image_view_indices.push_back(handle.image);
|
||||
|
||||
Sampler* const sampler = texture_cache.GetComputeSampler(handle.sampler);
|
||||
sampler_handles.push_back(sampler->Handle());
|
||||
}
|
||||
|
||||
const std::span indices_span(image_view_indices.data(), image_view_indices.size());
|
||||
texture_cache.FillComputeImageViews(indices_span, image_view_ids);
|
||||
|
||||
size_t index{};
|
||||
for (const auto& desc : info.texture_descriptors) {
|
||||
const VkSampler vk_sampler{sampler_handles[index]};
|
||||
ImageView& image_view{texture_cache.GetImageView(image_view_ids[index])};
|
||||
const VkImageView vk_image_view{image_view.Handle(CastType(desc.type))};
|
||||
update_descriptor_queue->AddSampledImage(vk_image_view, vk_sampler);
|
||||
++index;
|
||||
}
|
||||
}
|
||||
|
||||
VkDescriptorSet ComputePipeline::UpdateDescriptorSet() {
|
||||
const VkDescriptorSet descriptor_set{descriptor_allocator.Commit()};
|
||||
update_descriptor_queue->Send(*descriptor_update_template, descriptor_set);
|
||||
|
@ -6,9 +6,11 @@
|
||||
|
||||
#include "common/common_types.h"
|
||||
#include "shader_recompiler/shader_info.h"
|
||||
#include "video_core/memory_manager.h"
|
||||
#include "video_core/renderer_vulkan/vk_buffer_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||
#include "video_core/renderer_vulkan/vk_pipeline.h"
|
||||
#include "video_core/renderer_vulkan/vk_texture_cache.h"
|
||||
#include "video_core/renderer_vulkan/vk_update_descriptor.h"
|
||||
#include "video_core/vulkan_common/vulkan_wrapper.h"
|
||||
|
||||
@ -30,6 +32,8 @@ public:
|
||||
ComputePipeline(const ComputePipeline&) = delete;
|
||||
|
||||
void ConfigureBufferCache(BufferCache& buffer_cache);
|
||||
void ConfigureTextureCache(Tegra::Engines::KeplerCompute& kepler_compute,
|
||||
Tegra::MemoryManager& gpu_memory, TextureCache& texture_cache);
|
||||
|
||||
[[nodiscard]] VkDescriptorSet UpdateDescriptorSet();
|
||||
|
||||
|
@ -76,6 +76,10 @@ public:
|
||||
return gpu_memory.Read<u64>(program_base + address);
|
||||
}
|
||||
|
||||
u32 TextureBoundBuffer() override {
|
||||
return kepler_compute.regs.tex_cb_index;
|
||||
}
|
||||
|
||||
std::array<u32, 3> WorkgroupSize() override {
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
return {qmd.block_dim_x, qmd.block_dim_y, qmd.block_dim_z};
|
||||
|
@ -241,9 +241,10 @@ void RasterizerVulkan::DispatchCompute() {
|
||||
if (!pipeline) {
|
||||
return;
|
||||
}
|
||||
std::scoped_lock lock{buffer_cache.mutex};
|
||||
std::scoped_lock lock{texture_cache.mutex, buffer_cache.mutex};
|
||||
update_descriptor_queue.Acquire();
|
||||
pipeline->ConfigureBufferCache(buffer_cache);
|
||||
pipeline->ConfigureTextureCache(kepler_compute, gpu_memory, texture_cache);
|
||||
const VkDescriptorSet descriptor_set{pipeline->UpdateDescriptorSet()};
|
||||
|
||||
const auto& qmd{kepler_compute.launch_description};
|
||||
|
Loading…
Reference in New Issue
Block a user