shader_recompiler: emulate 8-bit and 16-bit storage writes with cas loop
This commit is contained in:
		| @@ -65,6 +65,14 @@ void WriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& | ||||
|     WriteStorage(ctx, binding, offset, value, ctx.storage_types.U32, sizeof(u32), | ||||
|                  &StorageDefinitions::U32, index_offset); | ||||
| } | ||||
|  | ||||
| void WriteStorageByCasLoop(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                            Id value, Id bit_offset, Id bit_count) { | ||||
|     const Id pointer{StoragePointer(ctx, binding, offset, ctx.storage_types.U32, sizeof(u32), | ||||
|                                     &StorageDefinitions::U32)}; | ||||
|     ctx.OpFunctionCall(ctx.TypeVoid(), ctx.write_storage_cas_loop_func, pointer, value, bit_offset, | ||||
|                        bit_count); | ||||
| } | ||||
| } // Anonymous namespace | ||||
|  | ||||
| void EmitLoadGlobalU8(EmitContext&) { | ||||
| @@ -219,26 +227,42 @@ Id EmitLoadStorage128(EmitContext& ctx, const IR::Value& binding, const IR::Valu | ||||
|  | ||||
| void EmitWriteStorageU8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                         Id value) { | ||||
|     if (ctx.profile.support_int8) { | ||||
|         WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U8, value), ctx.storage_types.U8, | ||||
|                      sizeof(u8), &StorageDefinitions::U8); | ||||
|     } else { | ||||
|         WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitWriteStorageS8(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                         Id value) { | ||||
|     if (ctx.profile.support_int8) { | ||||
|         WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S8, value), ctx.storage_types.S8, | ||||
|                      sizeof(s8), &StorageDefinitions::S8); | ||||
|     } else { | ||||
|         WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset8(offset), ctx.Const(8u)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitWriteStorageU16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value) { | ||||
|     if (ctx.profile.support_int16) { | ||||
|         WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.U16, value), ctx.storage_types.U16, | ||||
|                      sizeof(u16), &StorageDefinitions::U16); | ||||
|     } else { | ||||
|         WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitWriteStorageS16(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|                          Id value) { | ||||
|     if (ctx.profile.support_int16) { | ||||
|         WriteStorage(ctx, binding, offset, ctx.OpSConvert(ctx.S16, value), ctx.storage_types.S16, | ||||
|                      sizeof(s16), &StorageDefinitions::S16); | ||||
|     } else { | ||||
|         WriteStorageByCasLoop(ctx, binding, offset, value, ctx.BitOffset16(offset), ctx.Const(16u)); | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitWriteStorage32(EmitContext& ctx, const IR::Value& binding, const IR::Value& offset, | ||||
|   | ||||
| @@ -480,6 +480,7 @@ EmitContext::EmitContext(const Profile& profile_, const RuntimeInfo& runtime_inf | ||||
|     DefineTextures(program.info, texture_binding, bindings.texture_scaling_index); | ||||
|     DefineImages(program.info, image_binding, bindings.image_scaling_index); | ||||
|     DefineAttributeMemAccess(program.info); | ||||
|     DefineWriteStorageCasLoopFunction(program.info); | ||||
|     DefineGlobalMemoryFunctions(program.info); | ||||
|     DefineRescalingInput(program.info); | ||||
|     DefineRenderArea(program.info); | ||||
| @@ -877,6 +878,56 @@ void EmitContext::DefineAttributeMemAccess(const Info& info) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineWriteStorageCasLoopFunction(const Info& info) { | ||||
|     if (profile.support_int8 && profile.support_int16) { | ||||
|         return; | ||||
|     } | ||||
|     if (!info.uses_int8 && !info.uses_int16) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     AddCapability(spv::Capability::VariablePointersStorageBuffer); | ||||
|  | ||||
|     const Id ptr_type{TypePointer(spv::StorageClass::StorageBuffer, U32[1])}; | ||||
|     const Id func_type{TypeFunction(void_id, ptr_type, U32[1], U32[1], U32[1])}; | ||||
|     const Id func{OpFunction(void_id, spv::FunctionControlMask::MaskNone, func_type)}; | ||||
|     const Id pointer{OpFunctionParameter(ptr_type)}; | ||||
|     const Id value{OpFunctionParameter(U32[1])}; | ||||
|     const Id bit_offset{OpFunctionParameter(U32[1])}; | ||||
|     const Id bit_count{OpFunctionParameter(U32[1])}; | ||||
|  | ||||
|     AddLabel(); | ||||
|     const Id scope_device{Const(1u)}; | ||||
|     const Id ordering_relaxed{u32_zero_value}; | ||||
|     const Id body_label{OpLabel()}; | ||||
|     const Id continue_label{OpLabel()}; | ||||
|     const Id endloop_label{OpLabel()}; | ||||
|     const Id beginloop_label{OpLabel()}; | ||||
|     OpBranch(beginloop_label); | ||||
|  | ||||
|     AddLabel(beginloop_label); | ||||
|     OpLoopMerge(endloop_label, continue_label, spv::LoopControlMask::MaskNone); | ||||
|     OpBranch(body_label); | ||||
|  | ||||
|     AddLabel(body_label); | ||||
|     const Id expected_value{OpLoad(U32[1], pointer)}; | ||||
|     const Id desired_value{OpBitFieldInsert(U32[1], expected_value, value, bit_offset, bit_count)}; | ||||
|     const Id actual_value{OpAtomicCompareExchange(U32[1], pointer, scope_device, ordering_relaxed, | ||||
|                                                   ordering_relaxed, desired_value, expected_value)}; | ||||
|     const Id store_successful{OpIEqual(U1, expected_value, actual_value)}; | ||||
|     OpBranchConditional(store_successful, endloop_label, continue_label); | ||||
|  | ||||
|     AddLabel(endloop_label); | ||||
|     OpReturn(); | ||||
|  | ||||
|     AddLabel(continue_label); | ||||
|     OpBranch(beginloop_label); | ||||
|  | ||||
|     OpFunctionEnd(); | ||||
|  | ||||
|     write_storage_cas_loop_func = func; | ||||
| } | ||||
|  | ||||
| void EmitContext::DefineGlobalMemoryFunctions(const Info& info) { | ||||
|     if (!info.uses_global_memory || !profile.support_int64) { | ||||
|         return; | ||||
|   | ||||
| @@ -325,6 +325,8 @@ public: | ||||
|     Id f32x2_min_cas{}; | ||||
|     Id f32x2_max_cas{}; | ||||
|  | ||||
|     Id write_storage_cas_loop_func{}; | ||||
|  | ||||
|     Id load_global_func_u32{}; | ||||
|     Id load_global_func_u32x2{}; | ||||
|     Id load_global_func_u32x4{}; | ||||
| @@ -372,6 +374,7 @@ private: | ||||
|     void DefineTextures(const Info& info, u32& binding, u32& scaling_index); | ||||
|     void DefineImages(const Info& info, u32& binding, u32& scaling_index); | ||||
|     void DefineAttributeMemAccess(const Info& info); | ||||
|     void DefineWriteStorageCasLoopFunction(const Info& info); | ||||
|     void DefineGlobalMemoryFunctions(const Info& info); | ||||
|     void DefineRescalingInput(const Info& info); | ||||
|     void DefineRescalingInputPushConstant(); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Liam
					Liam