texture_cache: OpenGL: Implement MSAA uploads and copies
This commit is contained in:
		| @@ -22,6 +22,8 @@ set(SHADER_FILES | ||||
|     convert_d24s8_to_abgr8.frag | ||||
|     convert_depth_to_float.frag | ||||
|     convert_float_to_depth.frag | ||||
|     convert_msaa_to_non_msaa.comp | ||||
|     convert_non_msaa_to_msaa.comp | ||||
|     convert_s8d24_to_abgr8.frag | ||||
|     full_screen_triangle.vert | ||||
|     fxaa.frag | ||||
|   | ||||
							
								
								
									
										30
									
								
								src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										30
									
								
								src/video_core/host_shaders/convert_msaa_to_non_msaa.comp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,30 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
|  | ||||
| #version 450 core | ||||
| layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; | ||||
|  | ||||
| layout (binding = 0, rgba8) uniform readonly restrict image2DMSArray msaa_in; | ||||
| layout (binding = 1, rgba8) uniform writeonly restrict image2DArray output_img; | ||||
|  | ||||
| void main() { | ||||
|     // Each invocation handles one multisampled texel; the Z component indexes the array layer. | ||||
|     const ivec3 src_texel = ivec3(gl_GlobalInvocationID); | ||||
|     if (!all(lessThan(src_texel, imageSize(msaa_in)))) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     const ivec3 dst_size = imageSize(output_img); | ||||
|  | ||||
|     // TODO: Specialization constants for num_samples? | ||||
|     const int sample_count = imageSamples(msaa_in); | ||||
|     for (int sample_index = 0; sample_index < sample_count; ++sample_index) { | ||||
|         // Only the two low bits of the sample index pick the destination: bit 0 offsets X and | ||||
|         // bit 1 offsets Y inside the 2x2 destination footprint that belongs to this texel. | ||||
|         const ivec2 sub_offset = ivec2(sample_index & 1, (sample_index >> 1) & 1); | ||||
|         const ivec3 dst_texel = ivec3(2 * src_texel.xy + sub_offset, src_texel.z); | ||||
|  | ||||
|         // Samples whose destination falls outside the output image are dropped. | ||||
|         if (all(lessThan(dst_texel, dst_size))) { | ||||
|             imageStore(output_img, dst_texel, imageLoad(msaa_in, src_texel, sample_index)); | ||||
|         } | ||||
|     } | ||||
| } | ||||
							
								
								
									
										29
									
								
								src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										29
									
								
								src/video_core/host_shaders/convert_non_msaa_to_msaa.comp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,29 @@ | ||||
| // SPDX-FileCopyrightText: Copyright 2023 yuzu Emulator Project | ||||
| // SPDX-License-Identifier: GPL-2.0-or-later | ||||
|  | ||||
| #version 450 core | ||||
| layout (local_size_x = 8, local_size_y = 8, local_size_z = 1) in; | ||||
|  | ||||
| layout (binding = 0, rgba8) uniform readonly restrict image2DArray img_in; | ||||
| layout (binding = 1, rgba8) uniform writeonly restrict image2DMSArray output_msaa; | ||||
|  | ||||
| void main() { | ||||
|     // Each invocation fills one multisampled texel; the Z component indexes the array layer. | ||||
|     const ivec3 dst_texel = ivec3(gl_GlobalInvocationID); | ||||
|     if (!all(lessThan(dst_texel, imageSize(output_msaa)))) { | ||||
|         return; | ||||
|     } | ||||
|  | ||||
|     const ivec3 src_size = imageSize(img_in); | ||||
|  | ||||
|     // TODO: Specialization constants for num_samples? | ||||
|     const int sample_count = imageSamples(output_msaa); | ||||
|     for (int sample_index = 0; sample_index < sample_count; ++sample_index) { | ||||
|         // Only the two low bits of the sample index pick the source: bit 0 offsets X and | ||||
|         // bit 1 offsets Y inside the 2x2 source footprint that belongs to this texel. | ||||
|         const ivec2 sub_offset = ivec2(sample_index & 1, (sample_index >> 1) & 1); | ||||
|         const ivec3 src_texel = ivec3(2 * dst_texel.xy + sub_offset, dst_texel.z); | ||||
|  | ||||
|         // Samples whose source falls outside the input image are left unwritten. | ||||
|         if (all(lessThan(src_texel, src_size))) { | ||||
|             imageStore(output_msaa, dst_texel, sample_index, imageLoad(img_in, src_texel)); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| @@ -557,6 +557,14 @@ void TextureCacheRuntime::CopyImage(Image& dst_image, Image& src_image, | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Copies between two images on the sample-count-aware path. | ||||
| /// Logs the source and destination sample counts at debug level, then forwards the whole | ||||
| /// request unchanged to UtilShaders::CopyMSAA. | ||||
| /// @param dst_image Image that receives the copy. | ||||
| /// @param src_image Image that is read from. | ||||
| /// @param copies    Copy regions handed through to the utility shader pass. | ||||
| void TextureCacheRuntime::CopyImageMSAA(Image& dst_image, Image& src_image, | ||||
|                                         std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     LOG_DEBUG(Render_OpenGL, "Copying from {} samples to {} samples", src_image.info.num_samples, | ||||
|               dst_image.info.num_samples); | ||||
|     // TODO: Leverage the format conversion pass if possible/accurate. | ||||
|     util_shaders.CopyMSAA(dst_image, src_image, copies); | ||||
| } | ||||
|  | ||||
| void TextureCacheRuntime::ReinterpretImage(Image& dst, Image& src, | ||||
|                                            std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     LOG_DEBUG(Render_OpenGL, "Converting {} to {}", src.info.format, dst.info.format); | ||||
|   | ||||
| @@ -93,12 +93,19 @@ public: | ||||
|         return device.CanReportMemoryUsage(); | ||||
|     } | ||||
|  | ||||
|     bool ShouldReinterpret([[maybe_unused]] Image& dst, [[maybe_unused]] Image& src) { | ||||
|     bool ShouldReinterpret([[maybe_unused]] Image& dst, | ||||
|                            [[maybe_unused]] Image& src) const noexcept { | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     /// Reports whether this runtime can upload guest data into multisampled images. | ||||
|     /// Always returns true for this runtime. | ||||
|     bool CanUploadMSAA() const noexcept { | ||||
|         return true; | ||||
|     } | ||||
|  | ||||
|     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void ConvertImage(Framebuffer* dst, ImageView& dst_view, ImageView& src_view) { | ||||
|   | ||||
| @@ -12,6 +12,8 @@ | ||||
| #include "video_core/host_shaders/astc_decoder_comp.h" | ||||
| #include "video_core/host_shaders/block_linear_unswizzle_2d_comp.h" | ||||
| #include "video_core/host_shaders/block_linear_unswizzle_3d_comp.h" | ||||
| #include "video_core/host_shaders/convert_msaa_to_non_msaa_comp.h" | ||||
| #include "video_core/host_shaders/convert_non_msaa_to_msaa_comp.h" | ||||
| #include "video_core/host_shaders/opengl_convert_s8d24_comp.h" | ||||
| #include "video_core/host_shaders/opengl_copy_bc4_comp.h" | ||||
| #include "video_core/host_shaders/pitch_unswizzle_comp.h" | ||||
| @@ -51,7 +53,9 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_) | ||||
|       block_linear_unswizzle_3d_program(MakeProgram(BLOCK_LINEAR_UNSWIZZLE_3D_COMP)), | ||||
|       pitch_unswizzle_program(MakeProgram(PITCH_UNSWIZZLE_COMP)), | ||||
|       copy_bc4_program(MakeProgram(OPENGL_COPY_BC4_COMP)), | ||||
|       convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)) { | ||||
|       convert_s8d24_program(MakeProgram(OPENGL_CONVERT_S8D24_COMP)), | ||||
|       convert_ms_to_nonms_program(MakeProgram(CONVERT_MSAA_TO_NON_MSAA_COMP)), | ||||
|       convert_nonms_to_ms_program(MakeProgram(CONVERT_NON_MSAA_TO_MSAA_COMP)) { | ||||
|     const auto swizzle_table = Tegra::Texture::MakeSwizzleTable(); | ||||
|     swizzle_table_buffer.Create(); | ||||
|     glNamedBufferStorage(swizzle_table_buffer.handle, sizeof(swizzle_table), &swizzle_table, 0); | ||||
| @@ -269,6 +273,33 @@ void UtilShaders::ConvertS8D24(Image& dst_image, std::span<const ImageCopy> copi | ||||
|     program_manager.RestoreGuestCompute(); | ||||
| } | ||||
|  | ||||
| /// Copies image data between a multisampled and a single-sampled image with a compute pass. | ||||
| /// The MSAA -> non-MSAA program is used when the source has more than one sample and the | ||||
| /// destination has exactly one; every other combination uses the non-MSAA -> MSAA program. | ||||
| /// Both images are bound as layered RGBA8 image units, and each copy must address layer 0 with a | ||||
| /// single layer on both sides. | ||||
| /// @param dst_image Image written by the shader (image unit 1). | ||||
| /// @param src_image Image read by the shader (image unit 0). | ||||
| /// @param copies    Regions to copy; one dispatch is issued per entry. | ||||
| void UtilShaders::CopyMSAA(Image& dst_image, Image& src_image, | ||||
|                            std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     // Image unit indices and workgroup edge length matching the shader declarations. | ||||
|     constexpr GLuint src_binding = 0; | ||||
|     constexpr GLuint dst_binding = 1; | ||||
|     constexpr u32 workgroup_edge = 8; | ||||
|  | ||||
|     const bool src_is_multisampled = src_image.info.num_samples > 1; | ||||
|     const bool dst_is_single_sampled = dst_image.info.num_samples == 1; | ||||
|     const OGLProgram& program = (src_is_multisampled && dst_is_single_sampled) | ||||
|                                     ? convert_ms_to_nonms_program | ||||
|                                     : convert_nonms_to_ms_program; | ||||
|     program_manager.BindComputeProgram(program.handle); | ||||
|  | ||||
|     for (const ImageCopy& region : copies) { | ||||
|         const auto& src_sub = region.src_subresource; | ||||
|         const auto& dst_sub = region.dst_subresource; | ||||
|         ASSERT(src_sub.base_layer == 0); | ||||
|         ASSERT(src_sub.num_layers == 1); | ||||
|         ASSERT(dst_sub.base_layer == 0); | ||||
|         ASSERT(dst_sub.num_layers == 1); | ||||
|  | ||||
|         glBindImageTexture(src_binding, src_image.StorageHandle(), src_sub.base_level, GL_TRUE, 0, | ||||
|                            GL_READ_ONLY, GL_RGBA8); | ||||
|         glBindImageTexture(dst_binding, dst_image.StorageHandle(), dst_sub.base_level, GL_TRUE, 0, | ||||
|                            GL_WRITE_ONLY, GL_RGBA8); | ||||
|  | ||||
|         // One workgroup covers an 8x8 tile; depth maps directly onto the Z group count. | ||||
|         const u32 groups_x = Common::DivCeil(region.extent.width, workgroup_edge); | ||||
|         const u32 groups_y = Common::DivCeil(region.extent.height, workgroup_edge); | ||||
|         const u32 groups_z = region.extent.depth; | ||||
|         glDispatchCompute(groups_x, groups_y, groups_z); | ||||
|     } | ||||
|     program_manager.RestoreGuestCompute(); | ||||
| } | ||||
|  | ||||
| GLenum StoreFormat(u32 bytes_per_block) { | ||||
|     switch (bytes_per_block) { | ||||
|     case 1: | ||||
|   | ||||
| @@ -40,6 +40,9 @@ public: | ||||
|  | ||||
|     void ConvertS8D24(Image& dst_image, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void CopyMSAA(Image& dst_image, Image& src_image, | ||||
|                   std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
| private: | ||||
|     ProgramManager& program_manager; | ||||
|  | ||||
| @@ -51,6 +54,8 @@ private: | ||||
|     OGLProgram pitch_unswizzle_program; | ||||
|     OGLProgram copy_bc4_program; | ||||
|     OGLProgram convert_s8d24_program; | ||||
|     OGLProgram convert_ms_to_nonms_program; | ||||
|     OGLProgram convert_nonms_to_ms_program; | ||||
| }; | ||||
|  | ||||
| GLenum StoreFormat(u32 bytes_per_block); | ||||
|   | ||||
| @@ -1230,6 +1230,11 @@ void TextureCacheRuntime::CopyImage(Image& dst, Image& src, | ||||
|     }); | ||||
| } | ||||
|  | ||||
| /// Placeholder for copies between images with different sample counts. | ||||
| /// No copy is performed: the function only reports the operation as unimplemented, so none of | ||||
| /// the parameters are read. | ||||
| void TextureCacheRuntime::CopyImageMSAA(Image& dst, Image& src, | ||||
|                                         std::span<const VideoCommon::ImageCopy> copies) { | ||||
|     UNIMPLEMENTED_MSG("Copying images with different samples is not implemented in Vulkan."); | ||||
| } | ||||
|  | ||||
| u64 TextureCacheRuntime::GetDeviceLocalMemory() const { | ||||
|     return device.GetDeviceLocalMemory(); | ||||
| } | ||||
|   | ||||
| @@ -70,6 +70,8 @@ public: | ||||
|  | ||||
|     void CopyImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     void CopyImageMSAA(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
|  | ||||
|     bool ShouldReinterpret(Image& dst, Image& src); | ||||
|  | ||||
|     void ReinterpretImage(Image& dst, Image& src, std::span<const VideoCommon::ImageCopy> copies); | ||||
| @@ -80,6 +82,11 @@ public: | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     /// Reports whether this runtime can upload guest data into multisampled images. | ||||
|     /// Always returns false for this runtime until buffer to MSAA uploads exist. | ||||
|     bool CanUploadMSAA() const noexcept { | ||||
|         // TODO: Implement buffer to MSAA uploads | ||||
|         return false; | ||||
|     } | ||||
|  | ||||
|     void AccelerateImageUpload(Image&, const StagingBufferRef&, | ||||
|                                std::span<const VideoCommon::SwizzleParameters>); | ||||
|  | ||||
|   | ||||
| @@ -22,6 +22,9 @@ std::string Name(const ImageBase& image) { | ||||
|     const u32 num_layers = image.info.resources.layers; | ||||
|     const u32 num_levels = image.info.resources.levels; | ||||
|     std::string resource; | ||||
|     if (image.info.num_samples > 1) { | ||||
|         resource += fmt::format(":{}xMSAA", image.info.num_samples); | ||||
|     } | ||||
|     if (num_layers > 1) { | ||||
|         resource += fmt::format(":L{}", num_layers); | ||||
|     } | ||||
|   | ||||
| @@ -773,7 +773,7 @@ void TextureCache<P>::RefreshContents(Image& image, ImageId image_id) { | ||||
|     image.flags &= ~ImageFlagBits::CpuModified; | ||||
|     TrackImage(image, image_id); | ||||
|  | ||||
|     if (image.info.num_samples > 1) { | ||||
|     if (image.info.num_samples > 1 && !runtime.CanUploadMSAA()) { | ||||
|         LOG_WARNING(HW_GPU, "MSAA image uploads are not implemented"); | ||||
|         return; | ||||
|     } | ||||
| @@ -1167,14 +1167,14 @@ ImageId TextureCache<P>::JoinImages(const ImageInfo& info, GPUVAddr gpu_addr, VA | ||||
|         if (True(overlap.flags & ImageFlagBits::GpuModified)) { | ||||
|             new_image.flags |= ImageFlagBits::GpuModified; | ||||
|         } | ||||
|         const auto& resolution = Settings::values.resolution_info; | ||||
|         const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||||
|         const u32 up_scale = can_rescale ? resolution.up_scale : 1; | ||||
|         const u32 down_shift = can_rescale ? resolution.down_shift : 0; | ||||
|         auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); | ||||
|         if (overlap.info.num_samples != new_image.info.num_samples) { | ||||
|             LOG_WARNING(HW_GPU, "Copying between images with different samples is not implemented"); | ||||
|             runtime.CopyImageMSAA(new_image, overlap, std::move(copies)); | ||||
|         } else { | ||||
|             const auto& resolution = Settings::values.resolution_info; | ||||
|             const SubresourceBase base = new_image.TryFindBase(overlap.gpu_addr).value(); | ||||
|             const u32 up_scale = can_rescale ? resolution.up_scale : 1; | ||||
|             const u32 down_shift = can_rescale ? resolution.down_shift : 0; | ||||
|             auto copies = MakeShrinkImageCopies(new_info, overlap.info, base, up_scale, down_shift); | ||||
|             runtime.CopyImage(new_image, overlap, std::move(copies)); | ||||
|         } | ||||
|         if (True(overlap.flags & ImageFlagBits::Tracked)) { | ||||
|   | ||||
| @@ -573,10 +573,6 @@ u32 CalculateUnswizzledSizeBytes(const ImageInfo& info) noexcept { | ||||
|     if (info.type == ImageType::Buffer) { | ||||
|         return info.size.width * BytesPerBlock(info.format); | ||||
|     } | ||||
|     if (info.num_samples > 1) { | ||||
|         // Multisample images can't be uploaded or downloaded to the host | ||||
|         return 0; | ||||
|     } | ||||
|     if (info.type == ImageType::Linear) { | ||||
|         return info.pitch * Common::DivCeil(info.size.height, DefaultBlockHeight(info.format)); | ||||
|     } | ||||
| @@ -703,7 +699,6 @@ ImageViewType RenderTargetImageViewType(const ImageInfo& info) noexcept { | ||||
| std::vector<ImageCopy> MakeShrinkImageCopies(const ImageInfo& dst, const ImageInfo& src, | ||||
|                                              SubresourceBase base, u32 up_scale, u32 down_shift) { | ||||
|     ASSERT(dst.resources.levels >= src.resources.levels); | ||||
|     ASSERT(dst.num_samples == src.num_samples); | ||||
|  | ||||
|     const bool is_dst_3d = dst.type == ImageType::e3D; | ||||
|     if (is_dst_3d) { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 ameerj
					ameerj