Merge pull request #1438 from ReinUsesLisp/quads
gl_rasterizer: Implement quads topology
This commit is contained in:
		| @@ -27,6 +27,8 @@ add_library(video_core STATIC | ||||
|     renderer_base.h | ||||
|     renderer_opengl/gl_buffer_cache.cpp | ||||
|     renderer_opengl/gl_buffer_cache.h | ||||
|     renderer_opengl/gl_primitive_assembler.cpp | ||||
|     renderer_opengl/gl_primitive_assembler.h | ||||
|     renderer_opengl/gl_rasterizer.cpp | ||||
|     renderer_opengl/gl_rasterizer.h | ||||
|     renderer_opengl/gl_rasterizer_cache.cpp | ||||
|   | ||||
| @@ -744,6 +744,12 @@ public: | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(end_addr_high) << 32) | | ||||
|                                                      end_addr_low); | ||||
|                     } | ||||
|  | ||||
|                     /// Returns the GPU address of the first index to read: the buffer start | ||||
|                     /// address advanced by `first` entries of FormatSizeInBytes() bytes each. | ||||
|                     GPUVAddr IndexStart() const { | ||||
|                         const auto skipped_bytes = static_cast<size_t>(first) * | ||||
|                                                    static_cast<size_t>(FormatSizeInBytes()); | ||||
|                         return StartAddress() + skipped_bytes; | ||||
|                     } | ||||
|                 } index_array; | ||||
|  | ||||
|                 INSERT_PADDING_WORDS(0x7); | ||||
|   | ||||
| @@ -34,7 +34,7 @@ GLintptr OGLBufferCache::UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size | ||||
|     } | ||||
|  | ||||
|     AlignBuffer(alignment); | ||||
|     GLintptr uploaded_offset = buffer_offset; | ||||
|     const GLintptr uploaded_offset = buffer_offset; | ||||
|  | ||||
|     Memory::ReadBlock(*cpu_addr, buffer_ptr, size); | ||||
|  | ||||
| @@ -57,13 +57,23 @@ GLintptr OGLBufferCache::UploadHostMemory(const void* raw_pointer, std::size_t s | ||||
|                                           std::size_t alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|     std::memcpy(buffer_ptr, raw_pointer, size); | ||||
|     GLintptr uploaded_offset = buffer_offset; | ||||
|     const GLintptr uploaded_offset = buffer_offset; | ||||
|  | ||||
|     buffer_ptr += size; | ||||
|     buffer_offset += size; | ||||
|     return uploaded_offset; | ||||
| } | ||||
|  | ||||
| /// Aligns the write cursor and sets aside `size` bytes of the buffer for the host CPU to fill. | ||||
| /// Returns the address to write to together with the matching offset inside the buffer. | ||||
| std::tuple<u8*, GLintptr> OGLBufferCache::ReserveMemory(std::size_t size, std::size_t alignment) { | ||||
|     AlignBuffer(alignment); | ||||
|  | ||||
|     // Capture where the reservation starts before the cursor moves past it. | ||||
|     const std::tuple<u8*, GLintptr> reservation{buffer_ptr, buffer_offset}; | ||||
|  | ||||
|     buffer_offset += size; | ||||
|     buffer_ptr += size; | ||||
|     return reservation; | ||||
| } | ||||
|  | ||||
| void OGLBufferCache::Map(std::size_t max_size) { | ||||
|     bool invalidate; | ||||
|     std::tie(buffer_ptr, buffer_offset_base, invalidate) = | ||||
| @@ -74,6 +84,7 @@ void OGLBufferCache::Map(std::size_t max_size) { | ||||
|         InvalidateAll(); | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Unmaps the stream buffer, passing it the distance between the current offset and the base | ||||
| /// offset. | ||||
| void OGLBufferCache::Unmap() { | ||||
|     const auto used_bytes = buffer_offset - buffer_offset_base; | ||||
|     stream_buffer.Unmap(used_bytes); | ||||
| } | ||||
| @@ -84,7 +95,7 @@ GLuint OGLBufferCache::GetHandle() const { | ||||
|  | ||||
| void OGLBufferCache::AlignBuffer(std::size_t alignment) { | ||||
|     // Align the offset, not the mapped pointer | ||||
|     GLintptr offset_aligned = | ||||
|     const GLintptr offset_aligned = | ||||
|         static_cast<GLintptr>(Common::AlignUp(static_cast<std::size_t>(buffer_offset), alignment)); | ||||
|     buffer_ptr += offset_aligned - buffer_offset; | ||||
|     buffer_offset = offset_aligned; | ||||
|   | ||||
| @@ -6,6 +6,7 @@ | ||||
|  | ||||
| #include <cstddef> | ||||
| #include <memory> | ||||
| #include <tuple> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/rasterizer_cache.h" | ||||
| @@ -33,11 +34,17 @@ class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBuffer | ||||
| public: | ||||
|     explicit OGLBufferCache(std::size_t size); | ||||
|  | ||||
|     /// Uploads data from a guest GPU address. Returns host's buffer offset where it's been | ||||
|     /// allocated. | ||||
|     GLintptr UploadMemory(Tegra::GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, | ||||
|                           bool cache = true); | ||||
|  | ||||
|     /// Uploads from a host memory. Returns host's buffer offset where it's been allocated. | ||||
|     GLintptr UploadHostMemory(const void* raw_pointer, std::size_t size, std::size_t alignment = 4); | ||||
|  | ||||
|     /// Reserves memory to be used by host's CPU. Returns mapped address and offset. | ||||
|     std::tuple<u8*, GLintptr> ReserveMemory(std::size_t size, std::size_t alignment = 4); | ||||
|  | ||||
|     void Map(std::size_t max_size); | ||||
|     void Unmap(); | ||||
|  | ||||
|   | ||||
							
								
								
									
										64
									
								
								src/video_core/renderer_opengl/gl_primitive_assembler.cpp
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								src/video_core/renderer_opengl/gl_primitive_assembler.cpp
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
| // Copyright 2018 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <algorithm> | ||||
| #include <array> | ||||
| #include <cstring> | ||||
| #include "common/assert.h" | ||||
| #include "common/common_types.h" | ||||
| #include "core/memory.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_primitive_assembler.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| // Number of indices emitted for every quad: two triangles of three indices each. Despite the | ||||
| // name, this counts indices, not triangles. | ||||
| constexpr u32 TRIANGLES_PER_QUAD = 6; | ||||
| // Corner order, relative to a quad's first vertex, that splits the quad into the triangles | ||||
| // (0, 1, 2) and (0, 2, 3). | ||||
| constexpr std::array<u32, TRIANGLES_PER_QUAD> QUAD_MAP = {0, 1, 2, 0, 2, 3}; | ||||
|  | ||||
| PrimitiveAssembler::PrimitiveAssembler(OGLBufferCache& buffer_cache) : buffer_cache(buffer_cache) {} | ||||
|  | ||||
| PrimitiveAssembler::~PrimitiveAssembler() = default; | ||||
|  | ||||
| /// Returns the bytes needed to hold the triangle index list generated for `count` quad vertices. | ||||
| /// The result assumes 4-byte (GLuint) indices. | ||||
| /// @param count Number of quad vertices (or indices); expected to be a multiple of 4. | ||||
| std::size_t PrimitiveAssembler::CalculateQuadSize(u32 count) const { | ||||
|     ASSERT_MSG(count % 4 == 0, "Quad count is expected to be a multiple of 4"); | ||||
|     // Widen before multiplying so very large counts cannot wrap around in 32-bit arithmetic. | ||||
|     const std::size_t quad_count = static_cast<std::size_t>(count) / 4; | ||||
|     return quad_count * TRIANGLES_PER_QUAD * sizeof(GLuint); | ||||
| } | ||||
|  | ||||
| /// Builds a u32 triangle index list for a non-indexed quad draw. | ||||
| /// Every group of four consecutive vertices starting at `first` is expanded through QUAD_MAP. | ||||
| /// @param first Index of the first vertex of the draw. | ||||
| /// @param count Number of quad vertices; CalculateQuadSize asserts it is a multiple of 4. | ||||
| /// @returns Offset of the generated indices, as handed out by the buffer cache. | ||||
| GLintptr PrimitiveAssembler::MakeQuadArray(u32 first, u32 count) { | ||||
|     const std::size_t bytes_needed{CalculateQuadSize(count)}; | ||||
|     auto [write_ptr, reserved_offset] = buffer_cache.ReserveMemory(bytes_needed); | ||||
|  | ||||
|     const u32 quad_count = count / 4; | ||||
|     for (u32 quad = 0; quad < quad_count; ++quad) { | ||||
|         const u32 quad_base = first + quad * 4; | ||||
|         for (const u32 corner : QUAD_MAP) { | ||||
|             // Append the vertex index for this triangle corner to the reserved region. | ||||
|             const u32 vertex_index = quad_base + corner; | ||||
|             std::memcpy(write_ptr, &vertex_index, sizeof(vertex_index)); | ||||
|             write_ptr += sizeof(vertex_index); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return reserved_offset; | ||||
| } | ||||
|  | ||||
| /// Builds a triangle index list for an indexed quad draw by reordering the guest's indices. | ||||
| /// Every group of four source indices is expanded to six through QUAD_MAP; each index is copied | ||||
| /// unchanged, so the output keeps the source index size. | ||||
| /// @param gpu_addr   GPU address of the first source index. | ||||
| /// @param index_size Size in bytes of one source index. | ||||
| /// @param count      Number of source indices; CalculateQuadSize asserts it is a multiple of 4. | ||||
| /// @returns Offset of the generated indices, as handed out by the buffer cache. | ||||
| GLintptr PrimitiveAssembler::MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, | ||||
|                                              u32 count) { | ||||
|     // CalculateQuadSize assumes 4-byte indices, so narrower formats use only part of this space. | ||||
|     const std::size_t map_size{CalculateQuadSize(count)}; | ||||
|     auto [dst_pointer, index_offset] = buffer_cache.ReserveMemory(map_size); | ||||
|  | ||||
|     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager(); | ||||
|     const boost::optional<VAddr> cpu_addr{memory_manager.GpuToCpuAddress(gpu_addr)}; | ||||
|     // Dereferencing an empty optional is undefined behavior; report the bad address instead. | ||||
|     ASSERT_MSG(cpu_addr, "Index buffer GPU address is not mapped"); | ||||
|     const u8* const source{Memory::GetPointer(*cpu_addr)}; | ||||
|  | ||||
|     const u32 quad_count = count / 4; | ||||
|     for (u32 primitive = 0; primitive < quad_count; ++primitive) { | ||||
|         for (const u32 corner : QUAD_MAP) { | ||||
|             // Position of the source index that feeds this triangle corner. | ||||
|             const u32 index = primitive * 4 + corner; | ||||
|             const u8* const src_offset = source + (index * index_size); | ||||
|  | ||||
|             std::memcpy(dst_pointer, src_offset, index_size); | ||||
|             dst_pointer += index_size; | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return index_offset; | ||||
| } | ||||
|  | ||||
| } // namespace OpenGL | ||||
							
								
								
									
										33
									
								
								src/video_core/renderer_opengl/gl_primitive_assembler.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								src/video_core/renderer_opengl/gl_primitive_assembler.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| // Copyright 2018 yuzu Emulator Project | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <vector> | ||||
| #include <glad/glad.h> | ||||
|  | ||||
| #include "common/common_types.h" | ||||
| #include "video_core/memory_manager.h" | ||||
|  | ||||
| namespace OpenGL { | ||||
|  | ||||
| class OGLBufferCache; | ||||
|  | ||||
| /// Builds index lists that let quads be drawn as triangles. The lists are written into memory | ||||
| /// reserved from an OGLBufferCache. | ||||
| class PrimitiveAssembler { | ||||
| public: | ||||
|     /// @param buffer_cache Cache the generated index lists are reserved from; only a reference is | ||||
|     ///                     stored. | ||||
|     explicit PrimitiveAssembler(OGLBufferCache& buffer_cache); | ||||
|     ~PrimitiveAssembler(); | ||||
|  | ||||
|     /// Calculates the size required by MakeQuadArray and MakeQuadIndexed. | ||||
|     /// @param count Number of quad vertices (or source indices). | ||||
|     std::size_t CalculateQuadSize(u32 count) const; | ||||
|  | ||||
|     /// Generates the triangle index list for a non-indexed quad draw. | ||||
|     /// @param first First vertex of the draw. | ||||
|     /// @param count Number of quad vertices. | ||||
|     /// @returns Offset of the generated index list, as handed out by the buffer cache. | ||||
|     GLintptr MakeQuadArray(u32 first, u32 count); | ||||
|  | ||||
|     /// Generates the triangle index list for an indexed quad draw from the guest's indices. | ||||
|     /// @param gpu_addr   GPU address of the first source index. | ||||
|     /// @param index_size Size in bytes of one source index. | ||||
|     /// @param count      Number of source indices. | ||||
|     /// @returns Offset of the generated index list, as handed out by the buffer cache. | ||||
|     GLintptr MakeQuadIndexed(Tegra::GPUVAddr gpu_addr, std::size_t index_size, u32 count); | ||||
|  | ||||
| private: | ||||
|     OGLBufferCache& buffer_cache; | ||||
| }; | ||||
|  | ||||
| } // namespace OpenGL | ||||
| @@ -42,6 +42,41 @@ MICROPROFILE_DEFINE(OpenGL_Framebuffer, "OpenGL", "Framebuffer Setup", MP_RGB(12 | ||||
| MICROPROFILE_DEFINE(OpenGL_Drawing, "OpenGL", "Drawing", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_Blits, "OpenGL", "Blits", MP_RGB(128, 128, 192)); | ||||
| MICROPROFILE_DEFINE(OpenGL_CacheManagement, "OpenGL", "Cache Mgmt", MP_RGB(100, 255, 100)); | ||||
| MICROPROFILE_DEFINE(OpenGL_PrimitiveAssembly, "OpenGL", "Prim Asmbl", MP_RGB(255, 100, 100)); | ||||
|  | ||||
| /// Everything needed to issue a single GL draw call. | ||||
| struct DrawParameters { | ||||
|     GLenum primitive_mode; | ||||
|     GLsizei count; | ||||
|     GLint current_instance; | ||||
|     bool use_indexed; | ||||
|  | ||||
|     // Only read by non-indexed draws. | ||||
|     GLint vertex_first; | ||||
|  | ||||
|     // Only read by indexed draws. | ||||
|     GLenum index_format; | ||||
|     GLint base_vertex; | ||||
|     GLintptr index_buffer_offset; | ||||
|  | ||||
|     /// Issues the GL draw call matching these parameters. | ||||
|     void DispatchDraw() const { | ||||
|         // A non-zero instance is drawn as one instance based at `current_instance`. | ||||
|         const bool has_base_instance = current_instance > 0; | ||||
|  | ||||
|         if (!use_indexed) { | ||||
|             if (has_base_instance) { | ||||
|                 glDrawArraysInstancedBaseInstance(primitive_mode, vertex_first, count, 1, | ||||
|                                                   current_instance); | ||||
|                 return; | ||||
|             } | ||||
|             glDrawArrays(primitive_mode, vertex_first, count); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // The index buffer offset is handed to GL through the pointer-typed indices argument. | ||||
|         const void* const indices = reinterpret_cast<const void*>(index_buffer_offset); | ||||
|         if (has_base_instance) { | ||||
|             glDrawElementsInstancedBaseVertexBaseInstance(primitive_mode, count, index_format, | ||||
|                                                           indices, 1, base_vertex, | ||||
|                                                           current_instance); | ||||
|             return; | ||||
|         } | ||||
|         glDrawElementsBaseVertex(primitive_mode, count, index_format, indices, base_vertex); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& window, ScreenInfo& info) | ||||
|     : emu_window{window}, screen_info{info}, buffer_cache(STREAM_BUFFER_SIZE) { | ||||
| @@ -172,6 +207,53 @@ void RasterizerOpenGL::SetupVertexArrays() { | ||||
|     } | ||||
| } | ||||
|  | ||||
| /// Builds the parameters of the GL draw call for the current Maxwell3D register state and places | ||||
| /// the index data the draw needs into the buffer cache. | ||||
| /// | ||||
| /// Quads are drawn as indexed GL_TRIANGLES using an index list produced by the primitive | ||||
| /// assembler; every other topology is translated directly. | ||||
| /// @returns The populated draw parameters. | ||||
| DrawParameters RasterizerOpenGL::SetupDraw() { | ||||
|     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||||
|     const auto& regs = gpu.regs; | ||||
|     const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | ||||
|  | ||||
|     DrawParameters params{}; | ||||
|     params.current_instance = gpu.state.current_instance; | ||||
|  | ||||
|     if (regs.draw.topology == Maxwell::PrimitiveTopology::Quads) { | ||||
|         MICROPROFILE_SCOPE(OpenGL_PrimitiveAssembly); | ||||
|  | ||||
|         params.use_indexed = true; | ||||
|         params.primitive_mode = GL_TRIANGLES; | ||||
|  | ||||
|         if (is_indexed) { | ||||
|             params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | ||||
|             params.count = (regs.index_array.count / 4) * 6; | ||||
|             params.index_buffer_offset = primitive_assembler.MakeQuadIndexed( | ||||
|                 regs.index_array.IndexStart(), regs.index_array.FormatSizeInBytes(), | ||||
|                 regs.index_array.count); | ||||
|             params.base_vertex = static_cast<GLint>(regs.vb_element_base); | ||||
|         } else { | ||||
|             // MakeQuadArray always generates u32 indexes | ||||
|             params.index_format = GL_UNSIGNED_INT; | ||||
|             params.count = (regs.vertex_buffer.count / 4) * 6; | ||||
|             // MakeQuadArray takes the number of quad vertices, not the number of generated | ||||
|             // indices: passing params.count would emit indices for vertices beyond the draw and | ||||
|             // trip its multiple-of-4 assertion. | ||||
|             params.index_buffer_offset = primitive_assembler.MakeQuadArray( | ||||
|                 regs.vertex_buffer.first, regs.vertex_buffer.count); | ||||
|         } | ||||
|         return params; | ||||
|     } | ||||
|  | ||||
|     params.use_indexed = is_indexed; | ||||
|     params.primitive_mode = MaxwellToGL::PrimitiveTopology(regs.draw.topology); | ||||
|  | ||||
|     if (is_indexed) { | ||||
|         MICROPROFILE_SCOPE(OpenGL_Index); | ||||
|         params.index_format = MaxwellToGL::IndexFormat(regs.index_array.format); | ||||
|         params.count = regs.index_array.count; | ||||
|         params.index_buffer_offset = | ||||
|             buffer_cache.UploadMemory(regs.index_array.IndexStart(), CalculateIndexBufferSize()); | ||||
|         params.base_vertex = static_cast<GLint>(regs.vb_element_base); | ||||
|     } else { | ||||
|         params.count = regs.vertex_buffer.count; | ||||
|         params.vertex_first = regs.vertex_buffer.first; | ||||
|     } | ||||
|     // Flowing off the end of a non-void function is undefined behavior, so return explicitly. | ||||
|     return params; | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::SetupShaders() { | ||||
|     MICROPROFILE_SCOPE(OpenGL_Shader); | ||||
|     const auto& gpu = Core::System::GetInstance().GPU().Maxwell3D(); | ||||
| @@ -256,6 +338,13 @@ std::size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
|     return size; | ||||
| } | ||||
|  | ||||
| /// Returns the size in bytes of the index data: the index count times the size of one index. | ||||
| std::size_t RasterizerOpenGL::CalculateIndexBufferSize() const { | ||||
|     const auto& index_array = Core::System::GetInstance().GPU().Maxwell3D().regs.index_array; | ||||
|     const auto index_count = static_cast<std::size_t>(index_array.count); | ||||
|     const auto index_size = static_cast<std::size_t>(index_array.FormatSizeInBytes()); | ||||
|  | ||||
|     return index_count * index_size; | ||||
| } | ||||
|  | ||||
| bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||
|     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||||
|     DrawArrays(); | ||||
| @@ -459,16 +548,23 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|  | ||||
|     // Draw the vertex batch | ||||
|     const bool is_indexed = accelerate_draw == AccelDraw::Indexed; | ||||
|     const u64 index_buffer_size{static_cast<u64>(regs.index_array.count) * | ||||
|                                 static_cast<u64>(regs.index_array.FormatSizeInBytes())}; | ||||
|  | ||||
|     state.draw.vertex_buffer = buffer_cache.GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     std::size_t buffer_size = CalculateVertexArraysSize(); | ||||
|  | ||||
|     if (is_indexed) { | ||||
|         buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + index_buffer_size; | ||||
|     // Add space for index buffer (keeping in mind non-core primitives) | ||||
|     switch (regs.draw.topology) { | ||||
|     case Maxwell::PrimitiveTopology::Quads: | ||||
|         buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + | ||||
|                       primitive_assembler.CalculateQuadSize(regs.vertex_buffer.count); | ||||
|         break; | ||||
|     default: | ||||
|         if (is_indexed) { | ||||
|             buffer_size = Common::AlignUp<std::size_t>(buffer_size, 4) + CalculateIndexBufferSize(); | ||||
|         } | ||||
|         break; | ||||
|     } | ||||
|  | ||||
|     // Uniform space for the 5 shader stages | ||||
| @@ -482,20 +578,7 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     buffer_cache.Map(buffer_size); | ||||
|  | ||||
|     SetupVertexArrays(); | ||||
|  | ||||
|     // If indexed mode, copy the index buffer | ||||
|     GLintptr index_buffer_offset = 0; | ||||
|     if (is_indexed) { | ||||
|         MICROPROFILE_SCOPE(OpenGL_Index); | ||||
|  | ||||
|         // Adjust the index buffer offset so it points to the first desired index. | ||||
|         auto index_start = regs.index_array.StartAddress(); | ||||
|         index_start += static_cast<size_t>(regs.index_array.first) * | ||||
|                        static_cast<size_t>(regs.index_array.FormatSizeInBytes()); | ||||
|  | ||||
|         index_buffer_offset = buffer_cache.UploadMemory(index_start, index_buffer_size); | ||||
|     } | ||||
|  | ||||
|     DrawParameters params = SetupDraw(); | ||||
|     SetupShaders(); | ||||
|  | ||||
|     buffer_cache.Unmap(); | ||||
| @@ -503,31 +586,8 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     shader_program_manager->ApplyTo(state); | ||||
|     state.Apply(); | ||||
|  | ||||
|     const GLenum primitive_mode{MaxwellToGL::PrimitiveTopology(regs.draw.topology)}; | ||||
|     if (is_indexed) { | ||||
|         const GLint base_vertex{static_cast<GLint>(regs.vb_element_base)}; | ||||
|  | ||||
|         if (gpu.state.current_instance > 0) { | ||||
|             glDrawElementsInstancedBaseVertexBaseInstance( | ||||
|                 primitive_mode, regs.index_array.count, | ||||
|                 MaxwellToGL::IndexFormat(regs.index_array.format), | ||||
|                 reinterpret_cast<const void*>(index_buffer_offset), 1, base_vertex, | ||||
|                 gpu.state.current_instance); | ||||
|         } else { | ||||
|             glDrawElementsBaseVertex(primitive_mode, regs.index_array.count, | ||||
|                                      MaxwellToGL::IndexFormat(regs.index_array.format), | ||||
|                                      reinterpret_cast<const void*>(index_buffer_offset), | ||||
|                                      base_vertex); | ||||
|         } | ||||
|     } else { | ||||
|         if (gpu.state.current_instance > 0) { | ||||
|             glDrawArraysInstancedBaseInstance(primitive_mode, regs.vertex_buffer.first, | ||||
|                                               regs.vertex_buffer.count, 1, | ||||
|                                               gpu.state.current_instance); | ||||
|         } else { | ||||
|             glDrawArrays(primitive_mode, regs.vertex_buffer.first, regs.vertex_buffer.count); | ||||
|         } | ||||
|     } | ||||
|     // Execute draw call | ||||
|     params.DispatchDraw(); | ||||
|  | ||||
|     // Disable scissor test | ||||
|     state.scissor.enabled = false; | ||||
|   | ||||
| @@ -23,6 +23,7 @@ | ||||
| #include "video_core/rasterizer_cache.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/renderer_opengl/gl_buffer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_primitive_assembler.h" | ||||
| #include "video_core/renderer_opengl/gl_rasterizer_cache.h" | ||||
| #include "video_core/renderer_opengl/gl_resource_manager.h" | ||||
| #include "video_core/renderer_opengl/gl_shader_cache.h" | ||||
| @@ -38,6 +39,7 @@ class EmuWindow; | ||||
| namespace OpenGL { | ||||
|  | ||||
| struct ScreenInfo; | ||||
| struct DrawParameters; | ||||
|  | ||||
| class RasterizerOpenGL : public VideoCore::RasterizerInterface { | ||||
| public: | ||||
| @@ -192,12 +194,17 @@ private: | ||||
|     static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; | ||||
|     OGLBufferCache buffer_cache; | ||||
|     OGLFramebuffer framebuffer; | ||||
|     PrimitiveAssembler primitive_assembler{buffer_cache}; | ||||
|     GLint uniform_buffer_alignment; | ||||
|  | ||||
|     std::size_t CalculateVertexArraysSize() const; | ||||
|  | ||||
|     std::size_t CalculateIndexBufferSize() const; | ||||
|  | ||||
|     void SetupVertexArrays(); | ||||
|  | ||||
|     DrawParameters SetupDraw(); | ||||
|  | ||||
|     void SetupShaders(); | ||||
|  | ||||
|     enum class AccelDraw { Disabled, Arrays, Indexed }; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei