GPU: Support multiple enabled vertex arrays.
The vertex arrays are copied to the stream buffer one after the other, and the attributes are set up using the ARB_vertex_attrib_binding extension. As a result, yuzu now requires OpenGL 4.3 or the ARB_vertex_attrib_binding extension.
This commit is contained in:
		| @@ -500,6 +500,11 @@ public: | ||||
|                         return static_cast<GPUVAddr>((static_cast<GPUVAddr>(start_high) << 32) | | ||||
|                                                      start_low); | ||||
|                     } | ||||
|  | ||||
|                     bool IsEnabled() const { | ||||
|                         return enable != 0 && StartAddress() != 0; | ||||
|                     } | ||||
|  | ||||
|                 } vertex_array[NumVertexArrays]; | ||||
|  | ||||
|                 Blend blend; | ||||
|   | ||||
| @@ -127,7 +127,8 @@ RasterizerOpenGL::~RasterizerOpenGL() { | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | ||||
| std::pair<u8*, GLintptr> RasterizerOpenGL::SetupVertexArrays(u8* array_ptr, | ||||
|                                                              GLintptr buffer_offset) { | ||||
|     MICROPROFILE_SCOPE(OpenGL_VAO); | ||||
|     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||||
|     const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||||
| @@ -136,43 +137,59 @@ void RasterizerOpenGL::SetupVertexArray(u8* array_ptr, GLintptr buffer_offset) { | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     // TODO(bunnei): Add support for 1+ vertex arrays | ||||
|     const auto& vertex_array{regs.vertex_array[0]}; | ||||
|     const auto& vertex_array_limit{regs.vertex_array_limit[0]}; | ||||
|     ASSERT_MSG(vertex_array.enable, "vertex array 0 is disabled?"); | ||||
|     ASSERT_MSG(!vertex_array.divisor, "vertex array 0 divisor is unimplemented!"); | ||||
|     for (unsigned index = 1; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         ASSERT_MSG(!regs.vertex_array[index].enable, "vertex array %d is unimplemented!", index); | ||||
|     // Upload all guest vertex arrays sequentially to our buffer | ||||
|     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         const auto& vertex_array = regs.vertex_array[index]; | ||||
|         if (!vertex_array.IsEnabled()) | ||||
|             continue; | ||||
|  | ||||
|         const Tegra::GPUVAddr start = vertex_array.StartAddress(); | ||||
|         const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||
|  | ||||
|         ASSERT(end > start); | ||||
|         u64 size = end - start + 1; | ||||
|  | ||||
|         // Copy vertex array data | ||||
|         const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(start)}; | ||||
|         res_cache.FlushRegion(data_addr, size, nullptr); | ||||
|         Memory::ReadBlock(data_addr, array_ptr, size); | ||||
|  | ||||
|         // Bind the vertex array to the buffer at the current offset. | ||||
|         glBindVertexBuffer(index, stream_buffer->GetHandle(), buffer_offset, vertex_array.stride); | ||||
|  | ||||
|         ASSERT_MSG(vertex_array.divisor == 0, "Vertex buffer divisor unimplemented"); | ||||
|  | ||||
|         array_ptr += size; | ||||
|         buffer_offset += size; | ||||
|     } | ||||
|  | ||||
|     // Use the vertex array as-is, assumes that the data is formatted correctly for OpenGL. | ||||
|     // Enables the first 16 vertex attributes always, as we don't know which ones are actually used | ||||
|     // until shader time. Note, Tegra technically supports 32, but we're cappinig this to 16 for now | ||||
|     // until shader time. Note, Tegra technically supports 32, but we're capping this to 16 for now | ||||
|     // to avoid OpenGL errors. | ||||
|     // TODO(Subv): Analyze the shader to identify which attributes are actually used and don't | ||||
|     // assume every shader uses them all. | ||||
|     for (unsigned index = 0; index < 16; ++index) { | ||||
|         auto& attrib = regs.vertex_attrib_format[index]; | ||||
|         NGLOG_DEBUG(HW_GPU, "vertex attrib {}, count={}, size={}, type={}, offset={}, normalize={}", | ||||
|                     index, attrib.ComponentCount(), attrib.SizeString(), attrib.TypeString(), | ||||
|                     attrib.offset.Value(), attrib.IsNormalized()); | ||||
|  | ||||
|         glVertexAttribPointer(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||||
|                               attrib.IsNormalized() ? GL_TRUE : GL_FALSE, vertex_array.stride, | ||||
|                               reinterpret_cast<GLvoid*>(buffer_offset + attrib.offset)); | ||||
|         auto& buffer = regs.vertex_array[attrib.buffer]; | ||||
|         ASSERT(buffer.IsEnabled()); | ||||
|  | ||||
|         glEnableVertexAttribArray(index); | ||||
|         glVertexAttribFormat(index, attrib.ComponentCount(), MaxwellToGL::VertexType(attrib), | ||||
|                              attrib.IsNormalized() ? GL_TRUE : GL_FALSE, attrib.offset); | ||||
|         glVertexAttribBinding(index, attrib.buffer); | ||||
|  | ||||
|         hw_vao_enabled_attributes[index] = true; | ||||
|     } | ||||
|  | ||||
|     // Copy vertex array data | ||||
|     const u64 data_size{vertex_array_limit.LimitAddress() - vertex_array.StartAddress() + 1}; | ||||
|     const VAddr data_addr{memory_manager->PhysicalToVirtualAddress(vertex_array.StartAddress())}; | ||||
|     res_cache.FlushRegion(data_addr, data_size, nullptr); | ||||
|     Memory::ReadBlock(data_addr, array_ptr, data_size); | ||||
|  | ||||
|     array_ptr += data_size; | ||||
|     buffer_offset += data_size; | ||||
|     return {array_ptr, buffer_offset}; | ||||
| } | ||||
|  | ||||
| void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos) { | ||||
| void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset) { | ||||
|     // Helper function for uploading uniform data | ||||
|     const auto copy_buffer = [&](GLuint handle, GLintptr offset, GLsizeiptr size) { | ||||
|         if (has_ARB_direct_state_access) { | ||||
| @@ -190,8 +207,6 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | ||||
|     u32 current_constbuffer_bindpoint = 0; | ||||
|  | ||||
|     for (unsigned index = 1; index < Maxwell::MaxShaderProgram; ++index) { | ||||
|         ptr_pos += sizeof(GLShader::MaxwellUniformData); | ||||
|  | ||||
|         auto& shader_config = gpu.regs.shader_config[index]; | ||||
|         const Maxwell::ShaderProgram program{static_cast<Maxwell::ShaderProgram>(index)}; | ||||
|  | ||||
| @@ -205,13 +220,16 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | ||||
|         } | ||||
|  | ||||
|         // Upload uniform data as one UBO per stage | ||||
|         const GLintptr ubo_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | ||||
|         const GLintptr ubo_offset = buffer_offset; | ||||
|         copy_buffer(uniform_buffers[stage].handle, ubo_offset, | ||||
|                     sizeof(GLShader::MaxwellUniformData)); | ||||
|         GLShader::MaxwellUniformData* ub_ptr = | ||||
|             reinterpret_cast<GLShader::MaxwellUniformData*>(&buffer_ptr[ptr_pos]); | ||||
|             reinterpret_cast<GLShader::MaxwellUniformData*>(buffer_ptr); | ||||
|         ub_ptr->SetFromRegs(gpu.state.shader_stages[stage]); | ||||
|  | ||||
|         buffer_ptr += sizeof(GLShader::MaxwellUniformData); | ||||
|         buffer_offset += sizeof(GLShader::MaxwellUniformData); | ||||
|  | ||||
|         // Fetch program code from memory | ||||
|         GLShader::ProgramCode program_code; | ||||
|         const u64 gpu_address{gpu.regs.code_address.CodeAddress() + shader_config.offset}; | ||||
| @@ -252,6 +270,24 @@ void RasterizerOpenGL::SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size | ||||
|     shader_program_manager->UseTrivialGeometryShader(); | ||||
| } | ||||
|  | ||||
| size_t RasterizerOpenGL::CalculateVertexArraysSize() const { | ||||
|     const auto& regs = Core::System().GetInstance().GPU().Maxwell3D().regs; | ||||
|  | ||||
|     size_t size = 0; | ||||
|     for (u32 index = 0; index < Maxwell::NumVertexArrays; ++index) { | ||||
|         if (!regs.vertex_array[index].IsEnabled()) | ||||
|             continue; | ||||
|  | ||||
|         const Tegra::GPUVAddr start = regs.vertex_array[index].StartAddress(); | ||||
|         const Tegra::GPUVAddr end = regs.vertex_array_limit[index].LimitAddress(); | ||||
|  | ||||
|         ASSERT(end > start); | ||||
|         size += end - start + 1; | ||||
|     } | ||||
|  | ||||
|     return size; | ||||
| } | ||||
|  | ||||
| bool RasterizerOpenGL::AccelerateDrawBatch(bool is_indexed) { | ||||
|     accelerate_draw = is_indexed ? AccelDraw::Indexed : AccelDraw::Arrays; | ||||
|     DrawArrays(); | ||||
| @@ -329,44 +365,49 @@ void RasterizerOpenGL::DrawArrays() { | ||||
|     const u64 index_buffer_size{regs.index_array.count * regs.index_array.FormatSizeInBytes()}; | ||||
|     const unsigned vertex_num{is_indexed ? regs.index_array.count : regs.vertex_buffer.count}; | ||||
|  | ||||
|     // TODO(bunnei): Add support for 1+ vertex arrays | ||||
|     vs_input_size = vertex_num * regs.vertex_array[0].stride; | ||||
|  | ||||
|     state.draw.vertex_buffer = stream_buffer->GetHandle(); | ||||
|     state.Apply(); | ||||
|  | ||||
|     size_t buffer_size = static_cast<size_t>(vs_input_size); | ||||
|     size_t buffer_size = CalculateVertexArraysSize(); | ||||
|  | ||||
|     if (is_indexed) { | ||||
|         buffer_size = Common::AlignUp(buffer_size, 4) + index_buffer_size; | ||||
|         buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + index_buffer_size; | ||||
|     } | ||||
|  | ||||
|     // Uniform space for the 5 shader stages | ||||
|     buffer_size += sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||||
|     buffer_size = Common::AlignUp<size_t>(buffer_size, 4) + | ||||
|                   sizeof(GLShader::MaxwellUniformData) * Maxwell::MaxShaderStage; | ||||
|  | ||||
|     size_t ptr_pos = 0; | ||||
|     u8* buffer_ptr; | ||||
|     GLintptr buffer_offset; | ||||
|     std::tie(buffer_ptr, buffer_offset) = | ||||
|         stream_buffer->Map(static_cast<GLsizeiptr>(buffer_size), 4); | ||||
|  | ||||
|     SetupVertexArray(buffer_ptr, buffer_offset); | ||||
|     ptr_pos += vs_input_size; | ||||
|     u8* offseted_buffer; | ||||
|     std::tie(offseted_buffer, buffer_offset) = SetupVertexArrays(buffer_ptr, buffer_offset); | ||||
|  | ||||
|     offseted_buffer = | ||||
|         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||
|     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||
|  | ||||
|     // If indexed mode, copy the index buffer | ||||
|     GLintptr index_buffer_offset = 0; | ||||
|     if (is_indexed) { | ||||
|         ptr_pos = Common::AlignUp(ptr_pos, 4); | ||||
|  | ||||
|         const auto& memory_manager = Core::System().GetInstance().GPU().memory_manager; | ||||
|         const VAddr index_data_addr{ | ||||
|             memory_manager->PhysicalToVirtualAddress(regs.index_array.StartAddress())}; | ||||
|         Memory::ReadBlock(index_data_addr, &buffer_ptr[ptr_pos], index_buffer_size); | ||||
|         Memory::ReadBlock(index_data_addr, offseted_buffer, index_buffer_size); | ||||
|  | ||||
|         index_buffer_offset = buffer_offset + static_cast<GLintptr>(ptr_pos); | ||||
|         ptr_pos += index_buffer_size; | ||||
|         index_buffer_offset = buffer_offset; | ||||
|         offseted_buffer += index_buffer_size; | ||||
|         buffer_offset += index_buffer_size; | ||||
|     } | ||||
|  | ||||
|     SetupShaders(buffer_ptr, buffer_offset, ptr_pos); | ||||
|     offseted_buffer = | ||||
|         reinterpret_cast<u8*>(Common::AlignUp(reinterpret_cast<size_t>(offseted_buffer), 4)); | ||||
|     buffer_offset = Common::AlignUp<size_t>(buffer_offset, 4); | ||||
|  | ||||
|     SetupShaders(offseted_buffer, buffer_offset); | ||||
|  | ||||
|     stream_buffer->Unmap(); | ||||
|  | ||||
|   | ||||
| @@ -148,13 +148,13 @@ private: | ||||
|     static constexpr size_t STREAM_BUFFER_SIZE = 4 * 1024 * 1024; | ||||
|     std::unique_ptr<OGLStreamBuffer> stream_buffer; | ||||
|  | ||||
|     GLsizeiptr vs_input_size; | ||||
|     size_t CalculateVertexArraysSize() const; | ||||
|  | ||||
|     void SetupVertexArray(u8* array_ptr, GLintptr buffer_offset); | ||||
|     std::pair<u8*, GLintptr> SetupVertexArrays(u8* array_ptr, GLintptr buffer_offset); | ||||
|  | ||||
|     std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::MaxShaderStage> uniform_buffers; | ||||
|  | ||||
|     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset, size_t ptr_pos); | ||||
|     void SetupShaders(u8* buffer_ptr, GLintptr buffer_offset); | ||||
|  | ||||
|     enum class AccelDraw { Disabled, Arrays, Indexed }; | ||||
|     AccelDraw accelerate_draw; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Subv
					Subv