renderer_software: Multi-thread processing
* Doubles the performance in most cases
This commit is contained in:
		| @@ -95,8 +95,14 @@ private: | ||||
|  | ||||
| } // Anonymous namespace | ||||
|  | ||||
| // Kirby Blowout Blast relies on the combiner output of a previous draw | ||||
| // in order to render the sky correctly. | ||||
| static thread_local Common::Vec4<u8> combiner_output{}; | ||||
|  | ||||
| RasterizerSoftware::RasterizerSoftware(Memory::MemorySystem& memory_) | ||||
|     : memory{memory_}, state{Pica::g_state}, regs{state.regs}, fb{memory, regs.framebuffer} {} | ||||
|     : memory{memory_}, state{Pica::g_state}, regs{state.regs}, | ||||
|       num_sw_threads{std::max(std::thread::hardware_concurrency(), 2U)}, | ||||
|       sw_workers{num_sw_threads, "SwRenderer workers"}, fb{memory, regs.framebuffer} {} | ||||
|  | ||||
| void RasterizerSoftware::AddTriangle(const Pica::Shader::OutputVertex& v0, | ||||
|                                      const Pica::Shader::OutputVertex& v1, | ||||
| @@ -295,161 +301,171 @@ void RasterizerSoftware::ProcessTriangle(const Vertex& v0, const Vertex& v1, con | ||||
|     // Enter rasterization loop, starting at the center of the topleft bounding box corner. | ||||
|     // TODO: Not sure if looping through x first might be faster | ||||
|     for (u16 y = min_y + 8; y < max_y; y += 0x10) { | ||||
|         for (u16 x = min_x + 8; x < max_x; x += 0x10) { | ||||
|             // Do not process the pixel if it's inside the scissor box and the scissor mode is set | ||||
|             // to Exclude. | ||||
|             if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) { | ||||
|                 if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) { | ||||
|         const auto process_scanline = [&, y] { | ||||
|             for (u16 x = min_x + 8; x < max_x; x += 0x10) { | ||||
|                 // Do not process the pixel if it's inside the scissor box and the scissor mode is | ||||
|                 // set to Exclude. | ||||
|                 if (regs.rasterizer.scissor_test.mode == RasterizerRegs::ScissorMode::Exclude) { | ||||
|                     if (x >= scissor_x1 && x < scissor_x2 && y >= scissor_y1 && y < scissor_y2) { | ||||
|                         continue; | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 // Calculate the barycentric coordinates w0, w1 and w2 | ||||
|                 const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||||
|                 const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||||
|                 const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||||
|                 const s32 wsum = w0 + w1 + w2; | ||||
|  | ||||
|                 // If current pixel is not covered by the current primitive | ||||
|                 if (w0 < 0 || w1 < 0 || w2 < 0) { | ||||
|                     continue; | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             // Calculate the barycentric coordinates w0, w1 and w2 | ||||
|             const s32 w0 = bias0 + SignedArea(vtxpos[1].xy(), vtxpos[2].xy(), {x, y}); | ||||
|             const s32 w1 = bias1 + SignedArea(vtxpos[2].xy(), vtxpos[0].xy(), {x, y}); | ||||
|             const s32 w2 = bias2 + SignedArea(vtxpos[0].xy(), vtxpos[1].xy(), {x, y}); | ||||
|             const s32 wsum = w0 + w1 + w2; | ||||
|                 const auto baricentric_coordinates = Common::MakeVec( | ||||
|                     f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)), | ||||
|                     f24::FromFloat32(static_cast<f32>(w2))); | ||||
|                 const f24 interpolated_w_inverse = | ||||
|                     f24::One() / Common::Dot(w_inverse, baricentric_coordinates); | ||||
|  | ||||
|             // If current pixel is not covered by the current primitive | ||||
|             if (w0 < 0 || w1 < 0 || w2 < 0) { | ||||
|                 continue; | ||||
|             } | ||||
|                 // interpolated_z = z / w | ||||
|                 const float interpolated_z_over_w = | ||||
|                     (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + | ||||
|                      v2.screenpos[2].ToFloat32() * w2) / | ||||
|                     wsum; | ||||
|  | ||||
|             const auto baricentric_coordinates = Common::MakeVec( | ||||
|                 f24::FromFloat32(static_cast<f32>(w0)), f24::FromFloat32(static_cast<f32>(w1)), | ||||
|                 f24::FromFloat32(static_cast<f32>(w2))); | ||||
|             const f24 interpolated_w_inverse = | ||||
|                 f24::One() / Common::Dot(w_inverse, baricentric_coordinates); | ||||
|                 // Not fully accurate. About 3 bits in precision are missing. | ||||
|                 // Z-Buffer (z / w * scale + offset) | ||||
|                 const float depth_scale = | ||||
|                     f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); | ||||
|                 const float depth_offset = | ||||
|                     f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); | ||||
|                 float depth = interpolated_z_over_w * depth_scale + depth_offset; | ||||
|  | ||||
|             // interpolated_z = z / w | ||||
|             const float interpolated_z_over_w = | ||||
|                 (v0.screenpos[2].ToFloat32() * w0 + v1.screenpos[2].ToFloat32() * w1 + | ||||
|                  v2.screenpos[2].ToFloat32() * w2) / | ||||
|                 wsum; | ||||
|                 // Potentially switch to W-Buffer | ||||
|                 if (regs.rasterizer.depthmap_enable == | ||||
|                     Pica::RasterizerRegs::DepthBuffering::WBuffering) { | ||||
|                     // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) | ||||
|                     depth *= interpolated_w_inverse.ToFloat32() * wsum; | ||||
|                 } | ||||
|  | ||||
|             // Not fully accurate. About 3 bits in precision are missing. | ||||
|             // Z-Buffer (z / w * scale + offset) | ||||
|             const float depth_scale = | ||||
|                 f24::FromRaw(regs.rasterizer.viewport_depth_range).ToFloat32(); | ||||
|             const float depth_offset = | ||||
|                 f24::FromRaw(regs.rasterizer.viewport_depth_near_plane).ToFloat32(); | ||||
|             float depth = interpolated_z_over_w * depth_scale + depth_offset; | ||||
|                 // Clamp the result | ||||
|                 depth = std::clamp(depth, 0.0f, 1.0f); | ||||
|  | ||||
|             // Potentially switch to W-Buffer | ||||
|             if (regs.rasterizer.depthmap_enable == | ||||
|                 Pica::RasterizerRegs::DepthBuffering::WBuffering) { | ||||
|                 // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) | ||||
|                 depth *= interpolated_w_inverse.ToFloat32() * wsum; | ||||
|             } | ||||
|  | ||||
|             // Clamp the result | ||||
|             depth = std::clamp(depth, 0.0f, 1.0f); | ||||
|  | ||||
|             /** | ||||
|              * Perspective correct attribute interpolation: | ||||
|              * Attribute values cannot be calculated by simple linear interpolation since | ||||
|              * they are not linear in screen space. For example, when interpolating a | ||||
|              * texture coordinate across two vertices, something simple like | ||||
|              *     u = (u0*w0 + u1*w1)/(w0+w1) | ||||
|              * will not work. However, the attribute value divided by the | ||||
|              * clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear | ||||
|              * in screenspace. Hence, we can linearly interpolate these two independently and | ||||
|              * calculate the interpolated attribute by dividing the results. | ||||
|              * I.e. | ||||
|              *     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1) | ||||
|              *     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1) | ||||
|              *     u = u_over_w / one_over_w | ||||
|              * | ||||
|              * The generalization to three vertices is straightforward in baricentric coordinates. | ||||
|              **/ | ||||
|             const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) { | ||||
|                 auto attr_over_w = Common::MakeVec(attr0, attr1, attr2); | ||||
|                 f24 interpolated_attr_over_w = Common::Dot(attr_over_w, baricentric_coordinates); | ||||
|                 return interpolated_attr_over_w * interpolated_w_inverse; | ||||
|             }; | ||||
|  | ||||
|             const Common::Vec4<u8> primary_color{ | ||||
|                 static_cast<u8>( | ||||
|                     round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r()) | ||||
|                               .ToFloat32() * | ||||
|                           255)), | ||||
|                 static_cast<u8>( | ||||
|                     round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g()) | ||||
|                               .ToFloat32() * | ||||
|                           255)), | ||||
|                 static_cast<u8>( | ||||
|                     round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b()) | ||||
|                               .ToFloat32() * | ||||
|                           255)), | ||||
|                 static_cast<u8>( | ||||
|                     round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a()) | ||||
|                               .ToFloat32() * | ||||
|                           255)), | ||||
|             }; | ||||
|  | ||||
|             std::array<Common::Vec2<f24>, 3> uv; | ||||
|             uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); | ||||
|             uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); | ||||
|             uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u()); | ||||
|             uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v()); | ||||
|             uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); | ||||
|             uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); | ||||
|  | ||||
|             // Sample bound texture units. | ||||
|             const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); | ||||
|             const auto texture_color = TextureColor(uv, textures, tc0_w); | ||||
|  | ||||
|             Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0}; | ||||
|             Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0}; | ||||
|  | ||||
|             if (!regs.lighting.disable) { | ||||
|                 const auto normquat = | ||||
|                     Common::Quaternion<f32>{ | ||||
|                         {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x).ToFloat32(), | ||||
|                          get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y).ToFloat32(), | ||||
|                          get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z).ToFloat32()}, | ||||
|                         get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), | ||||
|                     } | ||||
|                         .Normalized(); | ||||
|  | ||||
|                 const Common::Vec3f view{ | ||||
|                     get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), | ||||
|                     get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), | ||||
|                     get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), | ||||
|                 /** | ||||
|                  * Perspective correct attribute interpolation: | ||||
|                  * Attribute values cannot be calculated by simple linear interpolation since | ||||
|                  * they are not linear in screen space. For example, when interpolating a | ||||
|                  * texture coordinate across two vertices, something simple like | ||||
|                  *     u = (u0*w0 + u1*w1)/(w0+w1) | ||||
|                  * will not work. However, the attribute value divided by the | ||||
|                  * clipspace w-coordinate (u/w) and and the inverse w-coordinate (1/w) are linear | ||||
|                  * in screenspace. Hence, we can linearly interpolate these two independently and | ||||
|                  * calculate the interpolated attribute by dividing the results. | ||||
|                  * I.e. | ||||
|                  *     u_over_w   = ((u0/v0.pos.w)*w0 + (u1/v1.pos.w)*w1)/(w0+w1) | ||||
|                  *     one_over_w = (( 1/v0.pos.w)*w0 + ( 1/v1.pos.w)*w1)/(w0+w1) | ||||
|                  *     u = u_over_w / one_over_w | ||||
|                  * | ||||
|                  * The generalization to three vertices is straightforward in baricentric | ||||
|                  *coordinates. | ||||
|                  **/ | ||||
|                 const auto get_interpolated_attribute = [&](f24 attr0, f24 attr1, f24 attr2) { | ||||
|                     auto attr_over_w = Common::MakeVec(attr0, attr1, attr2); | ||||
|                     f24 interpolated_attr_over_w = | ||||
|                         Common::Dot(attr_over_w, baricentric_coordinates); | ||||
|                     return interpolated_attr_over_w * interpolated_w_inverse; | ||||
|                 }; | ||||
|                 std::tie(primary_fragment_color, secondary_fragment_color) = ComputeFragmentsColors( | ||||
|                     regs.lighting, state.lighting, normquat, view, texture_color); | ||||
|             } | ||||
|  | ||||
|             // Write the TEV stages. | ||||
|             WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color, | ||||
|                            secondary_fragment_color); | ||||
|                 const Common::Vec4<u8> primary_color{ | ||||
|                     static_cast<u8>( | ||||
|                         round(get_interpolated_attribute(v0.color.r(), v1.color.r(), v2.color.r()) | ||||
|                                   .ToFloat32() * | ||||
|                               255)), | ||||
|                     static_cast<u8>( | ||||
|                         round(get_interpolated_attribute(v0.color.g(), v1.color.g(), v2.color.g()) | ||||
|                                   .ToFloat32() * | ||||
|                               255)), | ||||
|                     static_cast<u8>( | ||||
|                         round(get_interpolated_attribute(v0.color.b(), v1.color.b(), v2.color.b()) | ||||
|                                   .ToFloat32() * | ||||
|                               255)), | ||||
|                     static_cast<u8>( | ||||
|                         round(get_interpolated_attribute(v0.color.a(), v1.color.a(), v2.color.a()) | ||||
|                                   .ToFloat32() * | ||||
|                               255)), | ||||
|                 }; | ||||
|  | ||||
|             const auto& output_merger = regs.framebuffer.output_merger; | ||||
|             if (output_merger.fragment_operation_mode == | ||||
|                 FramebufferRegs::FragmentOperationMode::Shadow) { | ||||
|                 u32 depth_int = static_cast<u32>(depth * 0xFFFFFF); | ||||
|                 // Use green color as the shadow intensity | ||||
|                 u8 stencil = combiner_output.y; | ||||
|                 fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil); | ||||
|                 // Skip the normal output merger pipeline if it is in shadow mode | ||||
|                 continue; | ||||
|             } | ||||
|                 std::array<Common::Vec2<f24>, 3> uv; | ||||
|                 uv[0].u() = get_interpolated_attribute(v0.tc0.u(), v1.tc0.u(), v2.tc0.u()); | ||||
|                 uv[0].v() = get_interpolated_attribute(v0.tc0.v(), v1.tc0.v(), v2.tc0.v()); | ||||
|                 uv[1].u() = get_interpolated_attribute(v0.tc1.u(), v1.tc1.u(), v2.tc1.u()); | ||||
|                 uv[1].v() = get_interpolated_attribute(v0.tc1.v(), v1.tc1.v(), v2.tc1.v()); | ||||
|                 uv[2].u() = get_interpolated_attribute(v0.tc2.u(), v1.tc2.u(), v2.tc2.u()); | ||||
|                 uv[2].v() = get_interpolated_attribute(v0.tc2.v(), v1.tc2.v(), v2.tc2.v()); | ||||
|  | ||||
|             // Does alpha testing happen before or after stencil? | ||||
|             if (!DoAlphaTest(combiner_output.a())) { | ||||
|                 continue; | ||||
|                 // Sample bound texture units. | ||||
|                 const f24 tc0_w = get_interpolated_attribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); | ||||
|                 const auto texture_color = TextureColor(uv, textures, tc0_w); | ||||
|  | ||||
|                 Common::Vec4<u8> primary_fragment_color = {0, 0, 0, 0}; | ||||
|                 Common::Vec4<u8> secondary_fragment_color = {0, 0, 0, 0}; | ||||
|  | ||||
|                 if (!regs.lighting.disable) { | ||||
|                     const auto normquat = | ||||
|                         Common::Quaternion<f32>{ | ||||
|                             {get_interpolated_attribute(v0.quat.x, v1.quat.x, v2.quat.x) | ||||
|                                  .ToFloat32(), | ||||
|                              get_interpolated_attribute(v0.quat.y, v1.quat.y, v2.quat.y) | ||||
|                                  .ToFloat32(), | ||||
|                              get_interpolated_attribute(v0.quat.z, v1.quat.z, v2.quat.z) | ||||
|                                  .ToFloat32()}, | ||||
|                             get_interpolated_attribute(v0.quat.w, v1.quat.w, v2.quat.w).ToFloat32(), | ||||
|                         } | ||||
|                             .Normalized(); | ||||
|  | ||||
|                     const Common::Vec3f view{ | ||||
|                         get_interpolated_attribute(v0.view.x, v1.view.x, v2.view.x).ToFloat32(), | ||||
|                         get_interpolated_attribute(v0.view.y, v1.view.y, v2.view.y).ToFloat32(), | ||||
|                         get_interpolated_attribute(v0.view.z, v1.view.z, v2.view.z).ToFloat32(), | ||||
|                     }; | ||||
|                     std::tie(primary_fragment_color, secondary_fragment_color) = | ||||
|                         ComputeFragmentsColors(regs.lighting, state.lighting, normquat, view, | ||||
|                                                texture_color); | ||||
|                 } | ||||
|  | ||||
|                 // Write the TEV stages. | ||||
|                 WriteTevConfig(texture_color, tev_stages, primary_color, primary_fragment_color, | ||||
|                                secondary_fragment_color); | ||||
|  | ||||
|                 const auto& output_merger = regs.framebuffer.output_merger; | ||||
|                 if (output_merger.fragment_operation_mode == | ||||
|                     FramebufferRegs::FragmentOperationMode::Shadow) { | ||||
|                     u32 depth_int = static_cast<u32>(depth * 0xFFFFFF); | ||||
|                     // Use green color as the shadow intensity | ||||
|                     u8 stencil = combiner_output.y; | ||||
|                     fb.DrawShadowMapPixel(x >> 4, y >> 4, depth_int, stencil); | ||||
|                     // Skip the normal output merger pipeline if it is in shadow mode | ||||
|                     continue; | ||||
|                 } | ||||
|  | ||||
|                 // Does alpha testing happen before or after stencil? | ||||
|                 if (!DoAlphaTest(combiner_output.a())) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 WriteFog(depth); | ||||
|                 if (!DoDepthStencilTest(x, y, depth)) { | ||||
|                     continue; | ||||
|                 } | ||||
|                 const auto result = PixelColor(x, y); | ||||
|                 if (regs.framebuffer.framebuffer.allow_color_write != 0) { | ||||
|                     fb.DrawPixel(x >> 4, y >> 4, result); | ||||
|                 } | ||||
|             } | ||||
|             WriteFog(combiner_output, depth); | ||||
|             if (!DoDepthStencilTest(x, y, depth)) { | ||||
|                 continue; | ||||
|             } | ||||
|             const auto result = PixelColor(x, y, combiner_output); | ||||
|             if (regs.framebuffer.framebuffer.allow_color_write != 0) { | ||||
|                 fb.DrawPixel(x >> 4, y >> 4, result); | ||||
|             } | ||||
|         } | ||||
|         }; | ||||
|         sw_workers.QueueWork(std::move(process_scanline)); | ||||
|     } | ||||
|     sw_workers.WaitForRequests(); | ||||
| } | ||||
|  | ||||
| std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor( | ||||
| @@ -572,8 +588,7 @@ std::array<Common::Vec4<u8>, 4> RasterizerSoftware::TextureColor( | ||||
|     return texture_color; | ||||
| } | ||||
|  | ||||
| Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y, | ||||
|                                                 Common::Vec4<u8>& combiner_output) const { | ||||
| Common::Vec4<u8> RasterizerSoftware::PixelColor(u16 x, u16 y) const { | ||||
|     const auto dest = fb.GetPixel(x >> 4, y >> 4); | ||||
|     Common::Vec4<u8> blend_output = combiner_output; | ||||
|  | ||||
| @@ -768,7 +783,7 @@ void RasterizerSoftware::WriteTevConfig( | ||||
|     } | ||||
| } | ||||
|  | ||||
| void RasterizerSoftware::WriteFog(Common::Vec4<u8>& combiner_output, float depth) const { | ||||
| void RasterizerSoftware::WriteFog(float depth) const { | ||||
|     /** | ||||
|      * Apply fog combiner. Not fully accurate. We'd have to know what data type is used to | ||||
|      * store the depth etc. Using float for now until we know more about Pica datatypes. | ||||
|   | ||||
| @@ -5,7 +5,7 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include <span> | ||||
|  | ||||
| #include "common/thread_worker.h" | ||||
| #include "video_core/rasterizer_interface.h" | ||||
| #include "video_core/regs_texturing.h" | ||||
| #include "video_core/renderer_software/sw_clipper.h" | ||||
| @@ -52,7 +52,7 @@ private: | ||||
|         std::span<const Pica::TexturingRegs::FullTextureConfig, 3> textures, f24 tc0_w) const; | ||||
|  | ||||
|     /// Returns the final pixel color with blending or logic ops applied. | ||||
|     Common::Vec4<u8> PixelColor(u16 x, u16 y, Common::Vec4<u8>& combiner_output) const; | ||||
|     Common::Vec4<u8> PixelColor(u16 x, u16 y) const; | ||||
|  | ||||
|     /// Emulates the TEV configuration and returns the combiner output. | ||||
|     void WriteTevConfig(std::span<const Common::Vec4<u8>, 4> texture_color, | ||||
| @@ -61,7 +61,7 @@ private: | ||||
|                         Common::Vec4<u8> secondary_fragment_color); | ||||
|  | ||||
|     /// Blends fog to the combiner output if enabled. | ||||
|     void WriteFog(Common::Vec4<u8>& combiner_output, float depth) const; | ||||
|     void WriteFog(float depth) const; | ||||
|  | ||||
|     /// Performs the alpha test. Returns false if the test failed. | ||||
|     bool DoAlphaTest(u8 alpha) const; | ||||
| @@ -73,10 +73,9 @@ private: | ||||
|     Memory::MemorySystem& memory; | ||||
|     Pica::State& state; | ||||
|     const Pica::Regs& regs; | ||||
|     size_t num_sw_threads; | ||||
|     Common::ThreadWorker sw_workers; | ||||
|     Framebuffer fb; | ||||
|     // Kirby Blowout Blast relies on the combiner output of a previous draw | ||||
|     // in order to render the sky correctly. | ||||
|     Common::Vec4<u8> combiner_output{}; | ||||
| }; | ||||
|  | ||||
| } // namespace SwRenderer | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 GPUCode
					GPUCode