gl_rasterize: implement shadow mapping using image load/store

This commit is contained in:
wwylele 2018-04-11 15:47:02 +03:00
parent 08b119153d
commit 781912e854
9 changed files with 464 additions and 27 deletions

View File

@ -38,6 +38,15 @@ RasterizerOpenGL::RasterizerOpenGL()
: shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE), : shader_dirty(true), vertex_buffer(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE),
uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE), uniform_buffer(GL_UNIFORM_BUFFER, UNIFORM_BUFFER_SIZE),
index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) { index_buffer(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE) {
allow_shadow = GLAD_GL_ARB_shader_image_load_store && GLAD_GL_ARB_shader_image_size &&
GLAD_GL_ARB_framebuffer_no_attachments;
if (!allow_shadow) {
NGLOG_WARNING(
Render_OpenGL,
"Shadow might not be able to render because of unsupported OpenGL extensions.");
}
// Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0 // Clipping plane 0 is always enabled for PICA fixed clip plane z <= 0
state.clip_distance[0] = true; state.clip_distance[0] = true;
@ -237,6 +246,7 @@ void RasterizerOpenGL::SyncEntireState() {
SyncFogColor(); SyncFogColor();
SyncProcTexNoise(); SyncProcTexNoise();
SyncShadowBias();
} }
/** /**
@ -533,12 +543,16 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
MICROPROFILE_SCOPE(OpenGL_Drawing); MICROPROFILE_SCOPE(OpenGL_Drawing);
const auto& regs = Pica::g_state.regs; const auto& regs = Pica::g_state.regs;
bool shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
Pica::FramebufferRegs::FragmentOperationMode::Shadow;
const bool has_stencil = const bool has_stencil =
regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8; regs.framebuffer.framebuffer.depth_format == Pica::FramebufferRegs::DepthFormat::D24S8;
const bool write_color_fb = const bool write_color_fb = shadow_rendering || state.color_mask.red_enabled == GL_TRUE ||
state.color_mask.red_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE || state.color_mask.green_enabled == GL_TRUE ||
state.color_mask.blue_enabled == GL_TRUE || state.color_mask.alpha_enabled == GL_TRUE; state.color_mask.blue_enabled == GL_TRUE ||
state.color_mask.alpha_enabled == GL_TRUE;
const bool write_depth_fb = const bool write_depth_fb =
(state.depth.test_enabled && state.depth.write_mask == GL_TRUE) || (state.depth.test_enabled && state.depth.write_mask == GL_TRUE) ||
@ -547,7 +561,7 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
const bool using_color_fb = const bool using_color_fb =
regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb; regs.framebuffer.framebuffer.GetColorBufferPhysicalAddress() != 0 && write_color_fb;
const bool using_depth_fb = const bool using_depth_fb =
regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 && !shadow_rendering && regs.framebuffer.framebuffer.GetDepthBufferPhysicalAddress() != 0 &&
(write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 || (write_depth_fb || regs.framebuffer.output_merger.depth_test_enable != 0 ||
(has_stencil && state.stencil.test_enabled)); (has_stencil && state.stencil.test_enabled));
@ -591,24 +605,39 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
state.draw.draw_framebuffer = framebuffer.handle; state.draw.draw_framebuffer = framebuffer.handle;
state.Apply(); state.Apply();
if (shadow_rendering) {
if (!allow_shadow || color_surface == nullptr) {
return true;
}
glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_WIDTH,
color_surface->width * color_surface->res_scale);
glFramebufferParameteri(GL_DRAW_FRAMEBUFFER, GL_FRAMEBUFFER_DEFAULT_HEIGHT,
color_surface->height * color_surface->res_scale);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, 0, 0);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
state.image_shadow_buffer = color_surface->texture.handle;
} else {
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D,
color_surface != nullptr ? color_surface->texture.handle : 0, 0); color_surface != nullptr ? color_surface->texture.handle : 0, 0);
if (depth_surface != nullptr) { if (depth_surface != nullptr) {
if (has_stencil) { if (has_stencil) {
// attach both depth and stencil // attach both depth and stencil
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT,
depth_surface->texture.handle, 0); GL_TEXTURE_2D, depth_surface->texture.handle, 0);
} else { } else {
// attach depth // attach depth
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D,
depth_surface->texture.handle, 0); depth_surface->texture.handle, 0);
// clear stencil attachment // clear stencil attachment
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, 0); glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0,
0);
} }
} else { } else {
// clear both depth and stencil attachment // clear both depth and stencil attachment
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D, 0, glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, GL_TEXTURE_2D,
0); 0, 0);
}
} }
// Sync the viewport // Sync the viewport
@ -658,6 +687,82 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
if (texture_index == 0) { if (texture_index == 0) {
using TextureType = Pica::TexturingRegs::TextureConfig::TextureType; using TextureType = Pica::TexturingRegs::TextureConfig::TextureType;
switch (texture.config.type.Value()) { switch (texture.config.type.Value()) {
case TextureType::Shadow2D: {
    // Nothing is bound for shadow textures when allow_shadow is false.
    if (!allow_shadow)
        continue;
    // A 2D shadow map reuses the +X image slot; a missing surface binds texture 0.
    const Surface shadow_surface = res_cache.GetTextureSurface(texture);
    state.image_shadow_texture_px =
        shadow_surface != nullptr ? shadow_surface->texture.handle : 0;
    continue;
}
case TextureType::ShadowCube: {
    // Nothing is bound for shadow textures when allow_shadow is false.
    if (!allow_shadow)
        continue;
    Pica::Texture::TextureInfo info = Pica::Texture::TextureInfo::FromPicaRegister(
        texture.config, texture.format);
    using CubeFace = Pica::TexturingRegs::CubeFace;

    // Looks up the cached surface for one cube face and stores its GL texture handle in
    // the given image slot; a missing surface stores 0. All faces share `info`, with only
    // the physical address swapped per face.
    const auto bind_face = [&](CubeFace face, GLuint& image_slot) {
        info.physical_address = regs.texturing.GetCubePhysicalAddress(face);
        const Surface surface = res_cache.GetTextureSurface(info);
        image_slot = surface != nullptr ? surface->texture.handle : 0;
    };

    // Face order is kept as +X, -X, +Y, -Y, +Z, -Z so cache lookups happen in the same
    // sequence as before.
    bind_face(CubeFace::PositiveX, state.image_shadow_texture_px);
    bind_face(CubeFace::NegativeX, state.image_shadow_texture_nx);
    bind_face(CubeFace::PositiveY, state.image_shadow_texture_py);
    bind_face(CubeFace::NegativeY, state.image_shadow_texture_ny);
    bind_face(CubeFace::PositiveZ, state.image_shadow_texture_pz);
    bind_face(CubeFace::NegativeZ, state.image_shadow_texture_nz);
    continue;
}
case TextureType::TextureCube: case TextureType::TextureCube:
using CubeFace = Pica::TexturingRegs::CubeFace; using CubeFace = Pica::TexturingRegs::CubeFace;
TextureCubeConfig config; TextureCubeConfig config;
@ -791,8 +896,22 @@ bool RasterizerOpenGL::Draw(bool accelerate, bool is_indexed) {
state.texture_units[texture_index].texture_2d = 0; state.texture_units[texture_index].texture_2d = 0;
} }
state.texture_cube_unit.texture_cube = 0; state.texture_cube_unit.texture_cube = 0;
if (allow_shadow) {
state.image_shadow_texture_px = 0;
state.image_shadow_texture_nx = 0;
state.image_shadow_texture_py = 0;
state.image_shadow_texture_ny = 0;
state.image_shadow_texture_pz = 0;
state.image_shadow_texture_nz = 0;
state.image_shadow_buffer = 0;
}
state.Apply(); state.Apply();
if (shadow_rendering) {
glMemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT | GL_SHADER_IMAGE_ACCESS_BARRIER_BIT |
GL_TEXTURE_UPDATE_BARRIER_BIT | GL_FRAMEBUFFER_BARRIER_BIT);
}
// Mark framebuffer surfaces as dirty // Mark framebuffer surfaces as dirty
MathUtil::Rectangle<u32> draw_rect_unscaled{ MathUtil::Rectangle<u32> draw_rect_unscaled{
draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale, draw_rect.left / res_scale, draw_rect.top / res_scale, draw_rect.right / res_scale,
@ -951,6 +1070,10 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
SyncColorWriteMask(); SyncColorWriteMask();
break; break;
case PICA_REG_INDEX(framebuffer.shadow):
SyncShadowBias();
break;
// Scissor test // Scissor test
case PICA_REG_INDEX(rasterizer.scissor_test.mode): case PICA_REG_INDEX(rasterizer.scissor_test.mode):
shader_dirty = true; shader_dirty = true;
@ -1926,6 +2049,19 @@ void RasterizerOpenGL::SyncLightDistanceAttenuationScale(int light_index) {
} }
} }
void RasterizerOpenGL::SyncShadowBias() {
const auto& shadow = Pica::g_state.regs.framebuffer.shadow;
GLfloat constant = Pica::float16::FromRaw(shadow.constant).ToFloat32();
GLfloat linear = Pica::float16::FromRaw(shadow.linear).ToFloat32();
if (constant != uniform_block_data.data.shadow_bias_constant ||
linear != uniform_block_data.data.shadow_bias_linear) {
uniform_block_data.data.shadow_bias_constant = constant;
uniform_block_data.data.shadow_bias_linear = linear;
uniform_block_data.dirty = true;
}
}
void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) { void RasterizerOpenGL::UploadUniforms(bool accelerate_draw, bool use_gs) {
// glBindBufferRange below also changes the generic buffer binding point, so we sync the state // glBindBufferRange below also changes the generic buffer binding point, so we sync the state
// first // first

View File

@ -217,6 +217,9 @@ private:
/// Syncs the specified light's distance attenuation scale to match the PICA register /// Syncs the specified light's distance attenuation scale to match the PICA register
void SyncLightDistanceAttenuationScale(int light_index); void SyncLightDistanceAttenuationScale(int light_index);
/// Syncs the shadow rendering bias to match the PICA register
void SyncShadowBias();
/// Upload the uniform blocks to the uniform buffer object /// Upload the uniform blocks to the uniform buffer object
void UploadUniforms(bool accelerate_draw, bool use_gs); void UploadUniforms(bool accelerate_draw, bool use_gs);
@ -315,4 +318,6 @@ private:
OGLBuffer proctex_diff_lut_buffer; OGLBuffer proctex_diff_lut_buffer;
OGLTexture proctex_diff_lut; OGLTexture proctex_diff_lut;
std::array<GLvec4, 256> proctex_diff_lut_data{}; std::array<GLvec4, 256> proctex_diff_lut_data{};
bool allow_shadow;
}; };

View File

@ -303,6 +303,11 @@ static bool BlitTextures(GLuint src_tex, const MathUtil::Rectangle<u32>& src_rec
buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT; buffers = GL_DEPTH_BUFFER_BIT | GL_STENCIL_BUFFER_BIT;
} }
// TODO (wwylele): use GL_NEAREST for shadow map texture
// Note: shadow map is treated as RGBA8 format in PICA, as well as in the rasterizer cache, but
// doing linear interpolation componentwise would produce incorrect values. However, for a
// well-programmed game this code path should rarely be executed for shadow maps with
// inconsistent scale.
glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left, glBlitFramebuffer(src_rect.left, src_rect.bottom, src_rect.right, src_rect.top, dst_rect.left,
dst_rect.bottom, dst_rect.right, dst_rect.top, buffers, dst_rect.bottom, dst_rect.right, dst_rect.top, buffers,
buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST); buffers == GL_COLOR_BUFFER_BIT ? GL_LINEAR : GL_NEAREST);

View File

@ -49,6 +49,8 @@ layout (std140) uniform shader_data {
int alphatest_ref; int alphatest_ref;
float depth_scale; float depth_scale;
float depth_offset; float depth_offset;
float shadow_bias_constant;
float shadow_bias_linear;
int scissor_x1; int scissor_x1;
int scissor_y1; int scissor_y1;
int scissor_x2; int scissor_x2;
@ -220,6 +222,12 @@ PicaFSConfig PicaFSConfig::BuildFromRegs(const Pica::Regs& regs) {
state.proctex.lut_filter = regs.texturing.proctex_lut.filter; state.proctex.lut_filter = regs.texturing.proctex_lut.filter;
} }
state.shadow_rendering = regs.framebuffer.output_merger.fragment_operation_mode ==
Pica::FramebufferRegs::FragmentOperationMode::Shadow;
state.shadow_texture_orthographic = regs.texturing.shadow.orthographic != 0;
state.shadow_texture_bias = regs.texturing.shadow.bias << 1;
return res; return res;
} }
@ -300,10 +308,9 @@ static std::string SampleTexture(const PicaFSConfig& config, unsigned texture_un
case TexturingRegs::TextureConfig::TextureCube: case TexturingRegs::TextureConfig::TextureCube:
return "texture(tex_cube, vec3(texcoord0, texcoord0_w))"; return "texture(tex_cube, vec3(texcoord0, texcoord0_w))";
case TexturingRegs::TextureConfig::Shadow2D: case TexturingRegs::TextureConfig::Shadow2D:
return "shadowTexture(texcoord0, texcoord0_w)";
case TexturingRegs::TextureConfig::ShadowCube: case TexturingRegs::TextureConfig::ShadowCube:
NGLOG_CRITICAL(HW_GPU, "Unhandled shadow texture"); return "shadowTextureCube(texcoord0, texcoord0_w)";
UNIMPLEMENTED();
return "vec4(1.0)"; // stubbed to avoid rendering with wrong shadow
default: default:
LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", LOG_CRITICAL(HW_GPU, "Unhandled texture type %x",
static_cast<int>(state.texture0_type)); static_cast<int>(state.texture0_type));
@ -1181,7 +1188,13 @@ float ProcTexNoiseCoef(vec2 x) {
std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) { std::string GenerateFragmentShader(const PicaFSConfig& config, bool separable_shader) {
const auto& state = config.state; const auto& state = config.state;
std::string out = "#version 330 core\n"; std::string out = R"(
#version 330 core
#extension GL_ARB_shader_image_load_store : enable
#extension GL_ARB_shader_image_size : enable
#define ALLOW_SHADOW (defined(GL_ARB_shader_image_load_store) && defined(GL_ARB_shader_image_size))
)";
if (separable_shader) { if (separable_shader) {
out += "#extension GL_ARB_separate_shader_objects : enable\n"; out += "#extension GL_ARB_separate_shader_objects : enable\n";
} }
@ -1204,6 +1217,16 @@ uniform samplerBuffer proctex_color_map;
uniform samplerBuffer proctex_alpha_map; uniform samplerBuffer proctex_alpha_map;
uniform samplerBuffer proctex_lut; uniform samplerBuffer proctex_lut;
uniform samplerBuffer proctex_diff_lut; uniform samplerBuffer proctex_diff_lut;
#if ALLOW_SHADOW
layout(r32ui) uniform readonly uimage2D shadow_texture_px;
layout(r32ui) uniform readonly uimage2D shadow_texture_nx;
layout(r32ui) uniform readonly uimage2D shadow_texture_py;
layout(r32ui) uniform readonly uimage2D shadow_texture_ny;
layout(r32ui) uniform readonly uimage2D shadow_texture_pz;
layout(r32ui) uniform readonly uimage2D shadow_texture_nz;
layout(r32ui) uniform uimage2D shadow_buffer;
#endif
)"; )";
out += UniformBlockDef; out += UniformBlockDef;
@ -1248,6 +1271,147 @@ vec4 byteround(vec4 x) {
return round(x * 255.0) * (1.0 / 255.0); return round(x * 255.0) * (1.0 / 255.0);
} }
#if ALLOW_SHADOW
uvec2 DecodeShadow(uint pixel) {
return uvec2(pixel >> 8, pixel & 0xFFu);
}
uint EncodeShadow(uvec2 pixel) {
return (pixel.x << 8) | pixel.y;
}
float CompareShadow(uint pixel, uint z) {
uvec2 p = DecodeShadow(pixel);
return mix(float(p.y) * (1.0 / 255.0), 0.0, p.x <= z);
}
float SampleShadow2D(ivec2 uv, uint z) {
if (any(bvec4( lessThan(uv, ivec2(0)), greaterThanEqual(uv, imageSize(shadow_texture_px)) )))
return 1.0;
return CompareShadow(imageLoad(shadow_texture_px, uv).x, z);
}
float mix2(vec4 s, vec2 a) {
vec2 t = mix(s.xy, s.zw, a.yy);
return mix(t.x, t.y, a.x);
}
vec4 shadowTexture(vec2 uv, float w) {
)";
if (!config.state.shadow_texture_orthographic) {
out += "uv /= w;";
}
out += "uint z = uint(max(0, int(min(abs(w), 1.0) * 0xFFFFFF) - " +
std::to_string(state.shadow_texture_bias) + "));";
out += R"(
vec2 coord = vec2(imageSize(shadow_texture_px)) * uv - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i = ivec2(coord_floor);
vec4 s = vec4(
SampleShadow2D(i , z),
SampleShadow2D(i + ivec2(1, 0), z),
SampleShadow2D(i + ivec2(0, 1), z),
SampleShadow2D(i + ivec2(1, 1), z));
return vec4(mix2(s, f));
}
vec4 shadowTextureCube(vec2 uv, float w) {
ivec2 size = imageSize(shadow_texture_px);
vec3 c = vec3(uv, w);
vec3 a = abs(c);
if (a.x > a.y && a.x > a.z) {
w = a.x;
uv = -c.zy;
if (c.x < 0.0) uv.x = -uv.x;
} else if (a.y > a.z) {
w = a.y;
uv = c.xz;
if (c.y < 0.0) uv.y = -uv.y;
} else {
w = a.z;
uv = -c.xy;
if (c.z > 0.0) uv.x = -uv.x;
}
)";
out += "uint z = uint(max(0, int(min(w, 1.0) * 0xFFFFFF) - " +
std::to_string(state.shadow_texture_bias) + "));";
out += R"(
vec2 coord = vec2(size) * (uv / w * vec2(0.5) + vec2(0.5)) - vec2(0.5);
vec2 coord_floor = floor(coord);
vec2 f = coord - coord_floor;
ivec2 i00 = ivec2(coord_floor);
ivec2 i10 = i00 + ivec2(1, 0);
ivec2 i01 = i00 + ivec2(0, 1);
ivec2 i11 = i00 + ivec2(1, 1);
ivec2 cmin = ivec2(0), cmax = size - ivec2(1, 1);
i00 = clamp(i00, cmin, cmax);
i10 = clamp(i10, cmin, cmax);
i01 = clamp(i01, cmin, cmax);
i11 = clamp(i11, cmin, cmax);
uvec4 pixels;
// This part should have been refactored into functions,
// but many drivers don't like passing uimage2D as parameters
if (a.x > a.y && a.x > a.z) {
if (c.x > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_px, i00).r,
imageLoad(shadow_texture_px, i10).r,
imageLoad(shadow_texture_px, i01).r,
imageLoad(shadow_texture_px, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_nx, i00).r,
imageLoad(shadow_texture_nx, i10).r,
imageLoad(shadow_texture_nx, i01).r,
imageLoad(shadow_texture_nx, i11).r);
} else if (a.y > a.z) {
if (c.y > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_py, i00).r,
imageLoad(shadow_texture_py, i10).r,
imageLoad(shadow_texture_py, i01).r,
imageLoad(shadow_texture_py, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_ny, i00).r,
imageLoad(shadow_texture_ny, i10).r,
imageLoad(shadow_texture_ny, i01).r,
imageLoad(shadow_texture_ny, i11).r);
} else {
if (c.z > 0.0)
pixels = uvec4(
imageLoad(shadow_texture_pz, i00).r,
imageLoad(shadow_texture_pz, i10).r,
imageLoad(shadow_texture_pz, i01).r,
imageLoad(shadow_texture_pz, i11).r);
else
pixels = uvec4(
imageLoad(shadow_texture_nz, i00).r,
imageLoad(shadow_texture_nz, i10).r,
imageLoad(shadow_texture_nz, i01).r,
imageLoad(shadow_texture_nz, i11).r);
}
vec4 s = vec4(
CompareShadow(pixels.x, z),
CompareShadow(pixels.y, z),
CompareShadow(pixels.z, z),
CompareShadow(pixels.w, z));
return vec4(mix2(s, f));
}
#else
vec4 shadowTexture(vec2 uv, float w) {
return vec4(1.0);
}
vec4 shadowTextureCube(vec2 uv, float w) {
return vec4(1.0);
}
#endif
)"; )";
if (config.state.proctex.enable) if (config.state.proctex.enable)
@ -1331,9 +1495,38 @@ vec4 secondary_fragment_color = vec4(0.0);
return out; return out;
} }
if (state.shadow_rendering) {
out += R"(
#if ALLOW_SHADOW
uint d = uint(clamp(depth, 0.0, 1.0) * 0xFFFFFF);
uint s = uint(last_tex_env_out.g * 0xFF);
ivec2 image_coord = ivec2(gl_FragCoord.xy);
uint old = imageLoad(shadow_buffer, image_coord).x;
uint new;
uint old2;
do {
old2 = old;
uvec2 ref = DecodeShadow(old);
if (d < ref.x) {
if (s == 0u) {
ref.x = d;
} else {
s = uint(float(s) / (shadow_bias_constant + shadow_bias_linear * float(d) / float(ref.x)));
ref.y = min(s, ref.y);
}
}
new = EncodeShadow(ref);
} while ((old = imageAtomicCompSwap(shadow_buffer, image_coord, old, new)) != old2);
#endif // ALLOW_SHADOW
)";
} else {
out += "gl_FragDepth = depth;\n"; out += "gl_FragDepth = depth;\n";
// Round the final fragment color to maintain the PICA's 8 bits of precision // Round the final fragment color to maintain the PICA's 8 bits of precision
out += "color = byteround(last_tex_env_out);\n"; out += "color = byteround(last_tex_env_out);\n";
}
out += "}"; out += "}";

View File

@ -110,6 +110,10 @@ struct PicaFSConfigState {
u32 lut_offset; u32 lut_offset;
Pica::TexturingRegs::ProcTexFilter lut_filter; Pica::TexturingRegs::ProcTexFilter lut_filter;
} proctex; } proctex;
bool shadow_rendering;
bool shadow_texture_orthographic;
u32 shadow_texture_bias;
}; };
/** /**

View File

@ -36,6 +36,13 @@ static void SetShaderSamplerBinding(GLuint shader, const char* name,
} }
} }
/// Assigns image unit `binding` to the uniform called `name` in program `shader`.
/// Uniforms that glGetUniformLocation does not find (location -1) are silently skipped.
static void SetShaderImageBinding(GLuint shader, const char* name, GLuint binding) {
    const GLint location = glGetUniformLocation(shader, name);
    if (location == -1) {
        return;
    }
    glUniform1i(location, static_cast<GLint>(binding));
}
static void SetShaderSamplerBindings(GLuint shader) { static void SetShaderSamplerBindings(GLuint shader) {
OpenGLState cur_state = OpenGLState::GetCurState(); OpenGLState cur_state = OpenGLState::GetCurState();
GLuint old_program = std::exchange(cur_state.draw.shader_program, shader); GLuint old_program = std::exchange(cur_state.draw.shader_program, shader);
@ -56,6 +63,14 @@ static void SetShaderSamplerBindings(GLuint shader) {
SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT); SetShaderSamplerBinding(shader, "proctex_lut", TextureUnits::ProcTexLUT);
SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT); SetShaderSamplerBinding(shader, "proctex_diff_lut", TextureUnits::ProcTexDiffLUT);
SetShaderImageBinding(shader, "shadow_buffer", ImageUnits::ShadowBuffer);
SetShaderImageBinding(shader, "shadow_texture_px", ImageUnits::ShadowTexturePX);
SetShaderImageBinding(shader, "shadow_texture_nx", ImageUnits::ShadowTextureNX);
SetShaderImageBinding(shader, "shadow_texture_py", ImageUnits::ShadowTexturePY);
SetShaderImageBinding(shader, "shadow_texture_ny", ImageUnits::ShadowTextureNY);
SetShaderImageBinding(shader, "shadow_texture_pz", ImageUnits::ShadowTexturePZ);
SetShaderImageBinding(shader, "shadow_texture_nz", ImageUnits::ShadowTextureNZ);
cur_state.draw.shader_program = old_program; cur_state.draw.shader_program = old_program;
cur_state.Apply(); cur_state.Apply();
} }

View File

@ -32,6 +32,8 @@ struct UniformData {
GLint alphatest_ref; GLint alphatest_ref;
GLfloat depth_scale; GLfloat depth_scale;
GLfloat depth_offset; GLfloat depth_offset;
GLfloat shadow_bias_constant;
GLfloat shadow_bias_linear;
GLint scissor_x1; GLint scissor_x1;
GLint scissor_y1; GLint scissor_y1;
GLint scissor_x2; GLint scissor_x2;
@ -48,7 +50,7 @@ struct UniformData {
}; };
static_assert( static_assert(
sizeof(UniformData) == 0x460, sizeof(UniformData) == 0x470,
"The size of the UniformData structure has changed, update the structure in the shader"); "The size of the UniformData structure has changed, update the structure in the shader");
static_assert(sizeof(UniformData) < 16384, static_assert(sizeof(UniformData) < 16384,
"UniformData structure must be less than 16kb as per the OpenGL spec"); "UniformData structure must be less than 16kb as per the OpenGL spec");

View File

@ -65,6 +65,14 @@ OpenGLState::OpenGLState() {
proctex_alpha_map.texture_buffer = 0; proctex_alpha_map.texture_buffer = 0;
proctex_noise_lut.texture_buffer = 0; proctex_noise_lut.texture_buffer = 0;
image_shadow_buffer = 0;
image_shadow_texture_px = 0;
image_shadow_texture_nx = 0;
image_shadow_texture_py = 0;
image_shadow_texture_ny = 0;
image_shadow_texture_pz = 0;
image_shadow_texture_nz = 0;
draw.read_framebuffer = 0; draw.read_framebuffer = 0;
draw.draw_framebuffer = 0; draw.draw_framebuffer = 0;
draw.vertex_array = 0; draw.vertex_array = 0;
@ -255,6 +263,42 @@ void OpenGLState::Apply() const {
glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer); glBindTexture(GL_TEXTURE_BUFFER, proctex_diff_lut.texture_buffer);
} }
// Shadow Images
if (image_shadow_buffer != cur_state.image_shadow_buffer) {
glBindImageTexture(ImageUnits::ShadowBuffer, image_shadow_buffer, 0, GL_FALSE, 0,
GL_READ_WRITE, GL_R32UI);
}
if (image_shadow_texture_px != cur_state.image_shadow_texture_px) {
glBindImageTexture(ImageUnits::ShadowTexturePX, image_shadow_texture_px, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
if (image_shadow_texture_nx != cur_state.image_shadow_texture_nx) {
glBindImageTexture(ImageUnits::ShadowTextureNX, image_shadow_texture_nx, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
if (image_shadow_texture_py != cur_state.image_shadow_texture_py) {
glBindImageTexture(ImageUnits::ShadowTexturePY, image_shadow_texture_py, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
if (image_shadow_texture_ny != cur_state.image_shadow_texture_ny) {
glBindImageTexture(ImageUnits::ShadowTextureNY, image_shadow_texture_ny, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
if (image_shadow_texture_pz != cur_state.image_shadow_texture_pz) {
glBindImageTexture(ImageUnits::ShadowTexturePZ, image_shadow_texture_pz, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
if (image_shadow_texture_nz != cur_state.image_shadow_texture_nz) {
glBindImageTexture(ImageUnits::ShadowTextureNZ, image_shadow_texture_nz, 0, GL_FALSE, 0,
GL_READ_ONLY, GL_R32UI);
}
// Framebuffer // Framebuffer
if (draw.read_framebuffer != cur_state.draw.read_framebuffer) { if (draw.read_framebuffer != cur_state.draw.read_framebuffer) {
glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer); glBindFramebuffer(GL_READ_FRAMEBUFFER, draw.read_framebuffer);
@ -344,6 +388,20 @@ OpenGLState& OpenGLState::ResetTexture(GLuint handle) {
proctex_lut.texture_buffer = 0; proctex_lut.texture_buffer = 0;
if (proctex_diff_lut.texture_buffer == handle) if (proctex_diff_lut.texture_buffer == handle)
proctex_diff_lut.texture_buffer = 0; proctex_diff_lut.texture_buffer = 0;
if (image_shadow_buffer == handle)
image_shadow_buffer = 0;
if (image_shadow_texture_px == handle)
image_shadow_texture_px = 0;
if (image_shadow_texture_nx == handle)
image_shadow_texture_nx = 0;
if (image_shadow_texture_py == handle)
image_shadow_texture_py = 0;
if (image_shadow_texture_ny == handle)
image_shadow_texture_ny = 0;
if (image_shadow_texture_pz == handle)
image_shadow_texture_pz = 0;
if (image_shadow_texture_nz == handle)
image_shadow_texture_nz = 0;
return *this; return *this;
} }

View File

@ -31,6 +31,16 @@ constexpr TextureUnit TextureCube{10};
} // namespace TextureUnits } // namespace TextureUnits
/// Image unit indices for the shadow images: one shadow buffer plus six shadow texture
/// slots named after the cube faces (+X, -X, +Y, -Y, +Z, -Z).
namespace ImageUnits {
constexpr GLuint ShadowBuffer{0};
constexpr GLuint ShadowTexturePX{1};
constexpr GLuint ShadowTextureNX{2};
constexpr GLuint ShadowTexturePY{3};
constexpr GLuint ShadowTextureNY{4};
constexpr GLuint ShadowTexturePZ{5};
constexpr GLuint ShadowTextureNZ{6};
} // namespace ImageUnits
class OpenGLState { class OpenGLState {
public: public:
struct { struct {
@ -121,6 +131,15 @@ public:
GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER GLuint texture_buffer; // GL_TEXTURE_BINDING_BUFFER
} proctex_diff_lut; } proctex_diff_lut;
// GL_IMAGE_BINDING_NAME
GLuint image_shadow_buffer;
GLuint image_shadow_texture_px;
GLuint image_shadow_texture_nx;
GLuint image_shadow_texture_py;
GLuint image_shadow_texture_ny;
GLuint image_shadow_texture_pz;
GLuint image_shadow_texture_nz;
struct { struct {
GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING GLuint read_framebuffer; // GL_READ_FRAMEBUFFER_BINDING
GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING GLuint draw_framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING