mirror of
https://github.com/yuzu-emu/yuzu.git
synced 2024-11-15 10:30:05 +00:00
Merge pull request #1264 from bunnei/fragment-lighting-hw
Fragment lighting support in the HW renderer
This commit is contained in:
commit
f1d1049c4f
@ -33,6 +33,7 @@ set(HEADERS
|
||||
command_processor.h
|
||||
gpu_debugger.h
|
||||
pica.h
|
||||
pica_types.h
|
||||
primitive_assembly.h
|
||||
rasterizer.h
|
||||
rasterizer_interface.h
|
||||
|
@ -59,15 +59,17 @@ static void InitScreenCoordinates(OutputVertex& vtx)
|
||||
} viewport;
|
||||
|
||||
const auto& regs = g_state.regs;
|
||||
viewport.halfsize_x = float24::FromRawFloat24(regs.viewport_size_x);
|
||||
viewport.halfsize_y = float24::FromRawFloat24(regs.viewport_size_y);
|
||||
viewport.halfsize_x = float24::FromRaw(regs.viewport_size_x);
|
||||
viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y);
|
||||
viewport.offset_x = float24::FromFloat32(static_cast<float>(regs.viewport_corner.x));
|
||||
viewport.offset_y = float24::FromFloat32(static_cast<float>(regs.viewport_corner.y));
|
||||
viewport.zscale = float24::FromRawFloat24(regs.viewport_depth_range);
|
||||
viewport.offset_z = float24::FromRawFloat24(regs.viewport_depth_far_plane);
|
||||
viewport.zscale = float24::FromRaw(regs.viewport_depth_range);
|
||||
viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane);
|
||||
|
||||
float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w;
|
||||
vtx.color *= inv_w;
|
||||
vtx.view *= inv_w;
|
||||
vtx.quat *= inv_w;
|
||||
vtx.tc0 *= inv_w;
|
||||
vtx.tc1 *= inv_w;
|
||||
vtx.tc2 *= inv_w;
|
||||
|
@ -98,10 +98,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
Math::Vec4<float24>& attribute = g_state.vs.default_attributes[setup.index];
|
||||
|
||||
// NOTE: The destination component order indeed is "backwards"
|
||||
attribute.w = float24::FromRawFloat24(default_attr_write_buffer[0] >> 8);
|
||||
attribute.z = float24::FromRawFloat24(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
|
||||
attribute.y = float24::FromRawFloat24(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
|
||||
attribute.x = float24::FromRawFloat24(default_attr_write_buffer[2] & 0xFFFFFF);
|
||||
attribute.w = float24::FromRaw(default_attr_write_buffer[0] >> 8);
|
||||
attribute.z = float24::FromRaw(((default_attr_write_buffer[0] & 0xFF) << 16) | ((default_attr_write_buffer[1] >> 16) & 0xFFFF));
|
||||
attribute.y = float24::FromRaw(((default_attr_write_buffer[1] & 0xFFFF) << 8) | ((default_attr_write_buffer[2] >> 24) & 0xFF));
|
||||
attribute.x = float24::FromRaw(default_attr_write_buffer[2] & 0xFFFFFF);
|
||||
|
||||
LOG_TRACE(HW_GPU, "Set default VS attribute %x to (%f %f %f %f)", (int)setup.index,
|
||||
attribute.x.ToFloat32(), attribute.y.ToFloat32(), attribute.z.ToFloat32(),
|
||||
@ -418,10 +418,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
uniform[3 - i] = float24::FromFloat32(*(float*)(&uniform_write_buffer[i]));
|
||||
} else {
|
||||
// TODO: Untested
|
||||
uniform.w = float24::FromRawFloat24(uniform_write_buffer[0] >> 8);
|
||||
uniform.z = float24::FromRawFloat24(((uniform_write_buffer[0] & 0xFF)<<16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
||||
uniform.y = float24::FromRawFloat24(((uniform_write_buffer[1] & 0xFFFF)<<8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
||||
uniform.x = float24::FromRawFloat24(uniform_write_buffer[2] & 0xFFFFFF);
|
||||
uniform.w = float24::FromRaw(uniform_write_buffer[0] >> 8);
|
||||
uniform.z = float24::FromRaw(((uniform_write_buffer[0] & 0xFF) << 16) | ((uniform_write_buffer[1] >> 16) & 0xFFFF));
|
||||
uniform.y = float24::FromRaw(((uniform_write_buffer[1] & 0xFFFF) << 8) | ((uniform_write_buffer[2] >> 24) & 0xFF));
|
||||
uniform.x = float24::FromRaw(uniform_write_buffer[2] & 0xFFFFFF);
|
||||
}
|
||||
|
||||
LOG_TRACE(HW_GPU, "Set uniform %x to (%f %f %f %f)", (int)uniform_setup.index,
|
||||
@ -464,6 +464,24 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Lighting LUT data ports: a write to any of the eight `lut_data` registers stores `value` into
// the lookup table selected by `lut_config.type` at position `lut_config.index`, and then
// advances `lut_config.index` by one.
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
{
    auto& lut_config = regs.lighting.lut_config;

    // `type` is a 5-bit register field, so it can select more tables than `luts` actually
    // holds. Refuse such a selection instead of writing past the end of the array.
    const u32 lut_type = lut_config.type;
    ASSERT_MSG(lut_type < g_state.lighting.luts.size(),
               "lut_config.type %u is out of range (only %u lighting LUTs exist)!",
               lut_type, static_cast<unsigned>(g_state.lighting.luts.size()));

    ASSERT_MSG(lut_config.index < 256, "lut_config.index exceeded maximum value of 255!");

    g_state.lighting.luts[lut_type][lut_config.index].raw = value;
    lut_config.index = lut_config.index + 1;

    break;
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -16,6 +16,8 @@
|
||||
#include "common/vector_math.h"
|
||||
#include "common/logging/log.h"
|
||||
|
||||
#include "pica_types.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
// Returns index corresponding to the Regs member labeled by field_name
|
||||
@ -239,7 +241,8 @@ struct Regs {
|
||||
TextureConfig texture0;
|
||||
INSERT_PADDING_WORDS(0x8);
|
||||
BitField<0, 4, TextureFormat> texture0_format;
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
BitField<0, 1, u32> fragment_lighting_enable;
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
TextureConfig texture1;
|
||||
BitField<0, 4, TextureFormat> texture1_format;
|
||||
INSERT_PADDING_WORDS(0x2);
|
||||
@ -641,7 +644,268 @@ struct Regs {
|
||||
}
|
||||
}
|
||||
|
||||
INSERT_PADDING_WORDS(0xe0);
|
||||
INSERT_PADDING_WORDS(0x20);
|
||||
|
||||
/// Identifies the lookup tables (LUTs) that fragment lighting can sample.
/// NOTE(review): the numeric values presumably match the LUT `type` written through
/// `lut_config` - confirm against the code that indexes the LUT array.
enum class LightingSampler {
|
||||
Distribution0 = 0, ///< Distribution function for specular component 0
|
||||
Distribution1 = 1, ///< Distribution function for specular component 1
|
||||
Fresnel = 3, ///< Fresnel factor
|
||||
ReflectBlue = 4, ///< Reflectance, blue channel
|
||||
ReflectGreen = 5, ///< Reflectance, green channel
|
||||
ReflectRed = 6, ///< Reflectance, red channel
|
||||
SpotlightAttenuation = 8, ///< Spotlight attenuation (presumably the first of a per-light range - TODO confirm)
|
||||
DistanceAttenuation = 16, ///< Distance attenuation (presumably the first of a per-light range - TODO confirm)
|
||||
};
|
||||
|
||||
/**
|
||||
* Pica fragment lighting supports using different LUTs for each lighting component:
|
||||
* Reflectance R, G, and B channels, distribution function for specular components 0 and 1,
|
||||
* fresnel factor, and spotlight attenuation. Furthermore, which LUTs are used for each channel
|
||||
* (or whether a channel is enabled at all) is specified by various pre-defined lighting
|
||||
* configurations. With configurations that require more LUTs, more cycles are required on HW to
|
||||
* perform lighting computations.
|
||||
*/
|
||||
enum class LightingConfig {
|
||||
Config0 = 0, ///< Reflect Red, Distribution 0, Spotlight
|
||||
Config1 = 1, ///< Reflect Red, Fresnel, Spotlight
|
||||
Config2 = 2, ///< Reflect Red, Distribution 0/1
|
||||
Config3 = 3, ///< Distribution 0/1, Fresnel
|
||||
Config4 = 4, ///< Reflect Red/Green/Blue, Distribution 0/1, Spotlight
|
||||
Config5 = 5, ///< Reflect Red/Green/Blue, Distribution 0, Fresnel, Spotlight
|
||||
Config6 = 6, ///< Reflect Red, Distribution 0/1, Fresnel, Spotlight
|
||||
Config7 = 8, ///< Reflect Red/Green/Blue, Distribution 0/1, Fresnel, Spotlight
|
||||
///< NOTE: '8' is intentional, '7' does not appear to be a valid configuration
|
||||
};
|
||||
|
||||
/// Selects which lighting components are affected by fresnel
|
||||
enum class LightingFresnelSelector {
|
||||
None = 0, ///< Fresnel is disabled
|
||||
PrimaryAlpha = 1, ///< Primary (diffuse) lighting alpha is affected by fresnel
|
||||
SecondaryAlpha = 2, ///< Secondary (specular) lighting alpha is affected by fresnel
|
||||
Both = PrimaryAlpha | SecondaryAlpha, ///< Both primary and secondary lighting alphas are affected by fresnel
|
||||
};
|
||||
|
||||
/// Factor used to scale the output of a lighting LUT
|
||||
enum class LightingScale {
|
||||
Scale1 = 0, ///< Scale is 1x
|
||||
Scale2 = 1, ///< Scale is 2x
|
||||
Scale4 = 2, ///< Scale is 4x
|
||||
Scale8 = 3, ///< Scale is 8x
|
||||
Scale1_4 = 6, ///< Scale is 0.25x
|
||||
Scale1_2 = 7, ///< Scale is 0.5x
|
||||
};
|
||||
|
||||
enum class LightingLutInput {
|
||||
NH = 0, // Cosine of the angle between the normal and half-angle vectors
|
||||
VH = 1, // Cosine of the angle between the view and half-angle vectors
|
||||
NV = 2, // Cosine of the angle between the normal and the view vector
|
||||
LN = 3, // Cosine of the angle between the light and the normal vectors
|
||||
};
|
||||
|
||||
enum class LightingBumpMode : u32 {
|
||||
None = 0,
|
||||
NormalMap = 1,
|
||||
TangentMap = 2,
|
||||
};
|
||||
|
||||
union LightColor {
|
||||
BitField< 0, 10, u32> b;
|
||||
BitField<10, 10, u32> g;
|
||||
BitField<20, 10, u32> r;
|
||||
|
||||
Math::Vec3f ToVec3f() const {
|
||||
// These fields are 10 bits wide, however 255 corresponds to 1.0f for each color component
|
||||
return Math::MakeVec((f32)r / 255.f, (f32)g / 255.f, (f32)b / 255.f);
|
||||
}
|
||||
};
|
||||
|
||||
/// Returns true if the specified lighting sampler is supported by the current Pica lighting configuration
|
||||
static bool IsLightingSamplerSupported(LightingConfig config, LightingSampler sampler) {
|
||||
switch (sampler) {
|
||||
case LightingSampler::Distribution0:
|
||||
return (config != LightingConfig::Config1);
|
||||
|
||||
case LightingSampler::Distribution1:
|
||||
return (config != LightingConfig::Config0) && (config != LightingConfig::Config1) && (config != LightingConfig::Config5);
|
||||
|
||||
case LightingSampler::Fresnel:
|
||||
return (config != LightingConfig::Config0) && (config != LightingConfig::Config2) && (config != LightingConfig::Config4);
|
||||
|
||||
case LightingSampler::ReflectRed:
|
||||
return (config != LightingConfig::Config3);
|
||||
|
||||
case LightingSampler::ReflectGreen:
|
||||
case LightingSampler::ReflectBlue:
|
||||
return (config == LightingConfig::Config4) || (config == LightingConfig::Config5) || (config == LightingConfig::Config7);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
struct {
|
||||
struct LightSrc {
|
||||
LightColor specular_0; // material.specular_0 * light.specular_0
|
||||
LightColor specular_1; // material.specular_1 * light.specular_1
|
||||
LightColor diffuse; // material.diffuse * light.diffuse
|
||||
LightColor ambient; // material.ambient * light.ambient
|
||||
|
||||
struct {
|
||||
// Encoded as 16-bit floating point
|
||||
union {
|
||||
BitField< 0, 16, u32> x;
|
||||
BitField<16, 16, u32> y;
|
||||
};
|
||||
union {
|
||||
BitField< 0, 16, u32> z;
|
||||
};
|
||||
|
||||
INSERT_PADDING_WORDS(0x3);
|
||||
|
||||
union {
|
||||
BitField<0, 1, u32> directional;
|
||||
BitField<1, 1, u32> two_sided_diffuse; // When disabled, clamp dot-product to 0
|
||||
};
|
||||
};
|
||||
|
||||
BitField<0, 20, u32> dist_atten_bias;
|
||||
BitField<0, 20, u32> dist_atten_scale;
|
||||
|
||||
INSERT_PADDING_WORDS(0x4);
|
||||
};
|
||||
static_assert(sizeof(LightSrc) == 0x10 * sizeof(u32), "LightSrc structure must be 0x10 words");
|
||||
|
||||
LightSrc light[8];
|
||||
LightColor global_ambient; // Emission + (material.ambient * lighting.ambient)
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
BitField<0, 3, u32> num_lights; // Number of enabled lights - 1
|
||||
|
||||
union {
|
||||
BitField< 2, 2, LightingFresnelSelector> fresnel_selector;
|
||||
BitField< 4, 4, LightingConfig> config;
|
||||
BitField<22, 2, u32> bump_selector; // 0: Texture 0, 1: Texture 1, 2: Texture 2
|
||||
BitField<27, 1, u32> clamp_highlights;
|
||||
BitField<28, 2, LightingBumpMode> bump_mode;
|
||||
BitField<30, 1, u32> disable_bump_renorm;
|
||||
};
|
||||
|
||||
union {
|
||||
BitField<16, 1, u32> disable_lut_d0;
|
||||
BitField<17, 1, u32> disable_lut_d1;
|
||||
BitField<19, 1, u32> disable_lut_fr;
|
||||
BitField<20, 1, u32> disable_lut_rr;
|
||||
BitField<21, 1, u32> disable_lut_rg;
|
||||
BitField<22, 1, u32> disable_lut_rb;
|
||||
|
||||
// Each bit, when set, disables distance attenuation for the
|
||||
// corresponding light
|
||||
|
||||
BitField<24, 1, u32> disable_dist_atten_light_0;
|
||||
BitField<25, 1, u32> disable_dist_atten_light_1;
|
||||
BitField<26, 1, u32> disable_dist_atten_light_2;
|
||||
BitField<27, 1, u32> disable_dist_atten_light_3;
|
||||
BitField<28, 1, u32> disable_dist_atten_light_4;
|
||||
BitField<29, 1, u32> disable_dist_atten_light_5;
|
||||
BitField<30, 1, u32> disable_dist_atten_light_6;
|
||||
BitField<31, 1, u32> disable_dist_atten_light_7;
|
||||
};
|
||||
|
||||
bool IsDistAttenDisabled(unsigned index) const {
|
||||
const unsigned disable[] = { disable_dist_atten_light_0, disable_dist_atten_light_1,
|
||||
disable_dist_atten_light_2, disable_dist_atten_light_3,
|
||||
disable_dist_atten_light_4, disable_dist_atten_light_5,
|
||||
disable_dist_atten_light_6, disable_dist_atten_light_7 };
|
||||
return disable[index] != 0;
|
||||
}
|
||||
|
||||
union {
|
||||
BitField<0, 8, u32> index; ///< Index at which to set data in the LUT
|
||||
BitField<8, 5, u32> type; ///< Type of LUT for which to set data
|
||||
} lut_config;
|
||||
|
||||
BitField<0, 1, u32> disable;
|
||||
INSERT_PADDING_WORDS(0x1);
|
||||
|
||||
// When data is written to any of these registers, it gets written to the lookup table of
|
||||
// the selected type at the selected index, specified above in the `lut_config` register.
|
||||
// With each write, `lut_config.index` is incremented. It does not matter which of these
|
||||
// registers is written to, the behavior will be the same.
|
||||
u32 lut_data[8];
|
||||
|
||||
// These are used to specify if absolute (abs) value should be used for each LUT index. When
|
||||
// abs mode is disabled, LUT indexes are in the range of (-1.0, 1.0). Otherwise, they are in
|
||||
// the range of (0.0, 1.0).
|
||||
union {
|
||||
BitField< 1, 1, u32> disable_d0;
|
||||
BitField< 5, 1, u32> disable_d1;
|
||||
BitField< 9, 1, u32> disable_sp;
|
||||
BitField<13, 1, u32> disable_fr;
|
||||
BitField<17, 1, u32> disable_rb;
|
||||
BitField<21, 1, u32> disable_rg;
|
||||
BitField<25, 1, u32> disable_rr;
|
||||
} abs_lut_input;
|
||||
|
||||
union {
|
||||
BitField< 0, 3, LightingLutInput> d0;
|
||||
BitField< 4, 3, LightingLutInput> d1;
|
||||
BitField< 8, 3, LightingLutInput> sp;
|
||||
BitField<12, 3, LightingLutInput> fr;
|
||||
BitField<16, 3, LightingLutInput> rb;
|
||||
BitField<20, 3, LightingLutInput> rg;
|
||||
BitField<24, 3, LightingLutInput> rr;
|
||||
} lut_input;
|
||||
|
||||
union {
|
||||
BitField< 0, 3, LightingScale> d0;
|
||||
BitField< 4, 3, LightingScale> d1;
|
||||
BitField< 8, 3, LightingScale> sp;
|
||||
BitField<12, 3, LightingScale> fr;
|
||||
BitField<16, 3, LightingScale> rb;
|
||||
BitField<20, 3, LightingScale> rg;
|
||||
BitField<24, 3, LightingScale> rr;
|
||||
|
||||
/// Translates a LightingScale selector into its numeric multiplier.
/// Selector values without a matching enumerator yield 0.0f.
static float GetScale(LightingScale scale) {
    float factor = 0.0f;

    switch (scale) {
    case LightingScale::Scale1:   factor = 1.0f;  break;
    case LightingScale::Scale2:   factor = 2.0f;  break;
    case LightingScale::Scale4:   factor = 4.0f;  break;
    case LightingScale::Scale8:   factor = 8.0f;  break;
    case LightingScale::Scale1_4: factor = 0.25f; break;
    case LightingScale::Scale1_2: factor = 0.5f;  break;
    }

    return factor;
}
|
||||
} lut_scale;
|
||||
|
||||
INSERT_PADDING_WORDS(0x6);
|
||||
|
||||
union {
|
||||
// There are 8 light enable "slots", corresponding to the total number of lights
|
||||
// supported by Pica. For N enabled lights (specified by register 0x1c2, or 'num_lights'
|
||||
// above), the first N slots below will be set to integers within the range of 0-7,
|
||||
// corresponding to the actual light that is enabled for each slot.
|
||||
|
||||
BitField< 0, 3, u32> slot_0;
|
||||
BitField< 4, 3, u32> slot_1;
|
||||
BitField< 8, 3, u32> slot_2;
|
||||
BitField<12, 3, u32> slot_3;
|
||||
BitField<16, 3, u32> slot_4;
|
||||
BitField<20, 3, u32> slot_5;
|
||||
BitField<24, 3, u32> slot_6;
|
||||
BitField<28, 3, u32> slot_7;
|
||||
|
||||
unsigned GetNum(unsigned index) const {
|
||||
const unsigned enable_slots[] = { slot_0, slot_1, slot_2, slot_3, slot_4, slot_5, slot_6, slot_7 };
|
||||
return enable_slots[index];
|
||||
}
|
||||
} light_enable;
|
||||
} lighting;
|
||||
|
||||
INSERT_PADDING_WORDS(0x26);
|
||||
|
||||
enum class VertexAttributeFormat : u64 {
|
||||
BYTE = 0,
|
||||
@ -990,6 +1254,7 @@ ASSERT_REG_POSITION(viewport_corner, 0x68);
|
||||
ASSERT_REG_POSITION(texture0_enable, 0x80);
|
||||
ASSERT_REG_POSITION(texture0, 0x81);
|
||||
ASSERT_REG_POSITION(texture0_format, 0x8e);
|
||||
ASSERT_REG_POSITION(fragment_lighting_enable, 0x8f);
|
||||
ASSERT_REG_POSITION(texture1, 0x91);
|
||||
ASSERT_REG_POSITION(texture1_format, 0x96);
|
||||
ASSERT_REG_POSITION(texture2, 0x99);
|
||||
@ -1004,6 +1269,7 @@ ASSERT_REG_POSITION(tev_stage5, 0xf8);
|
||||
ASSERT_REG_POSITION(tev_combiner_buffer_color, 0xfd);
|
||||
ASSERT_REG_POSITION(output_merger, 0x100);
|
||||
ASSERT_REG_POSITION(framebuffer, 0x110);
|
||||
ASSERT_REG_POSITION(lighting, 0x140);
|
||||
ASSERT_REG_POSITION(vertex_attributes, 0x200);
|
||||
ASSERT_REG_POSITION(index_array, 0x227);
|
||||
ASSERT_REG_POSITION(num_vertices, 0x228);
|
||||
@ -1026,118 +1292,6 @@ static_assert(sizeof(Regs::ShaderConfig) == 0x30 * sizeof(u32), "ShaderConfig st
|
||||
static_assert(sizeof(Regs) <= 0x300 * sizeof(u32), "Register set structure larger than it should be");
|
||||
static_assert(sizeof(Regs) >= 0x300 * sizeof(u32), "Register set structure smaller than it should be");
|
||||
|
||||
struct float24 {
|
||||
static float24 FromFloat32(float val) {
|
||||
float24 ret;
|
||||
ret.value = val;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// 16 bit mantissa, 7 bit exponent, 1 bit sign
|
||||
// TODO: No idea if this works as intended
|
||||
static float24 FromRawFloat24(u32 hex) {
|
||||
float24 ret;
|
||||
if ((hex & 0xFFFFFF) == 0) {
|
||||
ret.value = 0;
|
||||
} else {
|
||||
u32 mantissa = hex & 0xFFFF;
|
||||
u32 exponent = (hex >> 16) & 0x7F;
|
||||
u32 sign = hex >> 23;
|
||||
ret.value = std::pow(2.0f, (float)exponent-63.0f) * (1.0f + mantissa * std::pow(2.0f, -16.f));
|
||||
if (sign)
|
||||
ret.value = -ret.value;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
static float24 Zero() {
|
||||
return FromFloat32(0.f);
|
||||
}
|
||||
|
||||
// Not recommended for anything but logging
|
||||
float ToFloat32() const {
|
||||
return value;
|
||||
}
|
||||
|
||||
float24 operator * (const float24& flt) const {
|
||||
if ((this->value == 0.f && !std::isnan(flt.value)) ||
|
||||
(flt.value == 0.f && !std::isnan(this->value)))
|
||||
// PICA gives 0 instead of NaN when multiplying by inf
|
||||
return Zero();
|
||||
return float24::FromFloat32(ToFloat32() * flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator / (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() / flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator + (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() + flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24 operator - (const float24& flt) const {
|
||||
return float24::FromFloat32(ToFloat32() - flt.ToFloat32());
|
||||
}
|
||||
|
||||
float24& operator *= (const float24& flt) {
|
||||
if ((this->value == 0.f && !std::isnan(flt.value)) ||
|
||||
(flt.value == 0.f && !std::isnan(this->value)))
|
||||
// PICA gives 0 instead of NaN when multiplying by inf
|
||||
*this = Zero();
|
||||
else value *= flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
float24& operator /= (const float24& flt) {
|
||||
value /= flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
float24& operator += (const float24& flt) {
|
||||
value += flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
float24& operator -= (const float24& flt) {
|
||||
value -= flt.ToFloat32();
|
||||
return *this;
|
||||
}
|
||||
|
||||
float24 operator - () const {
|
||||
return float24::FromFloat32(-ToFloat32());
|
||||
}
|
||||
|
||||
bool operator < (const float24& flt) const {
|
||||
return ToFloat32() < flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator > (const float24& flt) const {
|
||||
return ToFloat32() > flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator >= (const float24& flt) const {
|
||||
return ToFloat32() >= flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator <= (const float24& flt) const {
|
||||
return ToFloat32() <= flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator == (const float24& flt) const {
|
||||
return ToFloat32() == flt.ToFloat32();
|
||||
}
|
||||
|
||||
bool operator != (const float24& flt) const {
|
||||
return ToFloat32() != flt.ToFloat32();
|
||||
}
|
||||
|
||||
private:
|
||||
// Stored as a regular float, merely for convenience
|
||||
// TODO: Perform proper arithmetic on this!
|
||||
float value;
|
||||
};
|
||||
static_assert(sizeof(float24) == sizeof(float), "Shader JIT assumes float24 is implemented as a 32-bit float");
|
||||
|
||||
/// Struct used to describe current Pica state
|
||||
struct State {
|
||||
/// Pica registers
|
||||
@ -1163,6 +1317,25 @@ struct State {
|
||||
ShaderSetup vs;
|
||||
ShaderSetup gs;
|
||||
|
||||
struct {
|
||||
union LutEntry {
|
||||
// Used for raw access
|
||||
u32 raw;
|
||||
|
||||
// LUT value, encoded as 12-bit fixed point, with 12 fraction bits
|
||||
BitField< 0, 12, u32> value;
|
||||
|
||||
// Used by HW for efficient interpolation, Citra does not use these
|
||||
BitField<12, 12, u32> difference;
|
||||
|
||||
float ToFloat() {
|
||||
return static_cast<float>(value) / 4095.f;
|
||||
}
|
||||
};
|
||||
|
||||
std::array<std::array<LutEntry, 256>, 24> luts;
|
||||
} lighting;
|
||||
|
||||
/// Current Pica command list
|
||||
struct {
|
||||
const u32* head_ptr;
|
||||
|
146
src/video_core/pica_types.h
Normal file
146
src/video_core/pica_types.h
Normal file
@ -0,0 +1,146 @@
|
||||
// Copyright 2015 Citra Emulator Project
|
||||
// Licensed under GPLv2 or any later version
|
||||
// Refer to the license.txt file included.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cmath>
#include <cstring>
|
||||
|
||||
#include "common/common_types.h"
|
||||
|
||||
namespace Pica {
|
||||
|
||||
/**
 * Template class for converting arbitrary Pica float types to IEEE 754 32-bit single-precision
 * floating point.
 *
 * When decoding, format is as follows:
 *  - The first `M` bits are the mantissa
 *  - The next `E` bits are the exponent
 *  - The last bit is the sign bit
 *
 * The decoded number is kept as a host `float`; apart from the multiply-by-zero special case
 * below, all arithmetic is plain host float arithmetic.
 *
 * @todo Verify on HW if this conversion is sufficiently accurate.
 */
template<unsigned M, unsigned E>
struct Float {
public:
    // FromRaw() shifts the mantissa by (23 - M) and places the exponent in an 8-bit field
    static_assert(M <= 23 && E >= 1 && E <= 8,
                  "Pica float layout must fit into an IEEE 754 single-precision float");

    /// Wraps a host float without any conversion.
    static Float<M, E> FromFloat32(float val) {
        Float<M, E> ret;
        ret.value = val;
        return ret;
    }

    /**
     * Decodes a raw (1 + E + M)-bit Pica float into its host float representation.
     *
     * @param hex Raw encoding; the mantissa occupies the low `M` bits, followed by `E` exponent
     *            bits and the sign bit
     * @return The decoded value. An encoding whose exponent and mantissa are all zero decodes
     *         to a (signed) zero; an all-ones exponent decodes to infinity/NaN.
     */
    static Float<M, E> FromRaw(u32 hex) {
        Float<M, E> res;

        const int width = M + E + 1;
        // Difference between the IEEE single-precision exponent bias and this format's bias
        const int bias = 128 - (1 << (E - 1));
        const int max_exponent = (1 << E) - 1;
        int exponent = (hex >> M) & max_exponent;
        const unsigned mantissa = hex & ((1 << M) - 1);
        const unsigned sign = (hex >> (E + M)) << 31;

        if (hex & ((1 << (width - 1)) - 1)) {
            // An all-ones exponent marks infinity/NaN. Rebiasing it like a regular exponent
            // would turn it into a large finite number, so map it onto the IEEE all-ones
            // exponent instead.
            if (exponent == max_exponent)
                exponent = 255;
            else
                exponent += bias;

            hex = sign | (mantissa << (23 - M)) | (static_cast<unsigned>(exponent) << 23);
        } else {
            // Exponent and mantissa are both zero: keep only the sign
            hex = sign;
        }

        std::memcpy(&res.value, &hex, sizeof(float));

        return res;
    }

    static Float<M, E> Zero() {
        return FromFloat32(0.f);
    }

    // Not recommended for anything but logging
    float ToFloat32() const {
        return value;
    }

    Float<M, E> operator * (const Float<M, E>& flt) const {
        if ((this->value == 0.f && !std::isnan(flt.value)) ||
            (flt.value == 0.f && !std::isnan(this->value)))
            // PICA gives 0 instead of NaN when multiplying by inf
            return Zero();
        return Float<M, E>::FromFloat32(ToFloat32() * flt.ToFloat32());
    }

    Float<M, E> operator / (const Float<M, E>& flt) const {
        return Float<M, E>::FromFloat32(ToFloat32() / flt.ToFloat32());
    }

    Float<M, E> operator + (const Float<M, E>& flt) const {
        return Float<M, E>::FromFloat32(ToFloat32() + flt.ToFloat32());
    }

    Float<M, E> operator - (const Float<M, E>& flt) const {
        return Float<M, E>::FromFloat32(ToFloat32() - flt.ToFloat32());
    }

    Float<M, E>& operator *= (const Float<M, E>& flt) {
        if ((this->value == 0.f && !std::isnan(flt.value)) ||
            (flt.value == 0.f && !std::isnan(this->value)))
            // PICA gives 0 instead of NaN when multiplying by inf
            *this = Zero();
        else value *= flt.ToFloat32();
        return *this;
    }

    Float<M, E>& operator /= (const Float<M, E>& flt) {
        value /= flt.ToFloat32();
        return *this;
    }

    Float<M, E>& operator += (const Float<M, E>& flt) {
        value += flt.ToFloat32();
        return *this;
    }

    Float<M, E>& operator -= (const Float<M, E>& flt) {
        value -= flt.ToFloat32();
        return *this;
    }

    Float<M, E> operator - () const {
        return Float<M, E>::FromFloat32(-ToFloat32());
    }

    bool operator < (const Float<M, E>& flt) const {
        return ToFloat32() < flt.ToFloat32();
    }

    bool operator > (const Float<M, E>& flt) const {
        return ToFloat32() > flt.ToFloat32();
    }

    bool operator >= (const Float<M, E>& flt) const {
        return ToFloat32() >= flt.ToFloat32();
    }

    bool operator <= (const Float<M, E>& flt) const {
        return ToFloat32() <= flt.ToFloat32();
    }

    bool operator == (const Float<M, E>& flt) const {
        return ToFloat32() == flt.ToFloat32();
    }

    bool operator != (const Float<M, E>& flt) const {
        return ToFloat32() != flt.ToFloat32();
    }

private:
    static const unsigned MASK = (1 << (M + E + 1)) - 1;
    static const unsigned MANTISSA_MASK = (1 << M) - 1;
    static const unsigned EXPONENT_MASK = (1 << E) - 1;

    // Stored as a regular float, merely for convenience
    // TODO: Perform proper arithmetic on this!
    float value;
};
|
||||
|
||||
// 24-bit Pica float: 16-bit mantissa, 7-bit exponent, 1 sign bit
using float24 = Float<16, 7>;
|
||||
// 20-bit Pica float: 12-bit mantissa, 7-bit exponent, 1 sign bit
using float20 = Float<12, 7>;
|
||||
// 16-bit Pica float: 10-bit mantissa, 5-bit exponent, 1 sign bit
using float16 = Float<10, 5>;
|
||||
|
||||
} // namespace Pica
|
@ -75,6 +75,12 @@ void RasterizerOpenGL::InitObjects() {
|
||||
glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1);
|
||||
glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2);
|
||||
|
||||
glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat));
|
||||
glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT);
|
||||
|
||||
glVertexAttribPointer(GLShader::ATTRIBUTE_VIEW, 3, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, view));
|
||||
glEnableVertexAttribArray(GLShader::ATTRIBUTE_VIEW);
|
||||
|
||||
SetShader();
|
||||
|
||||
// Create textures for OGL framebuffer that will be rendered to, initially 1x1 to succeed in framebuffer creation
|
||||
@ -120,6 +126,19 @@ void RasterizerOpenGL::InitObjects() {
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, fb_color_texture.texture.handle, 0);
|
||||
glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, fb_depth_texture.texture.handle, 0);
|
||||
|
||||
for (size_t i = 0; i < lighting_lut.size(); ++i) {
|
||||
lighting_lut[i].Create();
|
||||
state.lighting_lut[i].texture_1d = lighting_lut[i].handle;
|
||||
|
||||
glActiveTexture(GL_TEXTURE3 + i);
|
||||
glBindTexture(GL_TEXTURE_1D, state.lighting_lut[i].texture_1d);
|
||||
|
||||
glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
|
||||
glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
|
||||
}
|
||||
state.Apply();
|
||||
|
||||
ASSERT_MSG(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE,
|
||||
"OpenGL rasterizer framebuffer setup failed, status %X", glCheckFramebufferStatus(GL_FRAMEBUFFER));
|
||||
}
|
||||
@ -139,12 +158,34 @@ void RasterizerOpenGL::Reset() {
|
||||
res_cache.InvalidateAll();
|
||||
}
|
||||
|
||||
/**
|
||||
* This is a helper function to resolve an issue with opposite quaternions being interpolated by
|
||||
* OpenGL. See below for a detailed description of this issue (yuriks):
|
||||
*
|
||||
* For any rotation, there are two quaternions Q, and -Q, that represent the same rotation. If you
|
||||
* interpolate two quaternions that are opposite, instead of going from one rotation to another
|
||||
* using the shortest path, you'll go around the longest path. You can test if two quaternions are
|
||||
* opposite by checking if Dot(Q1, W2) < 0. In that case, you can flip either of them, therefore
|
||||
* making Dot(-Q1, W2) positive.
|
||||
*
|
||||
* NOTE: This solution corrects this issue per-vertex before passing the quaternions to OpenGL. This
|
||||
* should be correct for nearly all cases, however a more correct implementation (but less trivial
|
||||
* and perhaps unnecessary) would be to handle this per-fragment, by interpolating the quaternions
|
||||
* manually using two Lerps, and doing this correction before each Lerp.
|
||||
*/
|
||||
static bool AreQuaternionsOpposite(Math::Vec4<Pica::float24> qa, Math::Vec4<Pica::float24> qb) {
|
||||
Math::Vec4f a{ qa.x.ToFloat32(), qa.y.ToFloat32(), qa.z.ToFloat32(), qa.w.ToFloat32() };
|
||||
Math::Vec4f b{ qb.x.ToFloat32(), qb.y.ToFloat32(), qb.z.ToFloat32(), qb.w.ToFloat32() };
|
||||
|
||||
return (Math::Dot(a, b) < 0.f);
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::AddTriangle(const Pica::Shader::OutputVertex& v0,
|
||||
const Pica::Shader::OutputVertex& v1,
|
||||
const Pica::Shader::OutputVertex& v2) {
|
||||
vertex_batch.emplace_back(v0);
|
||||
vertex_batch.emplace_back(v1);
|
||||
vertex_batch.emplace_back(v2);
|
||||
vertex_batch.emplace_back(v0, false);
|
||||
vertex_batch.emplace_back(v1, AreQuaternionsOpposite(v0.quat, v1.quat));
|
||||
vertex_batch.emplace_back(v2, AreQuaternionsOpposite(v0.quat, v2.quat));
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::DrawTriangles() {
|
||||
@ -156,6 +197,13 @@ void RasterizerOpenGL::DrawTriangles() {
|
||||
state.draw.shader_dirty = false;
|
||||
}
|
||||
|
||||
for (unsigned index = 0; index < lighting_lut.size(); index++) {
|
||||
if (uniform_block_data.lut_dirty[index]) {
|
||||
SyncLightingLUT(index);
|
||||
uniform_block_data.lut_dirty[index] = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (uniform_block_data.dirty) {
|
||||
glBufferData(GL_UNIFORM_BUFFER, sizeof(UniformData), &uniform_block_data.data, GL_STATIC_DRAW);
|
||||
uniform_block_data.dirty = false;
|
||||
@ -283,6 +331,165 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) {
|
||||
case PICA_REG_INDEX(tev_combiner_buffer_color):
|
||||
SyncCombinerColor();
|
||||
break;
|
||||
|
||||
// Fragment lighting specular 0 color
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_0, 0x140 + 0 * 0x10):
|
||||
SyncLightSpecular0(0);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_0, 0x140 + 1 * 0x10):
|
||||
SyncLightSpecular0(1);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_0, 0x140 + 2 * 0x10):
|
||||
SyncLightSpecular0(2);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_0, 0x140 + 3 * 0x10):
|
||||
SyncLightSpecular0(3);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_0, 0x140 + 4 * 0x10):
|
||||
SyncLightSpecular0(4);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_0, 0x140 + 5 * 0x10):
|
||||
SyncLightSpecular0(5);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_0, 0x140 + 6 * 0x10):
|
||||
SyncLightSpecular0(6);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_0, 0x140 + 7 * 0x10):
|
||||
SyncLightSpecular0(7);
|
||||
break;
|
||||
|
||||
// Fragment lighting specular 1 color
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].specular_1, 0x141 + 0 * 0x10):
|
||||
SyncLightSpecular1(0);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].specular_1, 0x141 + 1 * 0x10):
|
||||
SyncLightSpecular1(1);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].specular_1, 0x141 + 2 * 0x10):
|
||||
SyncLightSpecular1(2);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].specular_1, 0x141 + 3 * 0x10):
|
||||
SyncLightSpecular1(3);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].specular_1, 0x141 + 4 * 0x10):
|
||||
SyncLightSpecular1(4);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].specular_1, 0x141 + 5 * 0x10):
|
||||
SyncLightSpecular1(5);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].specular_1, 0x141 + 6 * 0x10):
|
||||
SyncLightSpecular1(6);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].specular_1, 0x141 + 7 * 0x10):
|
||||
SyncLightSpecular1(7);
|
||||
break;
|
||||
|
||||
// Fragment lighting diffuse color
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].diffuse, 0x142 + 0 * 0x10):
|
||||
SyncLightDiffuse(0);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].diffuse, 0x142 + 1 * 0x10):
|
||||
SyncLightDiffuse(1);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].diffuse, 0x142 + 2 * 0x10):
|
||||
SyncLightDiffuse(2);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].diffuse, 0x142 + 3 * 0x10):
|
||||
SyncLightDiffuse(3);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].diffuse, 0x142 + 4 * 0x10):
|
||||
SyncLightDiffuse(4);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].diffuse, 0x142 + 5 * 0x10):
|
||||
SyncLightDiffuse(5);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].diffuse, 0x142 + 6 * 0x10):
|
||||
SyncLightDiffuse(6);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].diffuse, 0x142 + 7 * 0x10):
|
||||
SyncLightDiffuse(7);
|
||||
break;
|
||||
|
||||
// Fragment lighting ambient color
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].ambient, 0x143 + 0 * 0x10):
|
||||
SyncLightAmbient(0);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].ambient, 0x143 + 1 * 0x10):
|
||||
SyncLightAmbient(1);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].ambient, 0x143 + 2 * 0x10):
|
||||
SyncLightAmbient(2);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].ambient, 0x143 + 3 * 0x10):
|
||||
SyncLightAmbient(3);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].ambient, 0x143 + 4 * 0x10):
|
||||
SyncLightAmbient(4);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].ambient, 0x143 + 5 * 0x10):
|
||||
SyncLightAmbient(5);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].ambient, 0x143 + 6 * 0x10):
|
||||
SyncLightAmbient(6);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].ambient, 0x143 + 7 * 0x10):
|
||||
SyncLightAmbient(7);
|
||||
break;
|
||||
|
||||
// Fragment lighting position
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].x, 0x144 + 0 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[0].z, 0x145 + 0 * 0x10):
|
||||
SyncLightPosition(0);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].x, 0x144 + 1 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[1].z, 0x145 + 1 * 0x10):
|
||||
SyncLightPosition(1);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].x, 0x144 + 2 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[2].z, 0x145 + 2 * 0x10):
|
||||
SyncLightPosition(2);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].x, 0x144 + 3 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[3].z, 0x145 + 3 * 0x10):
|
||||
SyncLightPosition(3);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].x, 0x144 + 4 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[4].z, 0x145 + 4 * 0x10):
|
||||
SyncLightPosition(4);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].x, 0x144 + 5 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[5].z, 0x145 + 5 * 0x10):
|
||||
SyncLightPosition(5);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].x, 0x144 + 6 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[6].z, 0x145 + 6 * 0x10):
|
||||
SyncLightPosition(6);
|
||||
break;
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].x, 0x144 + 7 * 0x10):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.light[7].z, 0x145 + 7 * 0x10):
|
||||
SyncLightPosition(7);
|
||||
break;
|
||||
|
||||
// Fragment lighting global ambient color (emission + ambient * ambient)
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.global_ambient, 0x1c0):
|
||||
SyncGlobalAmbient();
|
||||
break;
|
||||
|
||||
// Fragment lighting lookup tables
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[0], 0x1c8):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[1], 0x1c9):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[2], 0x1ca):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[3], 0x1cb):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[4], 0x1cc):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[5], 0x1cd):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[6], 0x1ce):
|
||||
case PICA_REG_INDEX_WORKAROUND(lighting.lut_data[7], 0x1cf):
|
||||
{
|
||||
auto& lut_config = regs.lighting.lut_config;
|
||||
uniform_block_data.lut_dirty[lut_config.type / 4] = true;
|
||||
break;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -491,18 +698,39 @@ void RasterizerOpenGL::SetShader() {
|
||||
uniform_tex = glGetUniformLocation(shader->shader.handle, "tex[2]");
|
||||
if (uniform_tex != -1) { glUniform1i(uniform_tex, 2); }
|
||||
|
||||
// Set the texture samplers to correspond to different lookup table texture units
|
||||
GLuint uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[0]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 3); }
|
||||
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[1]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 4); }
|
||||
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[2]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 5); }
|
||||
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[3]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 6); }
|
||||
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[4]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 7); }
|
||||
uniform_lut = glGetUniformLocation(shader->shader.handle, "lut[5]");
|
||||
if (uniform_lut != -1) { glUniform1i(uniform_lut, 8); }
|
||||
|
||||
current_shader = shader_cache.emplace(config, std::move(shader)).first->second.get();
|
||||
|
||||
unsigned int block_index = glGetUniformBlockIndex(current_shader->shader.handle, "shader_data");
|
||||
glUniformBlockBinding(current_shader->shader.handle, block_index, 0);
|
||||
}
|
||||
|
||||
// Update uniforms
|
||||
SyncAlphaTest();
|
||||
SyncCombinerColor();
|
||||
auto& tev_stages = Pica::g_state.regs.GetTevStages();
|
||||
for (int index = 0; index < tev_stages.size(); ++index)
|
||||
SyncTevConstColor(index, tev_stages[index]);
|
||||
// Update uniforms
|
||||
SyncAlphaTest();
|
||||
SyncCombinerColor();
|
||||
auto& tev_stages = Pica::g_state.regs.GetTevStages();
|
||||
for (int index = 0; index < tev_stages.size(); ++index)
|
||||
SyncTevConstColor(index, tev_stages[index]);
|
||||
|
||||
SyncGlobalAmbient();
|
||||
for (int light_index = 0; light_index < 8; light_index++) {
|
||||
SyncLightDiffuse(light_index);
|
||||
SyncLightAmbient(light_index);
|
||||
SyncLightPosition(light_index);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncFramebuffer() {
|
||||
@ -604,8 +832,8 @@ void RasterizerOpenGL::SyncCullMode() {
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncDepthModifiers() {
|
||||
float depth_scale = -Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_range).ToFloat32();
|
||||
float depth_offset = Pica::float24::FromRawFloat24(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
|
||||
float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32();
|
||||
float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f;
|
||||
|
||||
// TODO: Implement scale modifier
|
||||
uniform_block_data.data.depth_offset = depth_offset;
|
||||
@ -683,12 +911,81 @@ void RasterizerOpenGL::SyncTevConstColor(int stage_index, const Pica::Regs::TevS
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncGlobalAmbient() {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.global_ambient);
|
||||
if (color != uniform_block_data.data.lighting_global_ambient) {
|
||||
uniform_block_data.data.lighting_global_ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightingLUT(unsigned lut_index) {
|
||||
std::array<GLvec4, 256> new_data;
|
||||
|
||||
for (unsigned offset = 0; offset < new_data.size(); ++offset) {
|
||||
new_data[offset][0] = Pica::g_state.lighting.luts[(lut_index * 4) + 0][offset].ToFloat();
|
||||
new_data[offset][1] = Pica::g_state.lighting.luts[(lut_index * 4) + 1][offset].ToFloat();
|
||||
new_data[offset][2] = Pica::g_state.lighting.luts[(lut_index * 4) + 2][offset].ToFloat();
|
||||
new_data[offset][3] = Pica::g_state.lighting.luts[(lut_index * 4) + 3][offset].ToFloat();
|
||||
}
|
||||
|
||||
if (new_data != lighting_lut_data[lut_index]) {
|
||||
lighting_lut_data[lut_index] = new_data;
|
||||
glActiveTexture(GL_TEXTURE3 + lut_index);
|
||||
glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RGBA, GL_FLOAT, lighting_lut_data[lut_index].data());
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightSpecular0(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_0);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_0) {
|
||||
uniform_block_data.data.light_src[light_index].specular_0 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightSpecular1(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].specular_1);
|
||||
if (color != uniform_block_data.data.light_src[light_index].specular_1) {
|
||||
uniform_block_data.data.light_src[light_index].specular_1 = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightDiffuse(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].diffuse);
|
||||
if (color != uniform_block_data.data.light_src[light_index].diffuse) {
|
||||
uniform_block_data.data.light_src[light_index].diffuse = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightAmbient(int light_index) {
|
||||
auto color = PicaToGL::LightColor(Pica::g_state.regs.lighting.light[light_index].ambient);
|
||||
if (color != uniform_block_data.data.light_src[light_index].ambient) {
|
||||
uniform_block_data.data.light_src[light_index].ambient = color;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncLightPosition(int light_index) {
|
||||
GLvec3 position = {
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].x).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].y).ToFloat32(),
|
||||
Pica::float16::FromRaw(Pica::g_state.regs.lighting.light[light_index].z).ToFloat32() };
|
||||
|
||||
if (position != uniform_block_data.data.light_src[light_index].position) {
|
||||
uniform_block_data.data.light_src[light_index].position = position;
|
||||
uniform_block_data.dirty = true;
|
||||
}
|
||||
}
|
||||
|
||||
void RasterizerOpenGL::SyncDrawState() {
|
||||
const auto& regs = Pica::g_state.regs;
|
||||
|
||||
// Sync the viewport
|
||||
GLsizei viewport_width = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_x).ToFloat32() * 2;
|
||||
GLsizei viewport_height = (GLsizei)Pica::float24::FromRawFloat24(regs.viewport_size_y).ToFloat32() * 2;
|
||||
GLsizei viewport_width = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_x).ToFloat32() * 2;
|
||||
GLsizei viewport_height = (GLsizei)Pica::float24::FromRaw(regs.viewport_size_y).ToFloat32() * 2;
|
||||
|
||||
// OpenGL uses different y coordinates, so negate corner offset and flip origin
|
||||
// TODO: Ensure viewport_corner.x should not be negated or origin flipped
|
||||
|
@ -17,6 +17,7 @@
|
||||
#include "video_core/rasterizer_interface.h"
|
||||
#include "video_core/renderer_opengl/gl_rasterizer_cache.h"
|
||||
#include "video_core/renderer_opengl/gl_state.h"
|
||||
#include "video_core/renderer_opengl/pica_to_gl.h"
|
||||
#include "video_core/shader/shader_interpreter.h"
|
||||
|
||||
/**
|
||||
@ -71,6 +72,59 @@ struct PicaShaderConfig {
|
||||
regs.tev_combiner_buffer_input.update_mask_rgb.Value() |
|
||||
regs.tev_combiner_buffer_input.update_mask_a.Value() << 4;
|
||||
|
||||
// Fragment lighting
|
||||
|
||||
res.lighting.enable = !regs.lighting.disable;
|
||||
res.lighting.src_num = regs.lighting.num_lights + 1;
|
||||
|
||||
for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) {
|
||||
unsigned num = regs.lighting.light_enable.GetNum(light_index);
|
||||
const auto& light = regs.lighting.light[num];
|
||||
res.lighting.light[light_index].num = num;
|
||||
res.lighting.light[light_index].directional = light.directional != 0;
|
||||
res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0;
|
||||
res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num);
|
||||
res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32();
|
||||
res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32();
|
||||
}
|
||||
|
||||
res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0;
|
||||
res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0;
|
||||
res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value();
|
||||
res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0);
|
||||
|
||||
res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0;
|
||||
res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0;
|
||||
res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value();
|
||||
res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1);
|
||||
|
||||
res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0;
|
||||
res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0;
|
||||
res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value();
|
||||
res.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr);
|
||||
|
||||
res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0;
|
||||
res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0;
|
||||
res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value();
|
||||
res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr);
|
||||
|
||||
res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0;
|
||||
res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0;
|
||||
res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value();
|
||||
res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg);
|
||||
|
||||
res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0;
|
||||
res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0;
|
||||
res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value();
|
||||
res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb);
|
||||
|
||||
res.lighting.config = regs.lighting.config;
|
||||
res.lighting.fresnel_selector = regs.lighting.fresnel_selector;
|
||||
res.lighting.bump_mode = regs.lighting.bump_mode;
|
||||
res.lighting.bump_selector = regs.lighting.bump_selector;
|
||||
res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0;
|
||||
res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0;
|
||||
|
||||
return res;
|
||||
}
|
||||
|
||||
@ -86,9 +140,37 @@ struct PicaShaderConfig {
|
||||
return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0;
|
||||
};
|
||||
|
||||
Pica::Regs::CompareFunc alpha_test_func;
|
||||
Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never;
|
||||
std::array<Pica::Regs::TevStageConfig, 6> tev_stages = {};
|
||||
u8 combiner_buffer_input;
|
||||
u8 combiner_buffer_input = 0;
|
||||
|
||||
struct {
|
||||
struct {
|
||||
unsigned num = 0;
|
||||
bool directional = false;
|
||||
bool two_sided_diffuse = false;
|
||||
bool dist_atten_enable = false;
|
||||
GLfloat dist_atten_scale = 0.0f;
|
||||
GLfloat dist_atten_bias = 0.0f;
|
||||
} light[8];
|
||||
|
||||
bool enable = false;
|
||||
unsigned src_num = 0;
|
||||
Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None;
|
||||
unsigned bump_selector = 0;
|
||||
bool bump_renorm = false;
|
||||
bool clamp_highlights = false;
|
||||
|
||||
Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0;
|
||||
Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None;
|
||||
|
||||
struct {
|
||||
bool enable = false;
|
||||
bool abs_input = false;
|
||||
Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH;
|
||||
float scale = 1.0f;
|
||||
} lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb;
|
||||
} lighting;
|
||||
};
|
||||
|
||||
namespace std {
|
||||
@ -167,7 +249,7 @@ private:
|
||||
|
||||
/// Structure that the hardware rendered vertices are composed of
|
||||
struct HardwareVertex {
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v) {
|
||||
HardwareVertex(const Pica::Shader::OutputVertex& v, bool flip_quaternion) {
|
||||
position[0] = v.pos.x.ToFloat32();
|
||||
position[1] = v.pos.y.ToFloat32();
|
||||
position[2] = v.pos.z.ToFloat32();
|
||||
@ -182,6 +264,19 @@ private:
|
||||
tex_coord1[1] = v.tc1.y.ToFloat32();
|
||||
tex_coord2[0] = v.tc2.x.ToFloat32();
|
||||
tex_coord2[1] = v.tc2.y.ToFloat32();
|
||||
normquat[0] = v.quat.x.ToFloat32();
|
||||
normquat[1] = v.quat.y.ToFloat32();
|
||||
normquat[2] = v.quat.z.ToFloat32();
|
||||
normquat[3] = v.quat.w.ToFloat32();
|
||||
view[0] = v.view.x.ToFloat32();
|
||||
view[1] = v.view.y.ToFloat32();
|
||||
view[2] = v.view.z.ToFloat32();
|
||||
|
||||
if (flip_quaternion) {
|
||||
for (float& x : normquat) {
|
||||
x = -x;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
GLfloat position[4];
|
||||
@ -189,20 +284,31 @@ private:
|
||||
GLfloat tex_coord0[2];
|
||||
GLfloat tex_coord1[2];
|
||||
GLfloat tex_coord2[2];
|
||||
GLfloat normquat[4];
|
||||
GLfloat view[3];
|
||||
};
|
||||
|
||||
/// Lighting parameters of a single light source, stored per light in UniformData::light_src.
/// Every member is forced onto a 16-byte boundary with alignas(16).
/// NOTE(review): presumably this mirrors the layout of the matching struct in the shader's
/// uniform block - confirm against the GLSL declaration before reordering or adding members.
struct LightSrc {
|
||||
alignas(16) GLvec3 specular_0;
|
||||
alignas(16) GLvec3 specular_1;
|
||||
alignas(16) GLvec3 diffuse;
|
||||
alignas(16) GLvec3 ambient;
|
||||
alignas(16) GLvec3 position;
|
||||
};
|
||||
|
||||
/// Uniform structure for the Uniform Buffer Object, all members must be 16-byte aligned
|
||||
struct UniformData {
|
||||
// A vec4 color for each of the six tev stages
|
||||
std::array<GLfloat, 4> const_color[6];
|
||||
std::array<GLfloat, 4> tev_combiner_buffer_color;
|
||||
GLvec4 const_color[6];
|
||||
GLvec4 tev_combiner_buffer_color;
|
||||
GLint alphatest_ref;
|
||||
GLfloat depth_offset;
|
||||
INSERT_PADDING_BYTES(8);
|
||||
alignas(16) GLvec3 lighting_global_ambient;
|
||||
LightSrc light_src[8];
|
||||
};
|
||||
|
||||
static_assert(sizeof(UniformData) == 0x80, "The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16000, "UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
static_assert(sizeof(UniformData) == 0x310, "The size of the UniformData structure has changed, update the structure in the shader");
|
||||
static_assert(sizeof(UniformData) < 16384, "UniformData structure must be less than 16kb as per the OpenGL spec");
|
||||
|
||||
/// Reconfigure the OpenGL color texture to use the given format and dimensions
|
||||
void ReconfigureColorTexture(TextureInfo& texture, Pica::Regs::ColorFormat format, u32 width, u32 height);
|
||||
@ -249,6 +355,27 @@ private:
|
||||
/// Syncs the TEV combiner color buffer to match the PICA register
|
||||
void SyncCombinerColor();
|
||||
|
||||
/// Syncs the lighting global ambient color to match the PICA register
|
||||
void SyncGlobalAmbient();
|
||||
|
||||
/// Syncs the lighting lookup tables
|
||||
void SyncLightingLUT(unsigned index);
|
||||
|
||||
/// Syncs the specified light's diffuse color to match the PICA register
|
||||
void SyncLightDiffuse(int light_index);
|
||||
|
||||
/// Syncs the specified light's ambient color to match the PICA register
|
||||
void SyncLightAmbient(int light_index);
|
||||
|
||||
/// Syncs the specified light's position to match the PICA register
|
||||
void SyncLightPosition(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 0 color to match the PICA register
|
||||
void SyncLightSpecular0(int light_index);
|
||||
|
||||
/// Syncs the specified light's specular 1 color to match the PICA register
|
||||
void SyncLightSpecular1(int light_index);
|
||||
|
||||
/// Syncs the remaining OpenGL drawing state to match the current PICA state
|
||||
void SyncDrawState();
|
||||
|
||||
@ -291,6 +418,7 @@ private:
|
||||
|
||||
/// CPU-side copy of the shader uniform block together with its change-tracking flags
struct {
|
||||
// Uniform values that are uploaded with glBufferData when `dirty` is set
UniformData data;
|
||||
// One flag per lighting LUT texture; a set flag makes DrawTriangles call SyncLightingLUT for it
bool lut_dirty[6];
|
||||
// Set by the Sync* functions when `data` changed; cleared after the buffer upload
bool dirty;
|
||||
} uniform_block_data;
|
||||
|
||||
@ -298,4 +426,7 @@ private:
|
||||
OGLBuffer vertex_buffer;
|
||||
OGLBuffer uniform_buffer;
|
||||
OGLFramebuffer framebuffer;
|
||||
|
||||
std::array<OGLTexture, 6> lighting_lut;
|
||||
std::array<std::array<GLvec4, 256>, 6> lighting_lut_data;
|
||||
};
|
||||
|
@ -32,12 +32,10 @@ static void AppendSource(std::string& out, TevStageConfig::Source source,
|
||||
out += "primary_color";
|
||||
break;
|
||||
case Source::PrimaryFragmentColor:
|
||||
// HACK: Until we implement fragment lighting, use primary_color
|
||||
out += "primary_color";
|
||||
out += "primary_fragment_color";
|
||||
break;
|
||||
case Source::SecondaryFragmentColor:
|
||||
// HACK: Until we implement fragment lighting, use zero
|
||||
out += "vec4(0.0)";
|
||||
out += "secondary_fragment_color";
|
||||
break;
|
||||
case Source::Texture0:
|
||||
out += "texture(tex[0], texcoord[0])";
|
||||
@ -320,26 +318,229 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi
|
||||
out += "next_combiner_buffer.a = last_tex_env_out.a;\n";
|
||||
}
|
||||
|
||||
/// Writes the code to emulate fragment lighting
|
||||
static void WriteLighting(std::string& out, const PicaShaderConfig& config) {
|
||||
// Define lighting globals
|
||||
out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
||||
"vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n"
|
||||
"vec3 light_vector = vec3(0.0);\n"
|
||||
"vec3 refl_value = vec3(0.0);\n";
|
||||
|
||||
// Compute fragment normals
|
||||
if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) {
|
||||
// Bump mapping is enabled using a normal map, read perturbation vector from the selected texture
|
||||
std::string bump_selector = std::to_string(config.lighting.bump_selector);
|
||||
out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n";
|
||||
|
||||
// Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result
|
||||
if (config.lighting.bump_renorm) {
|
||||
std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))";
|
||||
out += "surface_normal.z = sqrt(max(" + val + ", 0.0));\n";
|
||||
}
|
||||
} else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) {
|
||||
// Bump mapping is enabled using a tangent map
|
||||
LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)");
|
||||
UNIMPLEMENTED();
|
||||
} else {
|
||||
// No bump mapping - surface local normal is just a unit normal
|
||||
out += "vec3 surface_normal = vec3(0.0, 0.0, 1.0);\n";
|
||||
}
|
||||
|
||||
// Rotate the surface-local normal by the interpolated normal quaternion to convert it to eyespace
|
||||
out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n";
|
||||
|
||||
// Gets the index into the specified lookup table for specular lighting
|
||||
auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) {
|
||||
const std::string half_angle = "normalize(normalize(view) + light_vector)";
|
||||
std::string index;
|
||||
switch (input) {
|
||||
case Regs::LightingLutInput::NH:
|
||||
index = "dot(normal, " + half_angle + ")";
|
||||
break;
|
||||
|
||||
case Regs::LightingLutInput::VH:
|
||||
index = std::string("dot(normalize(view), " + half_angle + ")");
|
||||
break;
|
||||
|
||||
case Regs::LightingLutInput::NV:
|
||||
index = std::string("dot(normal, normalize(view))");
|
||||
break;
|
||||
|
||||
case Regs::LightingLutInput::LN:
|
||||
index = std::string("dot(light_vector, normal)");
|
||||
break;
|
||||
|
||||
default:
|
||||
LOG_CRITICAL(HW_GPU, "Unknown lighting LUT input %d\n", (int)input);
|
||||
UNIMPLEMENTED();
|
||||
break;
|
||||
}
|
||||
|
||||
if (abs) {
|
||||
// LUT index is in the range of (0.0, 1.0)
|
||||
index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)";
|
||||
return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))";
|
||||
} else {
|
||||
// LUT index is in the range of (-1.0, 1.0)
|
||||
index = "clamp(" + index + ", -1.0, 1.0)";
|
||||
return "(FLOAT_255 * ((" + index + " < 0) ? " + index + " + 2.0 : " + index + ") / 2.0)";
|
||||
}
|
||||
|
||||
return std::string();
|
||||
};
|
||||
|
||||
// Gets the lighting lookup table value given the specified sampler and index
|
||||
auto GetLutValue = [](Regs::LightingSampler sampler, std::string lut_index) {
|
||||
return std::string("texture(lut[" + std::to_string((unsigned)sampler / 4) + "], " +
|
||||
lut_index + ")[" + std::to_string((unsigned)sampler & 3) + "]");
|
||||
};
|
||||
|
||||
// Write the code to emulate each enabled light
|
||||
for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) {
|
||||
const auto& light_config = config.lighting.light[light_index];
|
||||
std::string light_src = "light_src[" + std::to_string(light_config.num) + "]";
|
||||
|
||||
// Compute light vector (directional or positional)
|
||||
if (light_config.directional)
|
||||
out += "light_vector = normalize(" + light_src + ".position);\n";
|
||||
else
|
||||
out += "light_vector = normalize(" + light_src + ".position + view);\n";
|
||||
|
||||
// Compute dot product of light_vector and normal, adjust if lighting is one-sided or two-sided
|
||||
std::string dot_product = light_config.two_sided_diffuse ? "abs(dot(light_vector, normal))" : "max(dot(light_vector, normal), 0.0)";
|
||||
|
||||
// If enabled, compute distance attenuation value
|
||||
std::string dist_atten = "1.0";
|
||||
if (light_config.dist_atten_enable) {
|
||||
std::string scale = std::to_string(light_config.dist_atten_scale);
|
||||
std::string bias = std::to_string(light_config.dist_atten_bias);
|
||||
std::string index = "(" + scale + " * length(-view - " + light_src + ".position) + " + bias + ")";
|
||||
index = "((clamp(" + index + ", 0.0, FLOAT_255)))";
|
||||
const unsigned lut_num = ((unsigned)Regs::LightingSampler::DistanceAttenuation + light_config.num);
|
||||
dist_atten = GetLutValue((Regs::LightingSampler)lut_num, index);
|
||||
}
|
||||
|
||||
// If enabled, clamp specular component if lighting result is negative
|
||||
std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0";
|
||||
|
||||
// Specular 0 component
|
||||
std::string d0_lut_value = "1.0";
|
||||
if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) {
|
||||
// Lookup specular "distribution 0" LUT value
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input);
|
||||
d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")";
|
||||
}
|
||||
std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)";
|
||||
|
||||
// If enabled, lookup ReflectRed value, otherwise, 1.0 is used
|
||||
if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) {
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input);
|
||||
std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")";
|
||||
out += "refl_value.r = " + value + ";\n";
|
||||
} else {
|
||||
out += "refl_value.r = 1.0;\n";
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used
|
||||
if (config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) {
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input);
|
||||
std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")";
|
||||
out += "refl_value.g = " + value + ";\n";
|
||||
} else {
|
||||
out += "refl_value.g = refl_value.r;\n";
|
||||
}
|
||||
|
||||
// If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used
|
||||
if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) {
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input);
|
||||
std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")";
|
||||
out += "refl_value.b = " + value + ";\n";
|
||||
} else {
|
||||
out += "refl_value.b = refl_value.r;\n";
|
||||
}
|
||||
|
||||
// Specular 1 component
|
||||
std::string d1_lut_value = "1.0";
|
||||
if (config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) {
|
||||
// Lookup specular "distribution 1" LUT value
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input);
|
||||
d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")";
|
||||
}
|
||||
std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)";
|
||||
|
||||
// Fresnel
|
||||
if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) {
|
||||
// Lookup fresnel LUT value
|
||||
std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input);
|
||||
std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")";
|
||||
|
||||
// Enabled for diffuse lighting alpha component
|
||||
if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha ||
|
||||
config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
|
||||
out += "diffuse_sum.a *= " + value + ";\n";
|
||||
|
||||
// Enabled for the specular lighting alpha component
|
||||
if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha ||
|
||||
config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both)
|
||||
out += "specular_sum.a *= " + value + ";\n";
|
||||
}
|
||||
|
||||
// Compute primary fragment color (diffuse lighting) function
|
||||
out += "diffuse_sum.rgb += ((" + light_src + ".diffuse * " + dot_product + ") + " + light_src + ".ambient) * " + dist_atten + ";\n";
|
||||
|
||||
// Compute secondary fragment color (specular lighting) function
|
||||
out += "specular_sum.rgb += (" + specular_0 + " + " + specular_1 + ") * " + clamp_highlights + " * " + dist_atten + ";\n";
|
||||
}
|
||||
|
||||
// Sum final lighting result
|
||||
out += "diffuse_sum.rgb += lighting_global_ambient;\n";
|
||||
out += "primary_fragment_color = clamp(diffuse_sum, vec4(0.0), vec4(1.0));\n";
|
||||
out += "secondary_fragment_color = clamp(specular_sum, vec4(0.0), vec4(1.0));\n";
|
||||
}
|
||||
|
||||
std::string GenerateFragmentShader(const PicaShaderConfig& config) {
|
||||
std::string out = R"(
|
||||
#version 330 core
|
||||
#define NUM_TEV_STAGES 6
|
||||
#define NUM_LIGHTS 8
|
||||
#define LIGHTING_LUT_SIZE 256
|
||||
#define FLOAT_255 (255.0 / 256.0)
|
||||
|
||||
in vec4 primary_color;
|
||||
in vec2 texcoord[3];
|
||||
in vec4 normquat;
|
||||
in vec3 view;
|
||||
|
||||
out vec4 color;
|
||||
|
||||
struct LightSrc {
|
||||
vec3 specular_0;
|
||||
vec3 specular_1;
|
||||
vec3 diffuse;
|
||||
vec3 ambient;
|
||||
vec3 position;
|
||||
};
|
||||
|
||||
layout (std140) uniform shader_data {
|
||||
vec4 const_color[NUM_TEV_STAGES];
|
||||
vec4 tev_combiner_buffer_color;
|
||||
int alphatest_ref;
|
||||
float depth_offset;
|
||||
vec3 lighting_global_ambient;
|
||||
LightSrc light_src[NUM_LIGHTS];
|
||||
};
|
||||
|
||||
uniform sampler2D tex[3];
|
||||
uniform sampler1D lut[6];
|
||||
|
||||
// Rotate the vector v by the quaternion q
|
||||
vec3 quaternion_rotate(vec4 q, vec3 v) {
|
||||
return v + 2.0 * cross(q.xyz, cross(q.xyz, v) + q.w * v);
|
||||
}
|
||||
|
||||
void main() {
|
||||
vec4 primary_fragment_color = vec4(0.0);
|
||||
vec4 secondary_fragment_color = vec4(0.0);
|
||||
)";
|
||||
|
||||
// Do not do any sort of processing if it's obvious we're not going to pass the alpha test
|
||||
@ -348,6 +549,9 @@ void main() {
|
||||
return out;
|
||||
}
|
||||
|
||||
if (config.lighting.enable)
|
||||
WriteLighting(out, config);
|
||||
|
||||
out += "vec4 combiner_buffer = vec4(0.0);\n";
|
||||
out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n";
|
||||
out += "vec4 last_tex_env_out = vec4(0.0);\n";
|
||||
@ -369,21 +573,28 @@ void main() {
|
||||
|
||||
std::string GenerateVertexShader() {
|
||||
std::string out = "#version 330 core\n";
|
||||
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n";
|
||||
out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n";
|
||||
|
||||
out += R"(
|
||||
out vec4 primary_color;
|
||||
out vec2 texcoord[3];
|
||||
out vec4 normquat;
|
||||
out vec3 view;
|
||||
|
||||
void main() {
|
||||
primary_color = vert_color;
|
||||
texcoord[0] = vert_texcoord0;
|
||||
texcoord[1] = vert_texcoord1;
|
||||
texcoord[2] = vert_texcoord2;
|
||||
normquat = vert_normquat;
|
||||
view = vert_view;
|
||||
gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w);
|
||||
}
|
||||
)";
|
||||
|
@ -14,6 +14,8 @@ enum Attributes {
|
||||
ATTRIBUTE_TEXCOORD0,
|
||||
ATTRIBUTE_TEXCOORD1,
|
||||
ATTRIBUTE_TEXCOORD2,
|
||||
ATTRIBUTE_NORMQUAT,
|
||||
ATTRIBUTE_VIEW,
|
||||
};
|
||||
|
||||
/**
|
||||
|
@ -170,6 +170,14 @@ void OpenGLState::Apply() {
|
||||
}
|
||||
}
|
||||
|
||||
// Lighting LUTs
|
||||
for (unsigned i = 0; i < ARRAY_SIZE(lighting_lut); ++i) {
|
||||
if (lighting_lut[i].texture_1d != cur_state.lighting_lut[i].texture_1d) {
|
||||
glActiveTexture(GL_TEXTURE3 + i);
|
||||
glBindTexture(GL_TEXTURE_1D, lighting_lut[i].texture_1d);
|
||||
}
|
||||
}
|
||||
|
||||
// Framebuffer
|
||||
if (draw.framebuffer != cur_state.draw.framebuffer) {
|
||||
glBindFramebuffer(GL_FRAMEBUFFER, draw.framebuffer);
|
||||
|
@ -61,6 +61,10 @@ public:
|
||||
GLuint sampler; // GL_SAMPLER_BINDING
|
||||
} texture_units[3];
|
||||
|
||||
// Per-slot lighting LUT state: six entries, each holding one 1D texture name
struct {
|
||||
GLuint texture_1d; // GL_TEXTURE_BINDING_1D
|
||||
} lighting_lut[6];
|
||||
|
||||
struct {
|
||||
GLuint framebuffer; // GL_DRAW_FRAMEBUFFER_BINDING
|
||||
GLuint vertex_array; // GL_VERTEX_ARRAY_BINDING
|
||||
|
@ -10,6 +10,9 @@
|
||||
|
||||
#include "video_core/pica.h"
|
||||
|
||||
// Shorthand aliases for fixed-size arrays of 3 and 4 GLfloat components
using GLvec3 = std::array<GLfloat, 3>;
|
||||
using GLvec4 = std::array<GLfloat, 4>;
|
||||
|
||||
namespace PicaToGL {
|
||||
|
||||
inline GLenum TextureFilterMode(Pica::Regs::TextureConfig::TextureFilter mode) {
|
||||
@ -175,7 +178,7 @@ inline GLenum StencilOp(Pica::Regs::StencilAction action) {
|
||||
return stencil_op_table[(unsigned)action];
|
||||
}
|
||||
|
||||
inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
|
||||
inline GLvec4 ColorRGBA8(const u32 color) {
|
||||
return { { (color >> 0 & 0xFF) / 255.0f,
|
||||
(color >> 8 & 0xFF) / 255.0f,
|
||||
(color >> 16 & 0xFF) / 255.0f,
|
||||
@ -183,4 +186,11 @@ inline std::array<GLfloat, 4> ColorRGBA8(const u32 color) {
|
||||
} };
|
||||
}
|
||||
|
||||
/// Converts a PICA light color register into an RGB float triple by dividing
/// each of its r, g and b channels by 255.
inline std::array<GLfloat, 3> LightColor(const Pica::Regs::LightColor& color) {
    const GLfloat red   = color.r / 255.0f;
    const GLfloat green = color.g / 255.0f;
    const GLfloat blue  = color.b / 255.0f;

    return { { red, green, blue } };
}
|
||||
|
||||
} // namespace
|
||||
|
@ -81,8 +81,8 @@ struct ScreenRectVertex {
|
||||
* The projection part of the matrix is trivial, hence these operations are represented
|
||||
* by a 3x2 matrix.
|
||||
*/
|
||||
static std::array<GLfloat, 3*2> MakeOrthographicMatrix(const float width, const float height) {
|
||||
std::array<GLfloat, 3*2> matrix;
|
||||
static std::array<GLfloat, 3 * 2> MakeOrthographicMatrix(const float width, const float height) {
|
||||
std::array<GLfloat, 3 * 2> matrix;
|
||||
|
||||
matrix[0] = 2.f / width; matrix[2] = 0.f; matrix[4] = -1.f;
|
||||
matrix[1] = 0.f; matrix[3] = -2.f / height; matrix[5] = 1.f;
|
||||
|
@ -134,11 +134,13 @@ OutputVertex Run(UnitState<false>& state, const InputVertex& input, int num_attr
|
||||
std::fmin(std::fabs(ret.color[i].ToFloat32()), 1.0f));
|
||||
}
|
||||
|
||||
LOG_TRACE(Render_Software, "Output vertex: pos (%.2f, %.2f, %.2f, %.2f), quat (%.2f, %.2f, %.2f, %.2f), col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f)",
|
||||
LOG_TRACE(Render_Software, "Output vertex: pos(%.2f, %.2f, %.2f, %.2f), quat(%.2f, %.2f, %.2f, %.2f), "
|
||||
"col(%.2f, %.2f, %.2f, %.2f), tc0(%.2f, %.2f), view(%.2f, %.2f, %.2f)",
|
||||
ret.pos.x.ToFloat32(), ret.pos.y.ToFloat32(), ret.pos.z.ToFloat32(), ret.pos.w.ToFloat32(),
|
||||
ret.quat.x.ToFloat32(), ret.quat.y.ToFloat32(), ret.quat.z.ToFloat32(), ret.quat.w.ToFloat32(),
|
||||
ret.color.x.ToFloat32(), ret.color.y.ToFloat32(), ret.color.z.ToFloat32(), ret.color.w.ToFloat32(),
|
||||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32());
|
||||
ret.tc0.u().ToFloat32(), ret.tc0.v().ToFloat32(),
|
||||
ret.view.x.ToFloat32(), ret.view.y.ToFloat32(), ret.view.z.ToFloat32());
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
@ -37,17 +37,19 @@ struct OutputVertex {
|
||||
Math::Vec4<float24> color;
|
||||
Math::Vec2<float24> tc0;
|
||||
Math::Vec2<float24> tc1;
|
||||
float24 pad[6];
|
||||
INSERT_PADDING_WORDS(2);
|
||||
Math::Vec3<float24> view;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
Math::Vec2<float24> tc2;
|
||||
|
||||
// Padding for optimal alignment
|
||||
float24 pad2[4];
|
||||
INSERT_PADDING_WORDS(4);
|
||||
|
||||
// Attributes used to store intermediate results
|
||||
|
||||
// position after perspective divide
|
||||
Math::Vec3<float24> screenpos;
|
||||
float24 pad3;
|
||||
INSERT_PADDING_WORDS(1);
|
||||
|
||||
// Linear interpolation
|
||||
// factor: 0=this, 1=vtx
|
||||
|
Loading…
Reference in New Issue
Block a user