diff --git a/CMakeLists.txt b/CMakeLists.txt index d628ecc50..8f2898973 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -152,12 +152,15 @@ if (ENABLE_SDL2) download_bundled_external("sdl2/" ${SDL2_VER} SDL2_PREFIX) endif() + set(SDL2_FOUND YES) set(SDL2_INCLUDE_DIR "${SDL2_PREFIX}/include" CACHE PATH "Path to SDL2 headers") set(SDL2_LIBRARY "${SDL2_PREFIX}/lib/x64/SDL2.lib" CACHE PATH "Path to SDL2 library") set(SDL2_DLL_DIR "${SDL2_PREFIX}/lib/x64/" CACHE PATH "Path to SDL2.dll") else() find_package(SDL2 REQUIRED) endif() +else() + set(SDL2_FOUND NO) endif() IF (APPLE) diff --git a/src/audio_core/CMakeLists.txt b/src/audio_core/CMakeLists.txt index 5a2747e78..13b5e400e 100644 --- a/src/audio_core/CMakeLists.txt +++ b/src/audio_core/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS hle/dsp.cpp hle/filter.cpp hle/pipe.cpp + hle/source.cpp interpolate.cpp sink_details.cpp ) @@ -15,6 +16,7 @@ set(HEADERS hle/dsp.h hle/filter.h hle/pipe.h + hle/source.h interpolate.h null_sink.h sink.h @@ -23,7 +25,18 @@ set(HEADERS include_directories(../../externals/soundtouch/include) +if(SDL2_FOUND) + set(SRCS ${SRCS} sdl2_sink.cpp) + set(HEADERS ${HEADERS} sdl2_sink.h) + include_directories(${SDL2_INCLUDE_DIR}) +endif() + create_directory_groups(${SRCS} ${HEADERS}) add_library(audio_core STATIC ${SRCS} ${HEADERS}) target_link_libraries(audio_core SoundTouch) + +if(SDL2_FOUND) + target_link_libraries(audio_core ${SDL2_LIBRARY}) + set_property(TARGET audio_core APPEND PROPERTY COMPILE_DEFINITIONS HAVE_SDL2) +endif() diff --git a/src/audio_core/hle/common.h b/src/audio_core/hle/common.h index 7910f42ae..596b67eaf 100644 --- a/src/audio_core/hle/common.h +++ b/src/audio_core/hle/common.h @@ -27,7 +27,7 @@ using QuadFrame32 = std::array, samples_per_frame>; */ template void FilterFrame(FrameT& frame, FilterT& filter) { - std::transform(frame.begin(), frame.end(), frame.begin(), [&filter](const typename FrameT::value_type& sample) { + std::transform(frame.begin(), frame.end(), frame.begin(), 
[&filter](const auto& sample) { return filter.ProcessSample(sample); }); } diff --git a/src/audio_core/hle/dsp.cpp b/src/audio_core/hle/dsp.cpp index 4d44bd2d9..0cdbdb06a 100644 --- a/src/audio_core/hle/dsp.cpp +++ b/src/audio_core/hle/dsp.cpp @@ -2,10 +2,12 @@ // Licensed under GPLv2 or any later version // Refer to the license.txt file included. +#include #include #include "audio_core/hle/dsp.h" #include "audio_core/hle/pipe.h" +#include "audio_core/hle/source.h" #include "audio_core/sink.h" namespace DSP { @@ -38,16 +40,38 @@ static SharedMemory& WriteRegion() { return g_regions[1 - CurrentRegionIndex()]; } +static std::array sources = { + Source(0), Source(1), Source(2), Source(3), Source(4), Source(5), + Source(6), Source(7), Source(8), Source(9), Source(10), Source(11), + Source(12), Source(13), Source(14), Source(15), Source(16), Source(17), + Source(18), Source(19), Source(20), Source(21), Source(22), Source(23) +}; + static std::unique_ptr sink; void Init() { DSP::HLE::ResetPipes(); + for (auto& source : sources) { + source.Reset(); + } } void Shutdown() { } bool Tick() { + SharedMemory& read = ReadRegion(); + SharedMemory& write = WriteRegion(); + + std::array intermediate_mixes = {}; + + for (size_t i = 0; i < num_sources; i++) { + write.source_statuses.status[i] = sources[i].Tick(read.source_configurations.config[i], read.adpcm_coefficients.coeff[i]); + for (size_t mix = 0; mix < 3; mix++) { + sources[i].MixInto(intermediate_mixes[mix], mix); + } + } + return true; } diff --git a/src/audio_core/hle/dsp.h b/src/audio_core/hle/dsp.h index 4f2410c27..f6e53f68f 100644 --- a/src/audio_core/hle/dsp.h +++ b/src/audio_core/hle/dsp.h @@ -33,13 +33,9 @@ namespace HLE { // double-buffer. The frame counter is located as the very last u16 of each region and is incremented // each audio tick. -struct SharedMemory; - constexpr VAddr region0_base = 0x1FF50000; constexpr VAddr region1_base = 0x1FF70000; -extern std::array g_regions; - /** * The DSP is native 16-bit. 
The DSP also appears to be big-endian. When reading 32-bit numbers from * its memory regions, the higher and lower 16-bit halves are swapped compared to the little-endian @@ -169,9 +165,9 @@ struct SourceConfiguration { float_le rate_multiplier; enum class InterpolationMode : u8 { - None = 0, + Polyphase = 0, Linear = 1, - Polyphase = 2 + None = 2 }; InterpolationMode interpolation_mode; @@ -318,10 +314,10 @@ ASSERT_DSP_STRUCT(SourceConfiguration::Configuration::Buffer, 20); struct SourceStatus { struct Status { u8 is_enabled; ///< Is this channel enabled? (Doesn't have to be playing anything.) - u8 previous_buffer_id_dirty; ///< Non-zero when previous_buffer_id changes + u8 current_buffer_id_dirty; ///< Non-zero when current_buffer_id changes u16_le sync; ///< Is set by the DSP to the value of SourceConfiguration::sync u32_dsp buffer_position; ///< Number of samples into the current buffer - u16_le previous_buffer_id; ///< Updated when a buffer finishes playing + u16_le current_buffer_id; ///< Updated when a buffer finishes playing INSERT_PADDING_DSPWORDS(1); }; @@ -507,6 +503,8 @@ struct SharedMemory { }; ASSERT_DSP_STRUCT(SharedMemory, 0x8000); +extern std::array g_regions; + // Structures must have an offset that is a multiple of two. static_assert(offsetof(SharedMemory, frame_counter) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); static_assert(offsetof(SharedMemory, source_configurations) % 2 == 0, "Structures in DSP::HLE::SharedMemory must be 2-byte aligned"); diff --git a/src/audio_core/hle/filter.h b/src/audio_core/hle/filter.h index 75738f600..43d2035cd 100644 --- a/src/audio_core/hle/filter.h +++ b/src/audio_core/hle/filter.h @@ -16,6 +16,7 @@ namespace HLE { /// Preprocessing filters. There is an independent set of filters for each Source. class SourceFilters final { +public: SourceFilters() { Reset(); } /// Reset internal state. 
diff --git a/src/audio_core/hle/pipe.cpp b/src/audio_core/hle/pipe.cpp index 03280780f..44dff1345 100644 --- a/src/audio_core/hle/pipe.cpp +++ b/src/audio_core/hle/pipe.cpp @@ -36,12 +36,17 @@ std::vector PipeRead(DspPipe pipe_number, u32 length) { return {}; } + if (length > UINT16_MAX) { // Can only read at most UINT16_MAX from the pipe + LOG_ERROR(Audio_DSP, "length of %u greater than max of %u", length, UINT16_MAX); + return {}; + } + std::vector& data = pipe_data[pipe_index]; if (length > data.size()) { LOG_WARNING(Audio_DSP, "pipe_number = %zu is out of data, application requested read of %u but %zu remain", pipe_index, length, data.size()); - length = data.size(); + length = static_cast(data.size()); } if (length == 0) @@ -94,7 +99,7 @@ static void AudioPipeWriteStructAddresses() { }; // Begin with a u16 denoting the number of structs. - WriteU16(DspPipe::Audio, struct_addresses.size()); + WriteU16(DspPipe::Audio, static_cast(struct_addresses.size())); // Then write the struct addresses. for (u16 addr : struct_addresses) { WriteU16(DspPipe::Audio, addr); diff --git a/src/audio_core/hle/pipe.h b/src/audio_core/hle/pipe.h index 64d97f8ba..b714c0496 100644 --- a/src/audio_core/hle/pipe.h +++ b/src/audio_core/hle/pipe.h @@ -24,10 +24,14 @@ enum class DspPipe { constexpr size_t NUM_DSP_PIPE = 8; /** - * Read a DSP pipe. - * @param pipe_number The Pipe ID - * @param length How much data to request. - * @return The data read from the pipe. The size of this vector can be less than the length requested. + * Reads `length` bytes from the DSP pipe identified with `pipe_number`. + * @note Can read up to the maximum value of a u16 in bytes (65,535). + * @note IF an error is encountered with either an invalid `pipe_number` or `length` value, an empty vector will be returned. + * @note IF `length` is set to 0, an empty vector will be returned. + * @note IF `length` is greater than the amount of data available, this function will only read the available amount. 
+ * @param pipe_number a `DspPipe` + * @param length the number of bytes to read. The max is 65,535 (max of u16). + * @returns a vector of bytes from the specified pipe. On error, will be empty. */ std::vector PipeRead(DspPipe pipe_number, u32 length); diff --git a/src/audio_core/hle/source.cpp b/src/audio_core/hle/source.cpp new file mode 100644 index 000000000..30552fe26 --- /dev/null +++ b/src/audio_core/hle/source.cpp @@ -0,0 +1,320 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include "audio_core/codec.h" +#include "audio_core/hle/common.h" +#include "audio_core/hle/source.h" +#include "audio_core/interpolate.h" + +#include "common/assert.h" +#include "common/logging/log.h" + +#include "core/memory.h" + +namespace DSP { +namespace HLE { + +SourceStatus::Status Source::Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) { + ParseConfig(config, adpcm_coeffs); + + if (state.enabled) { + GenerateFrame(); + } + + return GetCurrentStatus(); +} + +void Source::MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const { + if (!state.enabled) + return; + + const std::array& gains = state.gain.at(intermediate_mix_id); + for (size_t samplei = 0; samplei < samples_per_frame; samplei++) { + // Conversion from stereo (current_frame) to quadraphonic (dest) occurs here. 
+ dest[samplei][0] += static_cast(gains[0] * current_frame[samplei][0]); + dest[samplei][1] += static_cast(gains[1] * current_frame[samplei][1]); + dest[samplei][2] += static_cast(gains[2] * current_frame[samplei][0]); + dest[samplei][3] += static_cast(gains[3] * current_frame[samplei][1]); + } +} + +void Source::Reset() { + current_frame.fill({}); + state = {}; +} + +void Source::ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]) { + if (!config.dirty_raw) { + return; + } + + if (config.reset_flag) { + config.reset_flag.Assign(0); + Reset(); + LOG_TRACE(Audio_DSP, "source_id=%zu reset", source_id); + } + + if (config.partial_reset_flag) { + config.partial_reset_flag.Assign(0); + state.input_queue = std::priority_queue, BufferOrder>{}; + LOG_TRACE(Audio_DSP, "source_id=%zu partial_reset", source_id); + } + + if (config.enable_dirty) { + config.enable_dirty.Assign(0); + state.enabled = config.enable != 0; + LOG_TRACE(Audio_DSP, "source_id=%zu enable=%d", source_id, state.enabled); + } + + if (config.sync_dirty) { + config.sync_dirty.Assign(0); + state.sync = config.sync; + LOG_TRACE(Audio_DSP, "source_id=%zu sync=%u", source_id, state.sync); + } + + if (config.rate_multiplier_dirty) { + config.rate_multiplier_dirty.Assign(0); + state.rate_multiplier = config.rate_multiplier; + LOG_TRACE(Audio_DSP, "source_id=%zu rate=%f", source_id, state.rate_multiplier); + + if (state.rate_multiplier <= 0) { + LOG_ERROR(Audio_DSP, "Was given an invalid rate multiplier: source_id=%zu rate=%f", source_id, state.rate_multiplier); + state.rate_multiplier = 1.0f; + // Note: Actual firmware starts producing garbage if this occurs. 
+ } + } + + if (config.adpcm_coefficients_dirty) { + config.adpcm_coefficients_dirty.Assign(0); + std::transform(adpcm_coeffs, adpcm_coeffs + state.adpcm_coeffs.size(), state.adpcm_coeffs.begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu adpcm update", source_id); + } + + if (config.gain_0_dirty) { + config.gain_0_dirty.Assign(0); + std::transform(config.gain[0], config.gain[0] + state.gain[0].size(), state.gain[0].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 0 update", source_id); + } + + if (config.gain_1_dirty) { + config.gain_1_dirty.Assign(0); + std::transform(config.gain[1], config.gain[1] + state.gain[1].size(), state.gain[1].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 1 update", source_id); + } + + if (config.gain_2_dirty) { + config.gain_2_dirty.Assign(0); + std::transform(config.gain[2], config.gain[2] + state.gain[2].size(), state.gain[2].begin(), + [](const auto& coeff) { return static_cast(coeff); }); + LOG_TRACE(Audio_DSP, "source_id=%zu gain 2 update", source_id); + } + + if (config.filters_enabled_dirty) { + config.filters_enabled_dirty.Assign(0); + state.filters.Enable(config.simple_filter_enabled.ToBool(), config.biquad_filter_enabled.ToBool()); + LOG_TRACE(Audio_DSP, "source_id=%zu enable_simple=%hu enable_biquad=%hu", + source_id, config.simple_filter_enabled.Value(), config.biquad_filter_enabled.Value()); + } + + if (config.simple_filter_dirty) { + config.simple_filter_dirty.Assign(0); + state.filters.Configure(config.simple_filter); + LOG_TRACE(Audio_DSP, "source_id=%zu simple filter update", source_id); + } + + if (config.biquad_filter_dirty) { + config.biquad_filter_dirty.Assign(0); + state.filters.Configure(config.biquad_filter); + LOG_TRACE(Audio_DSP, "source_id=%zu biquad filter update", source_id); + } + + if (config.interpolation_dirty) { + 
config.interpolation_dirty.Assign(0); + state.interpolation_mode = config.interpolation_mode; + LOG_TRACE(Audio_DSP, "source_id=%zu interpolation_mode=%zu", source_id, static_cast(state.interpolation_mode)); + } + + if (config.format_dirty || config.embedded_buffer_dirty) { + config.format_dirty.Assign(0); + state.format = config.format; + LOG_TRACE(Audio_DSP, "source_id=%zu format=%zu", source_id, static_cast(state.format)); + } + + if (config.mono_or_stereo_dirty || config.embedded_buffer_dirty) { + config.mono_or_stereo_dirty.Assign(0); + state.mono_or_stereo = config.mono_or_stereo; + LOG_TRACE(Audio_DSP, "source_id=%zu mono_or_stereo=%zu", source_id, static_cast(state.mono_or_stereo)); + } + + if (config.embedded_buffer_dirty) { + config.embedded_buffer_dirty.Assign(0); + state.input_queue.emplace(Buffer{ + config.physical_address, + config.length, + static_cast(config.adpcm_ps), + { config.adpcm_yn[0], config.adpcm_yn[1] }, + config.adpcm_dirty.ToBool(), + config.is_looping.ToBool(), + config.buffer_id, + state.mono_or_stereo, + state.format, + false + }); + LOG_TRACE(Audio_DSP, "enqueuing embedded addr=0x%08x len=%u id=%hu", config.physical_address, config.length, config.buffer_id); + } + + if (config.buffer_queue_dirty) { + config.buffer_queue_dirty.Assign(0); + for (size_t i = 0; i < 4; i++) { + if (config.buffers_dirty & (1 << i)) { + const auto& b = config.buffers[i]; + state.input_queue.emplace(Buffer{ + b.physical_address, + b.length, + static_cast(b.adpcm_ps), + { b.adpcm_yn[0], b.adpcm_yn[1] }, + b.adpcm_dirty != 0, + b.is_looping != 0, + b.buffer_id, + state.mono_or_stereo, + state.format, + true + }); + LOG_TRACE(Audio_DSP, "enqueuing queued %zu addr=0x%08x len=%u id=%hu", i, b.physical_address, b.length, b.buffer_id); + } + } + config.buffers_dirty = 0; + } + + if (config.dirty_raw) { + LOG_DEBUG(Audio_DSP, "source_id=%zu remaining_dirty=%x", source_id, config.dirty_raw); + } + + config.dirty_raw = 0; +} + +void Source::GenerateFrame() { + 
current_frame.fill({}); + + if (state.current_buffer.empty() && !DequeueBuffer()) { + state.enabled = false; + state.buffer_update = true; + state.current_buffer_id = 0; + return; + } + + size_t frame_position = 0; + + state.current_sample_number = state.next_sample_number; + while (frame_position < current_frame.size()) { + if (state.current_buffer.empty() && !DequeueBuffer()) { + break; + } + + const size_t size_to_copy = std::min(state.current_buffer.size(), current_frame.size() - frame_position); + + std::copy(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy, current_frame.begin() + frame_position); + state.current_buffer.erase(state.current_buffer.begin(), state.current_buffer.begin() + size_to_copy); + + frame_position += size_to_copy; + state.next_sample_number += static_cast(size_to_copy); + } + + state.filters.ProcessFrame(current_frame); +} + + +bool Source::DequeueBuffer() { + ASSERT_MSG(state.current_buffer.empty(), "Shouldn't dequeue; we still have data in current_buffer"); + + if (state.input_queue.empty()) + return false; + + const Buffer buf = state.input_queue.top(); + state.input_queue.pop(); + + if (buf.adpcm_dirty) { + state.adpcm_state.yn1 = buf.adpcm_yn[0]; + state.adpcm_state.yn2 = buf.adpcm_yn[1]; + } + + if (buf.is_looping) { + LOG_ERROR(Audio_DSP, "Looped buffers are unimplemented at the moment"); + } + + const u8* const memory = Memory::GetPhysicalPointer(buf.physical_address); + if (memory) { + const unsigned num_channels = buf.mono_or_stereo == MonoOrStereo::Stereo ? 
2 : 1; + switch (buf.format) { + case Format::PCM8: + state.current_buffer = Codec::DecodePCM8(num_channels, memory, buf.length); + break; + case Format::PCM16: + state.current_buffer = Codec::DecodePCM16(num_channels, memory, buf.length); + break; + case Format::ADPCM: + DEBUG_ASSERT(num_channels == 1); + state.current_buffer = Codec::DecodeADPCM(memory, buf.length, state.adpcm_coeffs, state.adpcm_state); + break; + default: + UNIMPLEMENTED(); + break; + } + } else { + LOG_WARNING(Audio_DSP, "source_id=%zu buffer_id=%hu length=%u: Invalid physical address 0x%08X", + source_id, buf.buffer_id, buf.length, buf.physical_address); + state.current_buffer.clear(); + return true; + } + + switch (state.interpolation_mode) { + case InterpolationMode::None: + state.current_buffer = AudioInterp::None(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + case InterpolationMode::Linear: + state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + case InterpolationMode::Polyphase: + // TODO(merry): Implement polyphase interpolation + state.current_buffer = AudioInterp::Linear(state.interp_state, state.current_buffer, state.rate_multiplier); + break; + default: + UNIMPLEMENTED(); + break; + } + + state.current_sample_number = 0; + state.next_sample_number = 0; + state.current_buffer_id = buf.buffer_id; + state.buffer_update = buf.from_queue; + + LOG_TRACE(Audio_DSP, "source_id=%zu buffer_id=%hu from_queue=%s current_buffer.size()=%zu", + source_id, buf.buffer_id, buf.from_queue ? "true" : "false", state.current_buffer.size()); + return true; +} + +SourceStatus::Status Source::GetCurrentStatus() { + SourceStatus::Status ret; + + // Applications depend on the correct emulation of + // current_buffer_id_dirty and current_buffer_id to synchronise + // audio with video. + ret.is_enabled = state.enabled; + ret.current_buffer_id_dirty = state.buffer_update ? 
1 : 0; + state.buffer_update = false; + ret.current_buffer_id = state.current_buffer_id; + ret.buffer_position = state.current_sample_number; + ret.sync = state.sync; + + return ret; +} + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h new file mode 100644 index 000000000..7ee08d424 --- /dev/null +++ b/src/audio_core/hle/source.h @@ -0,0 +1,144 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include +#include + +#include "audio_core/codec.h" +#include "audio_core/hle/common.h" +#include "audio_core/hle/dsp.h" +#include "audio_core/hle/filter.h" +#include "audio_core/interpolate.h" + +#include "common/common_types.h" + +namespace DSP { +namespace HLE { + +/** + * This module performs: + * - Buffer management + * - Decoding of buffers + * - Buffer resampling and interpolation + * - Per-source filtering (SimpleFilter, BiquadFilter) + * - Per-source gain + * - Other per-source processing + */ +class Source final { +public: + explicit Source(size_t source_id_) : source_id(source_id_) { + Reset(); + } + + /// Resets internal state. + void Reset(); + + /** + * This is called once every audio frame. This performs per-source processing every frame. + * @param config The new configuration we've got for this Source from the application. + * @param adpcm_coeffs ADPCM coefficients to use if config tells us to use them (may contain invalid values otherwise). + * @return The current status of this Source. This is given back to the emulated application via SharedMemory. + */ + SourceStatus::Status Tick(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]); + + /** + * Mix this source's output into dest, using the gains for the `intermediate_mix_id`-th intermediate mixer. + * @param dest The QuadFrame32 to mix into. 
+ * @param intermediate_mix_id The id of the intermediate mix whose gains we are using. + */ + void MixInto(QuadFrame32& dest, size_t intermediate_mix_id) const; + +private: + const size_t source_id; + StereoFrame16 current_frame; + + using Format = SourceConfiguration::Configuration::Format; + using InterpolationMode = SourceConfiguration::Configuration::InterpolationMode; + using MonoOrStereo = SourceConfiguration::Configuration::MonoOrStereo; + + /// Internal representation of a buffer for our buffer queue + struct Buffer { + PAddr physical_address; + u32 length; + u8 adpcm_ps; + std::array adpcm_yn; + bool adpcm_dirty; + bool is_looping; + u16 buffer_id; + + MonoOrStereo mono_or_stereo; + Format format; + + bool from_queue; + }; + + struct BufferOrder { + bool operator() (const Buffer& a, const Buffer& b) const { + // Lower buffer_id comes first. + return a.buffer_id > b.buffer_id; + } + }; + + struct { + + // State variables + + bool enabled = false; + u16 sync = 0; + + // Mixing + + std::array, 3> gain = {}; + + // Buffer queue + + std::priority_queue, BufferOrder> input_queue; + MonoOrStereo mono_or_stereo = MonoOrStereo::Mono; + Format format = Format::ADPCM; + + // Current buffer + + u32 current_sample_number = 0; + u32 next_sample_number = 0; + std::vector> current_buffer; + + // buffer_id state + + bool buffer_update = false; + u32 current_buffer_id = 0; + + // Decoding state + + std::array adpcm_coeffs = {}; + Codec::ADPCMState adpcm_state = {}; + + // Resampling state + + float rate_multiplier = 1.0; + InterpolationMode interpolation_mode = InterpolationMode::Polyphase; + AudioInterp::State interp_state = {}; + + // Filter state + + SourceFilters filters; + + } state; + + // Internal functions + + /// INTERNAL: Update our internal state based on the current config. 
+ void ParseConfig(SourceConfiguration::Configuration& config, const s16_le (&adpcm_coeffs)[16]); + /// INTERNAL: Generate the current audio output for this frame based on our internal state. + void GenerateFrame(); + /// INTERNAL: Dequeues a buffer and does preprocessing on it (decoding, resampling). Puts it into current_buffer. + bool DequeueBuffer(); + /// INTERNAL: Generates a SourceStatus::Status based on our internal state. + SourceStatus::Status GetCurrentStatus(); +}; + +} // namespace HLE +} // namespace DSP diff --git a/src/audio_core/sdl2_sink.cpp b/src/audio_core/sdl2_sink.cpp new file mode 100644 index 000000000..dc75c04ee --- /dev/null +++ b/src/audio_core/sdl2_sink.cpp @@ -0,0 +1,126 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#include +#include + +#include + +#include "audio_core/audio_core.h" +#include "audio_core/sdl2_sink.h" + +#include "common/assert.h" +#include "common/logging/log.h" +#include + +namespace AudioCore { + +struct SDL2Sink::Impl { + unsigned int sample_rate = 0; + + SDL_AudioDeviceID audio_device_id = 0; + + std::list> queue; + + static void Callback(void* impl_, u8* buffer, int buffer_size_in_bytes); +}; + +SDL2Sink::SDL2Sink() : impl(std::make_unique()) { + if (SDL_Init(SDL_INIT_AUDIO) < 0) { + LOG_CRITICAL(Audio_Sink, "SDL_Init(SDL_INIT_AUDIO) failed"); + impl->audio_device_id = 0; + return; + } + + SDL_AudioSpec desired_audiospec; + SDL_zero(desired_audiospec); + desired_audiospec.format = AUDIO_S16; + desired_audiospec.channels = 2; + desired_audiospec.freq = native_sample_rate; + desired_audiospec.samples = 1024; + desired_audiospec.userdata = impl.get(); + desired_audiospec.callback = &Impl::Callback; + + SDL_AudioSpec obtained_audiospec; + SDL_zero(obtained_audiospec); + + impl->audio_device_id = SDL_OpenAudioDevice(nullptr, false, &desired_audiospec, &obtained_audiospec, 0); + if (impl->audio_device_id <= 0) { + 
LOG_CRITICAL(Audio_Sink, "SDL_OpenAudioDevice failed"); + return; + } + + impl->sample_rate = obtained_audiospec.freq; + + // SDL2 audio devices start out paused, unpause it: + SDL_PauseAudioDevice(impl->audio_device_id, 0); +} + +SDL2Sink::~SDL2Sink() { + if (impl->audio_device_id <= 0) + return; + + SDL_CloseAudioDevice(impl->audio_device_id); +} + +unsigned int SDL2Sink::GetNativeSampleRate() const { + if (impl->audio_device_id <= 0) + return native_sample_rate; + + return impl->sample_rate; +} + +void SDL2Sink::EnqueueSamples(const std::vector& samples) { + if (impl->audio_device_id <= 0) + return; + + ASSERT_MSG(samples.size() % 2 == 0, "Samples must be in interleaved stereo PCM16 format (size must be a multiple of two)"); + + SDL_LockAudioDevice(impl->audio_device_id); + impl->queue.emplace_back(samples); + SDL_UnlockAudioDevice(impl->audio_device_id); +} + +size_t SDL2Sink::SamplesInQueue() const { + if (impl->audio_device_id <= 0) + return 0; + + SDL_LockAudioDevice(impl->audio_device_id); + + size_t total_size = std::accumulate(impl->queue.begin(), impl->queue.end(), static_cast(0), + [](size_t sum, const auto& buffer) { + // Division by two because each stereo sample is made of two s16. + return sum + buffer.size() / 2; + }); + + SDL_UnlockAudioDevice(impl->audio_device_id); + + return total_size; +} + +void SDL2Sink::Impl::Callback(void* impl_, u8* buffer, int buffer_size_in_bytes) { + Impl* impl = reinterpret_cast(impl_); + + size_t remaining_size = static_cast(buffer_size_in_bytes) / sizeof(s16); // Keep track of size in 16-bit increments. 
+ + while (remaining_size > 0 && !impl->queue.empty()) { + if (impl->queue.front().size() <= remaining_size) { + memcpy(buffer, impl->queue.front().data(), impl->queue.front().size() * sizeof(s16)); + buffer += impl->queue.front().size() * sizeof(s16); + remaining_size -= impl->queue.front().size(); + impl->queue.pop_front(); + } else { + memcpy(buffer, impl->queue.front().data(), remaining_size * sizeof(s16)); + buffer += remaining_size * sizeof(s16); + impl->queue.front().erase(impl->queue.front().begin(), impl->queue.front().begin() + remaining_size); + remaining_size = 0; + } + } + + if (remaining_size > 0) { + memset(buffer, 0, remaining_size * sizeof(s16)); + } +} + +} // namespace AudioCore diff --git a/src/audio_core/sdl2_sink.h b/src/audio_core/sdl2_sink.h new file mode 100644 index 000000000..0f296b673 --- /dev/null +++ b/src/audio_core/sdl2_sink.h @@ -0,0 +1,30 @@ +// Copyright 2016 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. + +#pragma once + +#include +#include + +#include "audio_core/sink.h" + +namespace AudioCore { + +class SDL2Sink final : public Sink { +public: + SDL2Sink(); + ~SDL2Sink() override; + + unsigned int GetNativeSampleRate() const override; + + void EnqueueSamples(const std::vector& samples) override; + + size_t SamplesInQueue() const override; + +private: + struct Impl; + std::unique_ptr impl; +}; + +} // namespace AudioCore diff --git a/src/audio_core/sink.h b/src/audio_core/sink.h index cad21a85e..1c881c3d2 100644 --- a/src/audio_core/sink.h +++ b/src/audio_core/sink.h @@ -19,7 +19,7 @@ public: virtual ~Sink() = default; /// The native rate of this sink. The sink expects to be fed samples that respect this. (Units: samples/sec) - virtual unsigned GetNativeSampleRate() const = 0; + virtual unsigned int GetNativeSampleRate() const = 0; /** * Feed stereo samples to sink. 
diff --git a/src/audio_core/sink_details.cpp b/src/audio_core/sink_details.cpp index d2cc74103..ba5e83d17 100644 --- a/src/audio_core/sink_details.cpp +++ b/src/audio_core/sink_details.cpp @@ -8,10 +8,17 @@ #include "audio_core/null_sink.h" #include "audio_core/sink_details.h" +#ifdef HAVE_SDL2 +#include "audio_core/sdl2_sink.h" +#endif + namespace AudioCore { // g_sink_details is ordered in terms of desirability, with the best choice at the top. const std::vector g_sink_details = { +#ifdef HAVE_SDL2 + { "sdl2", []() { return std::make_unique(); } }, +#endif { "null", []() { return std::make_unique(); } }, }; diff --git a/src/citra/config.cpp b/src/citra/config.cpp index 1b14c0b1c..684eba338 100644 --- a/src/citra/config.cpp +++ b/src/citra/config.cpp @@ -88,7 +88,7 @@ void Config::ReadValues() { // Debugging Settings::values.use_gdbstub = sdl2_config->GetBoolean("Debugging", "use_gdbstub", false); - Settings::values.gdbstub_port = sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689); + Settings::values.gdbstub_port = static_cast(sdl2_config->GetInteger("Debugging", "gdbstub_port", 24689)); } void Config::Reload() { diff --git a/src/citra/default_ini.h b/src/citra/default_ini.h index e6647a277..859185317 100644 --- a/src/citra/default_ini.h +++ b/src/citra/default_ini.h @@ -58,7 +58,7 @@ bg_green = [Audio] # Which audio output engine to use. 
-# auto (default): Auto-select, null: No audio output +# auto (default): Auto-select, null: No audio output, sdl2: SDL2 (if available) output_engine = [Data Storage] diff --git a/src/citra/emu_window/emu_window_sdl2.cpp b/src/citra/emu_window/emu_window_sdl2.cpp index 924189f4c..12cdd9d95 100644 --- a/src/citra/emu_window/emu_window_sdl2.cpp +++ b/src/citra/emu_window/emu_window_sdl2.cpp @@ -9,6 +9,8 @@ #define SDL_MAIN_HANDLED #include +#include + #include "common/key_map.h" #include "common/logging/log.h" #include "common/scm_rev.h" @@ -98,6 +100,11 @@ EmuWindow_SDL2::EmuWindow_SDL2() { exit(1); } + if (!gladLoadGLLoader(static_cast(SDL_GL_GetProcAddress))) { + LOG_CRITICAL(Frontend, "Failed to initialize GL functions! Exiting..."); + exit(1); + } + OnResize(); OnMinimalClientAreaChangeRequest(GetActiveConfig().min_client_area_size); SDL_PumpEvents(); diff --git a/src/citra_qt/CMakeLists.txt b/src/citra_qt/CMakeLists.txt index cc9e0c624..3f0099200 100644 --- a/src/citra_qt/CMakeLists.txt +++ b/src/citra_qt/CMakeLists.txt @@ -55,6 +55,7 @@ set(HEADERS configure_dialog.h configure_general.h game_list.h + game_list_p.h hotkeys.h main.h ui_settings.h diff --git a/src/citra_qt/debugger/graphics_breakpoints.cpp b/src/citra_qt/debugger/graphics_breakpoints.cpp index c8510128a..fe66918a8 100644 --- a/src/citra_qt/debugger/graphics_breakpoints.cpp +++ b/src/citra_qt/debugger/graphics_breakpoints.cpp @@ -44,7 +44,7 @@ QVariant BreakPointModel::data(const QModelIndex& index, int role) const { Pica::DebugContext::Event::PicaCommandProcessed, tr("Pica command processed") }, { Pica::DebugContext::Event::IncomingPrimitiveBatch, tr("Incoming primitive batch") }, { Pica::DebugContext::Event::FinishedPrimitiveBatch, tr("Finished primitive batch") }, - { Pica::DebugContext::Event::VertexLoaded, tr("Vertex loaded") }, + { Pica::DebugContext::Event::VertexShaderInvocation, tr("Vertex shader invocation") }, { Pica::DebugContext::Event::IncomingDisplayTransfer, tr("Incoming display 
transfer") }, { Pica::DebugContext::Event::GSPCommandProcessed, tr("GSP command processed") }, { Pica::DebugContext::Event::BufferSwapped, tr("Buffers swapped") } diff --git a/src/citra_qt/debugger/graphics_vertex_shader.cpp b/src/citra_qt/debugger/graphics_vertex_shader.cpp index d648d4640..391666d35 100644 --- a/src/citra_qt/debugger/graphics_vertex_shader.cpp +++ b/src/citra_qt/debugger/graphics_vertex_shader.cpp @@ -365,7 +365,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De input_data[i]->setValidator(new QDoubleValidator(input_data[i])); } - breakpoint_warning = new QLabel(tr("(data only available at VertexLoaded breakpoints)")); + breakpoint_warning = new QLabel(tr("(data only available at vertex shader invocation breakpoints)")); // TODO: Add some button for jumping to the shader entry point @@ -454,7 +454,7 @@ GraphicsVertexShaderWidget::GraphicsVertexShaderWidget(std::shared_ptr< Pica::De void GraphicsVertexShaderWidget::OnBreakPointHit(Pica::DebugContext::Event event, void* data) { auto input = static_cast(data); - if (event == Pica::DebugContext::Event::VertexLoaded) { + if (event == Pica::DebugContext::Event::VertexShaderInvocation) { Reload(true, data); } else { // No vertex data is retrievable => invalidate currently stored vertex data @@ -501,7 +501,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d info.labels.insert({ entry_point, "main" }); // Generate debug information - debug_data = Pica::Shader::ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); + debug_data = Pica::g_state.vs.ProduceDebugInfo(input_vertex, num_attributes, shader_config, shader_setup); // Reload widget state for (int attr = 0; attr < num_attributes; ++attr) { @@ -515,7 +515,7 @@ void GraphicsVertexShaderWidget::Reload(bool replace_vertex_data, void* vertex_d } // Initialize debug info text for current cycle count - cycle_index->setMaximum(debug_data.records.size() - 1); + 
cycle_index->setMaximum(static_cast(debug_data.records.size() - 1)); OnCycleIndexChanged(cycle_index->value()); model->endResetModel(); diff --git a/src/citra_qt/game_list.cpp b/src/citra_qt/game_list.cpp index d14532102..d4ac9c96e 100644 --- a/src/citra_qt/game_list.cpp +++ b/src/citra_qt/game_list.cpp @@ -34,8 +34,8 @@ GameList::GameList(QWidget* parent) tree_view->setUniformRowHeights(true); item_model->insertColumns(0, COLUMN_COUNT); - item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type"); item_model->setHeaderData(COLUMN_NAME, Qt::Horizontal, "Name"); + item_model->setHeaderData(COLUMN_FILE_TYPE, Qt::Horizontal, "File type"); item_model->setHeaderData(COLUMN_SIZE, Qt::Horizontal, "Size"); connect(tree_view, SIGNAL(activated(const QModelIndex&)), this, SLOT(ValidateEntry(const QModelIndex&))); @@ -109,7 +109,11 @@ void GameList::SaveInterfaceLayout() void GameList::LoadInterfaceLayout() { auto header = tree_view->header(); - header->restoreState(UISettings::values.gamelist_header_state); + if (!header->restoreState(UISettings::values.gamelist_header_state)) { + // We are using the name column to display icons and titles + // so make it as large as possible as default. 
+ header->resizeSection(COLUMN_NAME, header->width()); + } item_model->sort(header->sortIndicatorSection(), header->sortIndicatorOrder()); } @@ -143,9 +147,15 @@ void GameListWorker::AddFstEntriesToGameList(const std::string& dir_path, bool d LOG_WARNING(Frontend, "Filetype and extension of file %s do not match.", physical_name.c_str()); } + std::vector smdh; + std::unique_ptr loader = Loader::GetLoader(FileUtil::IOFile(physical_name, "rb"), filetype, filename_filename, physical_name); + + if (loader) + loader->ReadIcon(smdh); + emit EntryReady({ + new GameListItemPath(QString::fromStdString(physical_name), smdh), new GameListItem(QString::fromStdString(Loader::GetFileTypeString(filetype))), - new GameListItemPath(QString::fromStdString(physical_name)), new GameListItemSize(FileUtil::GetSize(physical_name)), }); } diff --git a/src/citra_qt/game_list.h b/src/citra_qt/game_list.h index 48febdc60..198674f04 100644 --- a/src/citra_qt/game_list.h +++ b/src/citra_qt/game_list.h @@ -20,8 +20,8 @@ class GameList : public QWidget { public: enum { - COLUMN_FILE_TYPE, COLUMN_NAME, + COLUMN_FILE_TYPE, COLUMN_SIZE, COLUMN_COUNT, // Number of columns }; diff --git a/src/citra_qt/game_list_p.h b/src/citra_qt/game_list_p.h index 820012bce..284f5da81 100644 --- a/src/citra_qt/game_list_p.h +++ b/src/citra_qt/game_list_p.h @@ -6,13 +6,85 @@ #include +#include #include #include #include #include "citra_qt/util/util.h" #include "common/string_util.h" +#include "common/color.h" +#include "core/loader/loader.h" + +#include "video_core/utils.h" + +/** + * Tests if data is a valid SMDH by its length and magic number. 
+ * @param smdh_data data buffer to test + * @return bool test result + */ +static bool IsValidSMDH(const std::vector& smdh_data) { + if (smdh_data.size() < sizeof(Loader::SMDH)) + return false; + + u32 magic; + memcpy(&magic, smdh_data.data(), 4); + + return Loader::MakeMagic('S', 'M', 'D', 'H') == magic; +} + +/** + * Gets game icon from SMDH + * @param sdmh SMDH data + * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24) + * @return QPixmap game icon + */ +static QPixmap GetIconFromSMDH(const Loader::SMDH& smdh, bool large) { + u32 size; + const u8* icon_data; + + if (large) { + size = 48; + icon_data = smdh.large_icon.data(); + } else { + size = 24; + icon_data = smdh.small_icon.data(); + } + + QImage icon(size, size, QImage::Format::Format_RGB888); + for (u32 x = 0; x < size; ++x) { + for (u32 y = 0; y < size; ++y) { + u32 coarse_y = y & ~7; + auto v = Color::DecodeRGB565( + icon_data + VideoCore::GetMortonOffset(x, y, 2) + coarse_y * size * 2); + icon.setPixel(x, y, qRgb(v.r(), v.g(), v.b())); + } + } + return QPixmap::fromImage(icon); +} + +/** + * Gets the default icon (for games without valid SMDH) + * @param large If true, returns large icon (48x48), otherwise returns small icon (24x24) + * @return QPixmap default icon + */ +static QPixmap GetDefaultIcon(bool large) { + int size = large ? 48 : 24; + QPixmap icon(size, size); + icon.fill(Qt::transparent); + return icon; +} + +/** + * Gets the short game title fromn SMDH + * @param sdmh SMDH data + * @param language title language + * @return QString short title + */ +static QString GetShortTitleFromSMDH(const Loader::SMDH& smdh, Loader::SMDH::TitleLanguage language) { + return QString::fromUtf16(smdh.titles[static_cast(language)].short_title.data()); +} class GameListItem : public QStandardItem { @@ -27,29 +99,43 @@ public: * A specialization of GameListItem for path values. 
* This class ensures that for every full path value it holds, a correct string representation * of just the filename (with no extension) will be displayed to the user. + * If this class recieves valid SMDH data, it will also display game icons and titles. */ class GameListItemPath : public GameListItem { public: static const int FullPathRole = Qt::UserRole + 1; + static const int TitleRole = Qt::UserRole + 2; GameListItemPath(): GameListItem() {} - GameListItemPath(const QString& game_path): GameListItem() + GameListItemPath(const QString& game_path, const std::vector& smdh_data): GameListItem() { setData(game_path, FullPathRole); + + if (!IsValidSMDH(smdh_data)) { + // SMDH is not valid, set a default icon + setData(GetDefaultIcon(true), Qt::DecorationRole); + return; + } + + Loader::SMDH smdh; + memcpy(&smdh, smdh_data.data(), sizeof(Loader::SMDH)); + + // Get icon from SMDH + setData(GetIconFromSMDH(smdh, true), Qt::DecorationRole); + + // Get title form SMDH + setData(GetShortTitleFromSMDH(smdh, Loader::SMDH::TitleLanguage::English), TitleRole); } - void setData(const QVariant& value, int role) override - { - // By specializing setData for FullPathRole, we can ensure that the two string - // representations of the data are always accurate and in the correct format. - if (role == FullPathRole) { + QVariant data(int role) const override { + if (role == Qt::DisplayRole) { std::string filename; - Common::SplitPath(value.toString().toStdString(), nullptr, &filename, nullptr); - GameListItem::setData(QString::fromStdString(filename), Qt::DisplayRole); - GameListItem::setData(value, FullPathRole); + Common::SplitPath(data(FullPathRole).toString().toStdString(), nullptr, &filename, nullptr); + QString title = data(TitleRole).toString(); + return QString::fromStdString(filename) + (title.isEmpty() ? 
"" : "\n " + title); } else { - GameListItem::setData(value, role); + return GameListItem::data(role); } } }; diff --git a/src/citra_qt/main.cpp b/src/citra_qt/main.cpp index f1ab29755..a85c94a4b 100644 --- a/src/citra_qt/main.cpp +++ b/src/citra_qt/main.cpp @@ -6,6 +6,9 @@ #include #include +#include + +#define QT_NO_OPENGL #include #include #include @@ -240,6 +243,14 @@ bool GMainWindow::InitializeSystem() { if (emu_thread != nullptr) ShutdownGame(); + render_window->MakeCurrent(); + if (!gladLoadGL()) { + QMessageBox::critical(this, tr("Error while starting Citra!"), + tr("Failed to initialize the video core!\n\n" + "Please ensure that your GPU supports OpenGL 3.3 and that you have the latest graphics driver.")); + return false; + } + // Initialize the core emulation System::Result system_result = System::Init(render_window); if (System::Result::Success != system_result) { diff --git a/src/citra_qt/util/util.cpp b/src/citra_qt/util/util.cpp index 8734a8efd..2f9beb5cc 100644 --- a/src/citra_qt/util/util.cpp +++ b/src/citra_qt/util/util.cpp @@ -19,7 +19,7 @@ QString ReadableByteSize(qulonglong size) { static const std::array units = { "B", "KiB", "MiB", "GiB", "TiB", "PiB" }; if (size == 0) return "0"; - int digit_groups = std::min((int)(std::log10(size) / std::log10(1024)), units.size()); + int digit_groups = std::min(static_cast(std::log10(size) / std::log10(1024)), static_cast(units.size())); return QString("%L1 %2").arg(size / std::pow(1024, digit_groups), 0, 'f', 1) .arg(units[digit_groups]); } diff --git a/src/common/logging/backend.cpp b/src/common/logging/backend.cpp index 3d39f94d5..d7008fc66 100644 --- a/src/common/logging/backend.cpp +++ b/src/common/logging/backend.cpp @@ -65,6 +65,7 @@ namespace Log { SUB(Render, OpenGL) \ CLS(Audio) \ SUB(Audio, DSP) \ + SUB(Audio, Sink) \ CLS(Loader) // GetClassName is a macro defined by Windows.h, grrr... 
diff --git a/src/common/logging/log.h b/src/common/logging/log.h index 521362317..c6910b1c7 100644 --- a/src/common/logging/log.h +++ b/src/common/logging/log.h @@ -78,8 +78,9 @@ enum class Class : ClassType { Render, ///< Emulator video output and hardware acceleration Render_Software, ///< Software renderer backend Render_OpenGL, ///< OpenGL backend - Audio, ///< Emulator audio output + Audio, ///< Audio emulation Audio_DSP, ///< The HLE implementation of the DSP + Audio_Sink, ///< Emulator audio output backend Loader, ///< ROM loader Count ///< Total number of logging classes diff --git a/src/common/swap.h b/src/common/swap.h index a7c37bc44..1749bd7a4 100644 --- a/src/common/swap.h +++ b/src/common/swap.h @@ -25,6 +25,8 @@ #include #endif +#include + #include "common/common_types.h" // GCC 4.6+ @@ -58,9 +60,6 @@ namespace Common { -inline u8 swap8(u8 _data) {return _data;} -inline u32 swap24(const u8* _data) {return (_data[0] << 16) | (_data[1] << 8) | _data[2];} - #ifdef _MSC_VER inline u16 swap16(u16 _data) {return _byteswap_ushort(_data);} inline u32 swap32(u32 _data) {return _byteswap_ulong (_data);} @@ -92,52 +91,29 @@ inline u64 swap64(u64 data) {return ((u64)swap32(data) << 32) | swap32(data >> 3 #endif inline float swapf(float f) { - union { - float f; - unsigned int u32; - } dat1, dat2; + static_assert(sizeof(u32) == sizeof(float), + "float must be the same size as uint32_t."); - dat1.f = f; - dat2.u32 = swap32(dat1.u32); + u32 value; + std::memcpy(&value, &f, sizeof(u32)); - return dat2.f; + value = swap32(value); + std::memcpy(&f, &value, sizeof(u32)); + + return f; } inline double swapd(double f) { - union { - double f; - unsigned long long u64; - } dat1, dat2; + static_assert(sizeof(u64) == sizeof(double), + "double must be the same size as uint64_t."); - dat1.f = f; - dat2.u64 = swap64(dat1.u64); + u64 value; + std::memcpy(&value, &f, sizeof(u64)); - return dat2.f; -} + value = swap64(value); + std::memcpy(&f, &value, sizeof(u64)); -inline u16 
swap16(const u8* _pData) {return swap16(*(const u16*)_pData);} -inline u32 swap32(const u8* _pData) {return swap32(*(const u32*)_pData);} -inline u64 swap64(const u8* _pData) {return swap64(*(const u64*)_pData);} - -template -void swap(u8*); - -template <> -inline void swap<1>(u8* data) { } - -template <> -inline void swap<2>(u8* data) { - *reinterpret_cast(data) = swap16(data); -} - -template <> -inline void swap<4>(u8* data) { - *reinterpret_cast(data) = swap32(data); -} - -template <> -inline void swap<8>(u8* data) { - *reinterpret_cast(data) = swap64(data); + return f; } } // Namespace Common @@ -534,35 +510,35 @@ bool operator==(const S &p, const swap_struct_t v) { template struct swap_64_t { static T swap(T x) { - return (T)Common::swap64(*(u64 *)&x); + return static_cast(Common::swap64(x)); } }; template struct swap_32_t { static T swap(T x) { - return (T)Common::swap32(*(u32 *)&x); + return static_cast(Common::swap32(x)); } }; template struct swap_16_t { static T swap(T x) { - return (T)Common::swap16(*(u16 *)&x); + return static_cast(Common::swap16(x)); } }; template struct swap_float_t { static T swap(T x) { - return (T)Common::swapf(*(float *)&x); + return static_cast(Common::swapf(x)); } }; template struct swap_double_t { static T swap(T x) { - return (T)Common::swapd(*(double *)&x); + return static_cast(Common::swapd(x)); } }; diff --git a/src/core/arm/dyncom/arm_dyncom.cpp b/src/core/arm/dyncom/arm_dyncom.cpp index a3581132c..13492a08b 100644 --- a/src/core/arm/dyncom/arm_dyncom.cpp +++ b/src/core/arm/dyncom/arm_dyncom.cpp @@ -93,7 +93,7 @@ void ARM_DynCom::ResetContext(Core::ThreadContext& context, u32 stack_top, u32 e context.cpu_registers[0] = arg; context.pc = entry_point; context.sp = stack_top; - context.cpsr = 0x1F | ((entry_point & 1) << 5); // Usermode and THUMB mode + context.cpsr = USER32MODE | ((entry_point & 1) << 5); // Usermode and THUMB mode } void ARM_DynCom::SaveContext(Core::ThreadContext& ctx) { diff --git a/src/core/core.cpp 
b/src/core/core.cpp index 3bb843aab..cabab744a 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -51,7 +51,7 @@ void RunLoop(int tight_loop) { } HW::Update(); - if (HLE::g_reschedule) { + if (HLE::IsReschedulePending()) { Kernel::Reschedule(); } } diff --git a/src/core/gdbstub/gdbstub.cpp b/src/core/gdbstub/gdbstub.cpp index ae0c116ef..820b19e1a 100644 --- a/src/core/gdbstub/gdbstub.cpp +++ b/src/core/gdbstub/gdbstub.cpp @@ -374,7 +374,7 @@ static void SendReply(const char* reply) { memset(command_buffer, 0, sizeof(command_buffer)); - command_length = strlen(reply); + command_length = static_cast(strlen(reply)); if (command_length + 4 > sizeof(command_buffer)) { LOG_ERROR(Debug_GDBStub, "command_buffer overflow in SendReply"); return; @@ -437,7 +437,7 @@ static void HandleSetThread() { * * @param signal Signal to be sent to client. */ -void SendSignal(u32 signal) { +static void SendSignal(u32 signal) { if (gdbserver_socket == -1) { return; } @@ -515,7 +515,7 @@ static bool IsDataAvailable() { return false; } - return FD_ISSET(gdbserver_socket, &fd_socket); + return FD_ISSET(gdbserver_socket, &fd_socket) != 0; } /// Send requested register to gdb client. 
@@ -633,10 +633,10 @@ static void ReadMemory() { auto start_offset = command_buffer+1; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); LOG_DEBUG(Debug_GDBStub, "gdb: addr: %08x len: %08x\n", addr, len); @@ -658,11 +658,11 @@ static void ReadMemory() { static void WriteMemory() { auto start_offset = command_buffer+1; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; auto len_pos = std::find(start_offset, command_buffer+command_length, ':'); - u32 len = HexToInt(start_offset, len_pos - start_offset); + u32 len = HexToInt(start_offset, static_cast(len_pos - start_offset)); u8* dst = Memory::GetPointer(addr); if (!dst) { @@ -713,7 +713,7 @@ static void Continue() { * @param addr Address of breakpoint. * @param len Length of breakpoint. 
*/ -bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { +static bool CommitBreakpoint(BreakpointType type, PAddr addr, u32 len) { std::map& p = GetBreakpointList(type); Breakpoint breakpoint; @@ -752,10 +752,10 @@ static void AddBreakpoint() { auto start_offset = command_buffer+3; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); if (type == BreakpointType::Access) { // Access is made up of Read and Write types, so add both breakpoints @@ -800,10 +800,10 @@ static void RemoveBreakpoint() { auto start_offset = command_buffer+3; auto addr_pos = std::find(start_offset, command_buffer+command_length, ','); - PAddr addr = HexToInt(start_offset, addr_pos - start_offset); + PAddr addr = HexToInt(start_offset, static_cast(addr_pos - start_offset)); start_offset = addr_pos+1; - u32 len = HexToInt(start_offset, (command_buffer + command_length) - start_offset); + u32 len = HexToInt(start_offset, static_cast((command_buffer + command_length) - start_offset)); if (type == BreakpointType::Access) { // Access is made up of Read and Write types, so add both breakpoints @@ -907,7 +907,7 @@ void ToggleServer(bool status) { } } -void Init(u16 port) { +static void Init(u16 port) { if (!g_server_enabled) { // Set the halt loop to false in case the user enabled the gdbstub mid-execution. // This way the CPU can still execute normally. 
diff --git a/src/core/hle/applets/mii_selector.cpp b/src/core/hle/applets/mii_selector.cpp index 708d2f630..b4456ca90 100644 --- a/src/core/hle/applets/mii_selector.cpp +++ b/src/core/hle/applets/mii_selector.cpp @@ -21,13 +21,6 @@ namespace HLE { namespace Applets { -MiiSelector::MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { - // Create the SharedMemory that will hold the framebuffer data - // TODO(Subv): What size should we use here? - using Kernel::MemoryPermission; - framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "MiiSelector Memory"); -} - ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& parameter) { if (parameter.signal != static_cast(Service::APT::SignalType::LibAppJustStarted)) { LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); @@ -36,8 +29,18 @@ ResultCode MiiSelector::ReceiveParameter(const Service::APT::MessageParameter& p return ResultCode(-1); } + // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory. 
+ // Create the SharedMemory that will hold the framebuffer data + Service::APT::CaptureBufferInfo capture_info; + ASSERT(sizeof(capture_info) == parameter.buffer_size); + + memcpy(&capture_info, parameter.data, sizeof(capture_info)); + using Kernel::MemoryPermission; + framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, + MemoryPermission::ReadWrite, "MiiSelector Memory"); + + // Send the response message with the newly created SharedMemory Service::APT::MessageParameter result; - // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo result.signal = static_cast(Service::APT::SignalType::LibAppFinished); result.data = nullptr; result.buffer_size = 0; @@ -55,6 +58,11 @@ ResultCode MiiSelector::StartImpl(const Service::APT::AppletStartupParameter& pa // TODO(Subv): Set the expected fields in the response buffer before resending it to the application. // TODO(Subv): Reverse the parameter format for the Mii Selector + if(parameter.buffer_size >= sizeof(u32)) { + // TODO: defaults return no error, but garbage in other unknown fields + memset(parameter.data, 0, sizeof(u32)); + } + // Let the application know that we're closing Service::APT::MessageParameter message; message.buffer_size = parameter.buffer_size; diff --git a/src/core/hle/applets/mii_selector.h b/src/core/hle/applets/mii_selector.h index 6a3e7c8eb..be6b04642 100644 --- a/src/core/hle/applets/mii_selector.h +++ b/src/core/hle/applets/mii_selector.h @@ -16,17 +16,61 @@ namespace HLE { namespace Applets { +struct MiiConfig { + u8 unk_000; + u8 unk_001; + u8 unk_002; + u8 unk_003; + u8 unk_004; + INSERT_PADDING_BYTES(3); + u16 unk_008; + INSERT_PADDING_BYTES(0x8C - 0xA); + u8 unk_08C; + INSERT_PADDING_BYTES(3); + u16 unk_090; + INSERT_PADDING_BYTES(2); + u32 unk_094; + u16 unk_098; + u8 unk_09A[0x64]; + u8 unk_0FE; + u8 unk_0FF; + u32 unk_100; +}; + +static_assert(sizeof(MiiConfig) == 0x104, "MiiConfig structure has 
incorrect size"); +#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiConfig, field_name) == position, "Field "#field_name" has invalid position") +ASSERT_REG_POSITION(unk_008, 0x08); +ASSERT_REG_POSITION(unk_08C, 0x8C); +ASSERT_REG_POSITION(unk_090, 0x90); +ASSERT_REG_POSITION(unk_094, 0x94); +ASSERT_REG_POSITION(unk_0FE, 0xFE); +#undef ASSERT_REG_POSITION + +struct MiiResult { + u32 result_code; + u8 unk_04; + INSERT_PADDING_BYTES(7); + u8 unk_0C[0x60]; + u8 unk_6C[0x16]; + INSERT_PADDING_BYTES(2); +}; +static_assert(sizeof(MiiResult) == 0x84, "MiiResult structure has incorrect size"); +#define ASSERT_REG_POSITION(field_name, position) static_assert(offsetof(MiiResult, field_name) == position, "Field "#field_name" has invalid position") +ASSERT_REG_POSITION(unk_0C, 0x0C); +ASSERT_REG_POSITION(unk_6C, 0x6C); +#undef ASSERT_REG_POSITION + class MiiSelector final : public Applet { public: - MiiSelector(Service::APT::AppletId id); + MiiSelector(Service::APT::AppletId id) : Applet(id), started(false) { } ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; void Update() override; bool IsRunning() const override { return started; } - /// TODO(Subv): Find out what this is actually used for. - /// It is believed that the application stores the current screen image here. + /// This SharedMemory will be created when we receive the LibAppJustStarted message. + /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo Kernel::SharedPtr framebuffer_memory; /// Whether this applet is currently running instead of the host application or not. 
diff --git a/src/core/hle/applets/swkbd.cpp b/src/core/hle/applets/swkbd.cpp index 1db6b5a17..87238aa1c 100644 --- a/src/core/hle/applets/swkbd.cpp +++ b/src/core/hle/applets/swkbd.cpp @@ -24,13 +24,6 @@ namespace HLE { namespace Applets { -SoftwareKeyboard::SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { - // Create the SharedMemory that will hold the framebuffer data - // TODO(Subv): What size should we use here? - using Kernel::MemoryPermission; - framebuffer_memory = Kernel::SharedMemory::Create(0x1000, MemoryPermission::ReadWrite, MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); -} - ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter const& parameter) { if (parameter.signal != static_cast(Service::APT::SignalType::LibAppJustStarted)) { LOG_ERROR(Service_APT, "unsupported signal %u", parameter.signal); @@ -39,8 +32,19 @@ ResultCode SoftwareKeyboard::ReceiveParameter(Service::APT::MessageParameter con return ResultCode(-1); } + // The LibAppJustStarted message contains a buffer with the size of the framebuffer shared memory. 
+ // Create the SharedMemory that will hold the framebuffer data + Service::APT::CaptureBufferInfo capture_info; + ASSERT(sizeof(capture_info) == parameter.buffer_size); + + memcpy(&capture_info, parameter.data, sizeof(capture_info)); + + using Kernel::MemoryPermission; + framebuffer_memory = Kernel::SharedMemory::Create(capture_info.size, MemoryPermission::ReadWrite, + MemoryPermission::ReadWrite, "SoftwareKeyboard Memory"); + + // Send the response message with the newly created SharedMemory Service::APT::MessageParameter result; - // The buffer passed in parameter contains the data returned by GSPGPU::ImportDisplayCaptureInfo result.signal = static_cast(Service::APT::SignalType::LibAppFinished); result.data = nullptr; result.buffer_size = 0; diff --git a/src/core/hle/applets/swkbd.h b/src/core/hle/applets/swkbd.h index cb95b8d90..cf26a8fb7 100644 --- a/src/core/hle/applets/swkbd.h +++ b/src/core/hle/applets/swkbd.h @@ -53,8 +53,7 @@ static_assert(sizeof(SoftwareKeyboardConfig) == 0x400, "Software Keyboard Config class SoftwareKeyboard final : public Applet { public: - SoftwareKeyboard(Service::APT::AppletId id); - ~SoftwareKeyboard() {} + SoftwareKeyboard(Service::APT::AppletId id) : Applet(id), started(false) { } ResultCode ReceiveParameter(const Service::APT::MessageParameter& parameter) override; ResultCode StartImpl(const Service::APT::AppletStartupParameter& parameter) override; @@ -72,8 +71,8 @@ public: */ void Finalize(); - /// TODO(Subv): Find out what this is actually used for. - /// It is believed that the application stores the current screen image here. + /// This SharedMemory will be created when we receive the LibAppJustStarted message. 
+ /// It holds the framebuffer info retrieved by the application with GSPGPU::ImportDisplayCaptureInfo Kernel::SharedPtr framebuffer_memory; /// SharedMemory where the output text will be stored diff --git a/src/core/hle/hle.cpp b/src/core/hle/hle.cpp index e545de3b5..5c5373517 100644 --- a/src/core/hle/hle.cpp +++ b/src/core/hle/hle.cpp @@ -12,9 +12,13 @@ //////////////////////////////////////////////////////////////////////////////////////////////////// -namespace HLE { +namespace { -bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread +bool reschedule; ///< If true, immediately reschedules the CPU to a new thread + +} + +namespace HLE { void Reschedule(const char *reason) { DEBUG_ASSERT_MSG(reason != nullptr && strlen(reason) < 256, "Reschedule: Invalid or too long reason."); @@ -27,13 +31,21 @@ void Reschedule(const char *reason) { Core::g_app_core->PrepareReschedule(); - g_reschedule = true; + reschedule = true; +} + +bool IsReschedulePending() { + return reschedule; +} + +void DoneRescheduling() { + reschedule = false; } void Init() { Service::Init(); - g_reschedule = false; + reschedule = false; LOG_DEBUG(Kernel, "initialized OK"); } diff --git a/src/core/hle/hle.h b/src/core/hle/hle.h index e0b97797c..69ac0ade6 100644 --- a/src/core/hle/hle.h +++ b/src/core/hle/hle.h @@ -13,9 +13,9 @@ const Handle INVALID_HANDLE = 0; namespace HLE { -extern bool g_reschedule; ///< If true, immediately reschedules the CPU to a new thread - void Reschedule(const char *reason); +bool IsReschedulePending(); +void DoneRescheduling(); void Init(); void Shutdown(); diff --git a/src/core/hle/kernel/process.h b/src/core/hle/kernel/process.h index 6d2ca96a2..a06afef2b 100644 --- a/src/core/hle/kernel/process.h +++ b/src/core/hle/kernel/process.h @@ -107,6 +107,8 @@ public: ProcessFlags flags; /// Kernel compatibility version for this process u16 kernel_version = 0; + /// The default CPU for this process, threads are scheduled on this cpu by default. 
+ u8 ideal_processor = 0; /// The id of this process u32 process_id = next_process_id++; diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index bf32f653d..6dc95d0f1 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -483,7 +483,8 @@ void Reschedule() { Thread* cur = GetCurrentThread(); Thread* next = PopNextReadyThread(); - HLE::g_reschedule = false; + + HLE::DoneRescheduling(); // Don't bother switching to the same thread if (next == cur) diff --git a/src/core/hle/service/apt/apt.h b/src/core/hle/service/apt/apt.h index 668b4a66f..1a1034fcc 100644 --- a/src/core/hle/service/apt/apt.h +++ b/src/core/hle/service/apt/apt.h @@ -5,6 +5,7 @@ #pragma once #include "common/common_types.h" +#include "common/swap.h" #include "core/hle/kernel/kernel.h" @@ -31,6 +32,20 @@ struct AppletStartupParameter { u8* data = nullptr; }; +/// Used by the application to pass information about the current framebuffer to applets. +struct CaptureBufferInfo { + u32_le size; + u8 is_3d; + INSERT_PADDING_BYTES(0x3); // Padding for alignment + u32_le top_screen_left_offset; + u32_le top_screen_right_offset; + u32_le top_screen_format; + u32_le bottom_screen_left_offset; + u32_le bottom_screen_right_offset; + u32_le bottom_screen_format; +}; +static_assert(sizeof(CaptureBufferInfo) == 0x20, "CaptureBufferInfo struct has incorrect size"); + /// Signals used by APT functions enum class SignalType : u32 { None = 0x0, diff --git a/src/core/hle/service/dsp_dsp.cpp b/src/core/hle/service/dsp_dsp.cpp index 995bee3f9..274fc751a 100644 --- a/src/core/hle/service/dsp_dsp.cpp +++ b/src/core/hle/service/dsp_dsp.cpp @@ -288,7 +288,7 @@ static void WriteProcessPipe(Service::Interface* self) { ASSERT_MSG(Memory::GetPointer(buffer) != nullptr, "Invalid Buffer: pipe=%u, size=0x%X, buffer=0x%08X", pipe_index, size, buffer); std::vector message(size); - for (size_t i = 0; i < size; i++) { + for (u32 i = 0; i < size; i++) { message[i] = 
Memory::Read8(buffer + i); } @@ -403,7 +403,7 @@ static void GetPipeReadableSize(Service::Interface* self) { cmd_buff[0] = IPC::MakeHeader(0xF, 2, 0); cmd_buff[1] = RESULT_SUCCESS.raw; // No error - cmd_buff[2] = DSP::HLE::GetPipeReadableSize(pipe); + cmd_buff[2] = static_cast(DSP::HLE::GetPipeReadableSize(pipe)); LOG_DEBUG(Service_DSP, "pipe=%u, unknown=0x%08X, return cmd_buff[2]=0x%08X", pipe_index, unknown, cmd_buff[2]); } diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index fb2aecbf2..60c8747f3 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -496,6 +496,11 @@ static ResultCode CreateThread(Handle* out_handle, s32 priority, u32 entry_point break; } + if (processor_id == THREADPROCESSORID_1 || processor_id == THREADPROCESSORID_ALL || + (processor_id == THREADPROCESSORID_DEFAULT && Kernel::g_current_process->ideal_processor == THREADPROCESSORID_1)) { + LOG_WARNING(Kernel_SVC, "Newly created thread is allowed to be run in the SysCore, unimplemented."); + } + CASCADE_RESULT(SharedPtr thread, Kernel::Thread::Create( name, entry_point, priority, arg, processor_id, stack_top)); CASCADE_RESULT(*out_handle, Kernel::g_handle_table.Create(std::move(thread))); diff --git a/src/core/hw/gpu.cpp b/src/core/hw/gpu.cpp index 2fe856293..a4dfb7e43 100644 --- a/src/core/hw/gpu.cpp +++ b/src/core/hw/gpu.cpp @@ -188,10 +188,10 @@ inline void Write(u32 addr, const T data) { u32 output_gap = config.texture_copy.output_gap * 16; size_t contiguous_input_size = config.texture_copy.size / input_width * (input_width + input_gap); - Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), contiguous_input_size); + Memory::RasterizerFlushRegion(config.GetPhysicalInputAddress(), static_cast(contiguous_input_size)); size_t contiguous_output_size = config.texture_copy.size / output_width * (output_width + output_gap); - Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), contiguous_output_size); + 
Memory::RasterizerFlushAndInvalidateRegion(config.GetPhysicalOutputAddress(), static_cast(contiguous_output_size)); u32 remaining_size = config.texture_copy.size; u32 remaining_input = input_width; diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 5fb3b9e2b..98e7ab48f 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -178,11 +178,11 @@ static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, Shared for (unsigned current_inprogress = 0; current_inprogress < remaining && pos < end_pos; current_inprogress++) { const auto& table = reloc_table[current_inprogress]; LOG_TRACE(Loader, "(t=%d,skip=%u,patch=%u)", current_segment_reloc_table, - (u32)table.skip, (u32)table.patch); + static_cast(table.skip), static_cast(table.patch)); pos += table.skip; s32 num_patches = table.patch; while (0 < num_patches && pos < end_pos) { - u32 in_addr = (u8*)pos - program_image.data(); + u32 in_addr = static_cast(reinterpret_cast(pos) - program_image.data()); u32 addr = TranslateAddr(*pos, &loadinfo, offsets); LOG_TRACE(Loader, "Patching %08X <-- rel(%08X,%d) (%08X)", base_addr + in_addr, addr, current_segment_reloc_table, *pos); @@ -284,7 +284,7 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr& ro // Check if the 3DSX has a RomFS... if (hdr.fs_offset != 0) { u32 romfs_offset = hdr.fs_offset; - u32 romfs_size = file.GetSize() - hdr.fs_offset; + u32 romfs_size = static_cast(file.GetSize()) - hdr.fs_offset; LOG_DEBUG(Loader, "RomFS offset: 0x%08X", romfs_offset); LOG_DEBUG(Loader, "RomFS size: 0x%08X", romfs_size); @@ -303,4 +303,31 @@ ResultStatus AppLoader_THREEDSX::ReadRomFS(std::shared_ptr& ro return ResultStatus::ErrorNotUsed; } +ResultStatus AppLoader_THREEDSX::ReadIcon(std::vector& buffer) { + if (!file.IsOpen()) + return ResultStatus::Error; + + // Reset read pointer in case this file has been read before. 
+ file.Seek(0, SEEK_SET); + + THREEDSX_Header hdr; + if (file.ReadBytes(&hdr, sizeof(THREEDSX_Header)) != sizeof(THREEDSX_Header)) + return ResultStatus::Error; + + if (hdr.header_size != sizeof(THREEDSX_Header)) + return ResultStatus::Error; + + // Check if the 3DSX has a SMDH... + if (hdr.smdh_offset != 0) { + file.Seek(hdr.smdh_offset, SEEK_SET); + buffer.resize(hdr.smdh_size); + + if (file.ReadBytes(&buffer[0], hdr.smdh_size) != hdr.smdh_size) + return ResultStatus::Error; + + return ResultStatus::Success; + } + return ResultStatus::ErrorNotUsed; +} + } // namespace Loader diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h index 365ddb7a5..3ee686703 100644 --- a/src/core/loader/3dsx.h +++ b/src/core/loader/3dsx.h @@ -17,7 +17,7 @@ namespace Loader { /// Loads an 3DSX file class AppLoader_THREEDSX final : public AppLoader { public: - AppLoader_THREEDSX(FileUtil::IOFile&& file, std::string filename, const std::string& filepath) + AppLoader_THREEDSX(FileUtil::IOFile&& file, const std::string& filename, const std::string& filepath) : AppLoader(std::move(file)), filename(std::move(filename)), filepath(filepath) {} /** @@ -33,6 +33,13 @@ public: */ ResultStatus Load() override; + /** + * Get the icon (typically icon section) of the application + * @param buffer Reference to buffer to store data + * @return ResultStatus result of function + */ + ResultStatus ReadIcon(std::vector& buffer) override; + /** * Get the RomFS of the application * @param romfs_file Reference to buffer to store data diff --git a/src/core/loader/loader.cpp b/src/core/loader/loader.cpp index 886501c41..af3f62248 100644 --- a/src/core/loader/loader.cpp +++ b/src/core/loader/loader.cpp @@ -90,6 +90,28 @@ const char* GetFileTypeString(FileType type) { return "unknown"; } +std::unique_ptr GetLoader(FileUtil::IOFile&& file, FileType type, + const std::string& filename, const std::string& filepath) { + switch (type) { + + // 3DSX file format. 
+ case FileType::THREEDSX: + return std::make_unique(std::move(file), filename, filepath); + + // Standard ELF file format. + case FileType::ELF: + return std::make_unique(std::move(file), filename); + + // NCCH/NCSD container formats. + case FileType::CXI: + case FileType::CCI: + return std::make_unique(std::move(file), filepath); + + default: + return std::unique_ptr(); + } +} + ResultStatus LoadFile(const std::string& filename) { FileUtil::IOFile file(filename, "rb"); if (!file.IsOpen()) { @@ -111,37 +133,28 @@ ResultStatus LoadFile(const std::string& filename) { LOG_INFO(Loader, "Loading file %s as %s...", filename.c_str(), GetFileTypeString(type)); + std::unique_ptr app_loader = GetLoader(std::move(file), type, filename_filename, filename); + switch (type) { - //3DSX file format... + // 3DSX file format... + // or NCCH/NCSD container formats... case FileType::THREEDSX: + case FileType::CXI: + case FileType::CCI: { - AppLoader_THREEDSX app_loader(std::move(file), filename_filename, filename); // Load application and RomFS - if (ResultStatus::Success == app_loader.Load()) { - Service::FS::RegisterArchiveType(std::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); + ResultStatus result = app_loader->Load(); + if (ResultStatus::Success == result) { + Service::FS::RegisterArchiveType(std::make_unique(*app_loader), Service::FS::ArchiveIdCode::RomFS); return ResultStatus::Success; } - break; + return result; } // Standard ELF file format... case FileType::ELF: - return AppLoader_ELF(std::move(file), filename_filename).Load(); - - // NCCH/NCSD container formats... - case FileType::CXI: - case FileType::CCI: - { - AppLoader_NCCH app_loader(std::move(file), filename); - - // Load application and RomFS - ResultStatus result = app_loader.Load(); - if (ResultStatus::Success == result) { - Service::FS::RegisterArchiveType(std::make_unique(app_loader), Service::FS::ArchiveIdCode::RomFS); - } - return result; - } + return app_loader->Load(); // CIA file format... 
case FileType::CIA: diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 84a4ce5fc..9d3e9ed3b 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -10,8 +10,10 @@ #include #include +#include "common/common_funcs.h" #include "common/common_types.h" #include "common/file_util.h" +#include "common/swap.h" namespace Kernel { struct AddressMapping; @@ -78,6 +80,51 @@ constexpr u32 MakeMagic(char a, char b, char c, char d) { return a | b << 8 | c << 16 | d << 24; } +/// SMDH data structure that contains titles, icons etc. See https://www.3dbrew.org/wiki/SMDH +struct SMDH { + u32_le magic; + u16_le version; + INSERT_PADDING_BYTES(2); + + struct Title { + std::array short_title; + std::array long_title; + std::array publisher; + }; + std::array titles; + + std::array ratings; + u32_le region_lockout; + u32_le match_maker_id; + u64_le match_maker_bit_id; + u32_le flags; + u16_le eula_version; + INSERT_PADDING_BYTES(2); + float_le banner_animation_frame; + u32_le cec_id; + INSERT_PADDING_BYTES(8); + + std::array small_icon; + std::array large_icon; + + /// indicates the language used for each title entry + enum class TitleLanguage { + Japanese = 0, + English = 1, + French = 2, + German = 3, + Italian = 4, + Spanish = 5, + SimplifiedChinese = 6, + Korean= 7, + Dutch = 8, + Portuguese = 9, + Russian = 10, + TraditionalChinese = 11 + }; +}; +static_assert(sizeof(SMDH) == 0x36C0, "SMDH structure size is wrong"); + /// Interface for loading an application class AppLoader : NonCopyable { public: @@ -149,6 +196,16 @@ protected: */ extern const std::initializer_list default_address_mappings; +/** + * Get a loader for a file with a specific type + * @param file The file to load + * @param type The type of the file + * @param filename the file name (without path) + * @param filepath the file full path (with name) + * @return std::unique_ptr a pointer to a loader object; nullptr for unsupported type + */ +std::unique_ptr GetLoader(FileUtil::IOFile&& 
file, FileType type, const std::string& filename, const std::string& filepath); + /** * Identifies and loads a bootable file * @param filename String filename of bootable file diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 066e91a9e..7391bdb26 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -156,6 +156,9 @@ ResultStatus AppLoader_NCCH::LoadExec() { Kernel::g_current_process->resource_limit = Kernel::ResourceLimit::GetForCategory( static_cast(exheader_header.arm11_system_local_caps.resource_limit_category)); + // Set the default CPU core for this process + Kernel::g_current_process->ideal_processor = exheader_header.arm11_system_local_caps.ideal_processor; + // Copy data while converting endianess std::array kernel_caps; std::copy_n(exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps)); @@ -173,6 +176,10 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector& if (!file.IsOpen()) return ResultStatus::Error; + ResultStatus result = LoadExeFS(); + if (result != ResultStatus::Success) + return result; + LOG_DEBUG(Loader, "%d sections:", kMaxSections); // Iterate through the ExeFs archive until we find a section with the specified name... 
for (unsigned section_number = 0; section_number < kMaxSections; section_number++) { @@ -215,9 +222,9 @@ ResultStatus AppLoader_NCCH::LoadSectionExeFS(const char* name, std::vector& return ResultStatus::ErrorNotUsed; } -ResultStatus AppLoader_NCCH::Load() { - if (is_loaded) - return ResultStatus::ErrorAlreadyLoaded; +ResultStatus AppLoader_NCCH::LoadExeFS() { + if (is_exefs_loaded) + return ResultStatus::Success; if (!file.IsOpen()) return ResultStatus::Error; @@ -282,6 +289,18 @@ ResultStatus AppLoader_NCCH::Load() { if (file.ReadBytes(&exefs_header, sizeof(ExeFs_Header)) != sizeof(ExeFs_Header)) return ResultStatus::Error; + is_exefs_loaded = true; + return ResultStatus::Success; +} + +ResultStatus AppLoader_NCCH::Load() { + if (is_loaded) + return ResultStatus::ErrorAlreadyLoaded; + + ResultStatus result = LoadExeFS(); + if (result != ResultStatus::Success) + return result; + is_loaded = true; // Set state to loaded return LoadExec(); // Load the executable into memory for booting diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h index ca6772a78..fd852c3de 100644 --- a/src/core/loader/ncch.h +++ b/src/core/loader/ncch.h @@ -232,6 +232,13 @@ private: */ ResultStatus LoadExec(); + /** + * Ensure ExeFS is loaded and ready for reading sections + * @return ResultStatus result of function + */ + ResultStatus LoadExeFS(); + + bool is_exefs_loaded = false; bool is_compressed = false; u32 entry_point = 0; diff --git a/src/core/tracer/recorder.cpp b/src/core/tracer/recorder.cpp index c6dc35c83..7abaacf70 100644 --- a/src/core/tracer/recorder.cpp +++ b/src/core/tracer/recorder.cpp @@ -26,17 +26,17 @@ void Recorder::Finish(const std::string& filename) { // Calculate file offsets auto& initial = header.initial_state_offsets; - initial.gpu_registers_size = initial_state.gpu_registers.size(); - initial.lcd_registers_size = initial_state.lcd_registers.size(); - initial.pica_registers_size = initial_state.pica_registers.size(); - initial.default_attributes_size = 
initial_state.default_attributes.size(); - initial.vs_program_binary_size = initial_state.vs_program_binary.size(); - initial.vs_swizzle_data_size = initial_state.vs_swizzle_data.size(); - initial.vs_float_uniforms_size = initial_state.vs_float_uniforms.size(); - initial.gs_program_binary_size = initial_state.gs_program_binary.size(); - initial.gs_swizzle_data_size = initial_state.gs_swizzle_data.size(); - initial.gs_float_uniforms_size = initial_state.gs_float_uniforms.size(); - header.stream_size = stream.size(); + initial.gpu_registers_size = static_cast(initial_state.gpu_registers.size()); + initial.lcd_registers_size = static_cast(initial_state.lcd_registers.size()); + initial.pica_registers_size = static_cast(initial_state.pica_registers.size()); + initial.default_attributes_size = static_cast(initial_state.default_attributes.size()); + initial.vs_program_binary_size = static_cast(initial_state.vs_program_binary.size()); + initial.vs_swizzle_data_size = static_cast(initial_state.vs_swizzle_data.size()); + initial.vs_float_uniforms_size = static_cast(initial_state.vs_float_uniforms.size()); + initial.gs_program_binary_size = static_cast(initial_state.gs_program_binary.size()); + initial.gs_swizzle_data_size = static_cast(initial_state.gs_swizzle_data.size()); + initial.gs_float_uniforms_size = static_cast(initial_state.gs_float_uniforms.size()); + header.stream_size = static_cast(stream.size()); initial.gpu_registers = sizeof(header); initial.lcd_registers = initial.gpu_registers + initial.gpu_registers_size * sizeof(u32); @@ -68,7 +68,7 @@ void Recorder::Finish(const std::string& filename) { DEBUG_ASSERT(stream_element.extra_data.size() == 0); break; } - header.stream_offset += stream_element.extra_data.size(); + header.stream_offset += static_cast(stream_element.extra_data.size()); } try { diff --git a/src/video_core/clipper.cpp b/src/video_core/clipper.cpp index 2bc747102..db99ce666 100644 --- a/src/video_core/clipper.cpp +++ b/src/video_core/clipper.cpp @@ 
-75,8 +75,6 @@ static void InitScreenCoordinates(OutputVertex& vtx) viewport.halfsize_y = float24::FromRaw(regs.viewport_size_y); viewport.offset_x = float24::FromFloat32(static_cast(regs.viewport_corner.x)); viewport.offset_y = float24::FromFloat32(static_cast(regs.viewport_corner.y)); - viewport.zscale = float24::FromRaw(regs.viewport_depth_range); - viewport.offset_z = float24::FromRaw(regs.viewport_depth_far_plane); float24 inv_w = float24::FromFloat32(1.f) / vtx.pos.w; vtx.color *= inv_w; @@ -89,7 +87,7 @@ static void InitScreenCoordinates(OutputVertex& vtx) vtx.screenpos[0] = (vtx.pos.x * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_x + viewport.offset_x; vtx.screenpos[1] = (vtx.pos.y * inv_w + float24::FromFloat32(1.0)) * viewport.halfsize_y + viewport.offset_y; - vtx.screenpos[2] = viewport.offset_z + vtx.pos.z * inv_w * viewport.zscale; + vtx.screenpos[2] = vtx.pos.z * inv_w; } void ProcessTriangle(const OutputVertex &v0, const OutputVertex &v1, const OutputVertex &v2) { diff --git a/src/video_core/command_processor.cpp b/src/video_core/command_processor.cpp index be1a936b2..e7dc5ddac 100644 --- a/src/video_core/command_processor.cpp +++ b/src/video_core/command_processor.cpp @@ -144,13 +144,12 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { immediate_attribute_id = 0; Shader::UnitState shader_unit; - Shader::Setup(); - - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, static_cast(&immediate_input)); + g_state.vs.Setup(); // Send to vertex shader - Shader::OutputVertex output = Shader::Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, static_cast(&immediate_input)); + Shader::OutputVertex output = g_state.vs.Run(shader_unit, immediate_input, regs.vs.num_input_attributes+1); // Send to renderer using Pica::Shader::OutputVertex; @@ -238,7 +237,7 @@ static void WritePicaReg(u32 id, u32 value, 
u32 mask) { vertex_cache_ids.fill(-1); Shader::UnitState shader_unit; - Shader::Setup(); + g_state.vs.Setup(); for (unsigned int index = 0; index < regs.num_vertices; ++index) { @@ -272,11 +271,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) { Shader::InputVertex input; loader.LoadVertex(base_address, index, vertex, input, memory_accesses); - if (g_debug_context) - g_debug_context->OnEvent(DebugContext::Event::VertexLoaded, (void*)&input); - // Send to vertex shader - output = Shader::Run(shader_unit, input, loader.GetNumTotalAttributes()); + if (g_debug_context) + g_debug_context->OnEvent(DebugContext::Event::VertexShaderInvocation, (void*)&input); + output = g_state.vs.Run(shader_unit, input, loader.GetNumTotalAttributes()); if (is_indexed) { vertex_cache[vertex_cache_pos] = output; diff --git a/src/video_core/debug_utils/debug_utils.cpp b/src/video_core/debug_utils/debug_utils.cpp index fb20f81dd..2f645b441 100644 --- a/src/video_core/debug_utils/debug_utils.cpp +++ b/src/video_core/debug_utils/debug_utils.cpp @@ -208,11 +208,12 @@ void DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c // TODO: Reduce the amount of binary code written to relevant portions dvlp.binary_offset = write_offset - dvlp_offset; - dvlp.binary_size_words = setup.program_code.size(); - QueueForWriting(reinterpret_cast(setup.program_code.data()), setup.program_code.size() * sizeof(u32)); + dvlp.binary_size_words = static_cast(setup.program_code.size()); + QueueForWriting(reinterpret_cast(setup.program_code.data()), + static_cast(setup.program_code.size()) * sizeof(u32)); dvlp.swizzle_info_offset = write_offset - dvlp_offset; - dvlp.swizzle_info_num_entries = setup.swizzle_data.size(); + dvlp.swizzle_info_num_entries = static_cast(setup.swizzle_data.size()); u32 dummy = 0; for (unsigned int i = 0; i < setup.swizzle_data.size(); ++i) { QueueForWriting(reinterpret_cast(&setup.swizzle_data[i]), sizeof(setup.swizzle_data[i])); @@ -264,7 +265,7 @@ void 
DumpShader(const std::string& filename, const Regs::ShaderConfig& config, c constant_table.emplace_back(constant); } dvle.constant_table_offset = write_offset - dvlb.dvle_offset; - dvle.constant_table_size = constant_table.size(); + dvle.constant_table_size = static_cast(constant_table.size()); for (const auto& constant : constant_table) { QueueForWriting(reinterpret_cast(&constant), sizeof(constant)); } diff --git a/src/video_core/debug_utils/debug_utils.h b/src/video_core/debug_utils/debug_utils.h index be2d0301a..f628292a4 100644 --- a/src/video_core/debug_utils/debug_utils.h +++ b/src/video_core/debug_utils/debug_utils.h @@ -40,7 +40,7 @@ public: PicaCommandProcessed, IncomingPrimitiveBatch, FinishedPrimitiveBatch, - VertexLoaded, + VertexShaderInvocation, IncomingDisplayTransfer, GSPCommandProcessed, BufferSwapped, diff --git a/src/video_core/pica.cpp b/src/video_core/pica.cpp index be82cf4b5..ec78f9593 100644 --- a/src/video_core/pica.cpp +++ b/src/video_core/pica.cpp @@ -500,7 +500,7 @@ void Init() { } void Shutdown() { - Shader::Shutdown(); + Shader::ClearCache(); } template diff --git a/src/video_core/pica.h b/src/video_core/pica.h index 5891fb72a..86c0a0096 100644 --- a/src/video_core/pica.h +++ b/src/video_core/pica.h @@ -70,7 +70,7 @@ struct Regs { INSERT_PADDING_WORDS(0x9); BitField<0, 24, u32> viewport_depth_range; // float24 - BitField<0, 24, u32> viewport_depth_far_plane; // float24 + BitField<0, 24, u32> viewport_depth_near_plane; // float24 BitField<0, 3, u32> vs_output_total; @@ -122,9 +122,31 @@ struct Regs { BitField<16, 10, s32> y; } viewport_corner; - INSERT_PADDING_WORDS(0x17); + INSERT_PADDING_WORDS(0x1); + + //TODO: early depth + INSERT_PADDING_WORDS(0x1); + + INSERT_PADDING_WORDS(0x2); + + enum DepthBuffering : u32 { + WBuffering = 0, + ZBuffering = 1, + }; + BitField< 0, 1, DepthBuffering> depthmap_enable; + + INSERT_PADDING_WORDS(0x12); struct TextureConfig { + enum TextureType : u32 { + Texture2D = 0, + TextureCube = 1, + Shadow2D = 2, 
+ Projection2D = 3, + ShadowCube = 4, + Disabled = 5, + }; + enum WrapMode : u32 { ClampToEdge = 0, ClampToBorder = 1, @@ -155,6 +177,7 @@ struct Regs { BitField< 2, 1, TextureFilter> min_filter; BitField< 8, 2, WrapMode> wrap_t; BitField<12, 2, WrapMode> wrap_s; + BitField<28, 2, TextureType> type; ///< @note Only valid for texture 0 according to 3DBrew. }; INSERT_PADDING_WORDS(0x1); @@ -1279,10 +1302,11 @@ ASSERT_REG_POSITION(cull_mode, 0x40); ASSERT_REG_POSITION(viewport_size_x, 0x41); ASSERT_REG_POSITION(viewport_size_y, 0x43); ASSERT_REG_POSITION(viewport_depth_range, 0x4d); -ASSERT_REG_POSITION(viewport_depth_far_plane, 0x4e); +ASSERT_REG_POSITION(viewport_depth_near_plane, 0x4e); ASSERT_REG_POSITION(vs_output_attributes[0], 0x50); ASSERT_REG_POSITION(vs_output_attributes[1], 0x51); ASSERT_REG_POSITION(viewport_corner, 0x68); +ASSERT_REG_POSITION(depthmap_enable, 0x6D); ASSERT_REG_POSITION(texture0_enable, 0x80); ASSERT_REG_POSITION(texture0, 0x81); ASSERT_REG_POSITION(texture0_format, 0x8e); diff --git a/src/video_core/pica_state.h b/src/video_core/pica_state.h index bbecad850..1059c6ae4 100644 --- a/src/video_core/pica_state.h +++ b/src/video_core/pica_state.h @@ -56,7 +56,7 @@ struct State { // Used to buffer partial vertices for immediate-mode rendering. Shader::InputVertex input_vertex; // Index of the next attribute to be loaded into `input_vertex`. 
- int current_attribute = 0; + u32 current_attribute = 0; } immediate; // This is constructed with a dummy triangle topology diff --git a/src/video_core/rasterizer.cpp b/src/video_core/rasterizer.cpp index df67b9081..65168f05a 100644 --- a/src/video_core/rasterizer.cpp +++ b/src/video_core/rasterizer.cpp @@ -442,8 +442,33 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, DEBUG_ASSERT(0 != texture.config.address); - int s = (int)(uv[i].u() * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); - int t = (int)(uv[i].v() * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); + float24 u = uv[i].u(); + float24 v = uv[i].v(); + + // Only unit 0 respects the texturing type (according to 3DBrew) + // TODO: Refactor so cubemaps and shadowmaps can be handled + if (i == 0) { + switch(texture.config.type) { + case Regs::TextureConfig::Texture2D: + break; + case Regs::TextureConfig::Projection2D: { + auto tc0_w = GetInterpolatedAttribute(v0.tc0_w, v1.tc0_w, v2.tc0_w); + u /= tc0_w; + v /= tc0_w; + break; + } + default: + // TODO: Change to LOG_ERROR when more types are handled. + LOG_DEBUG(HW_GPU, "Unhandled texture type %x", (int)texture.config.type); + UNIMPLEMENTED(); + break; + } + } + + int s = (int)(u * float24::FromFloat32(static_cast(texture.config.width))).ToFloat32(); + int t = (int)(v * float24::FromFloat32(static_cast(texture.config.height))).ToFloat32(); + + static auto GetWrappedTexCoord = [](Regs::TextureConfig::WrapMode mode, int val, unsigned size) { switch (mode) { case Regs::TextureConfig::ClampToEdge: @@ -862,10 +887,30 @@ static void ProcessTriangleInternal(const Shader::OutputVertex& v0, } } + // interpolated_z = z / w + float interpolated_z_over_w = (v0.screenpos[2].ToFloat32() * w0 + + v1.screenpos[2].ToFloat32() * w1 + + v2.screenpos[2].ToFloat32() * w2) / wsum; + + // Not fully accurate. About 3 bits in precision are missing. 
+ // Z-Buffer (z / w * scale + offset) + float depth_scale = float24::FromRaw(regs.viewport_depth_range).ToFloat32(); + float depth_offset = float24::FromRaw(regs.viewport_depth_near_plane).ToFloat32(); + float depth = interpolated_z_over_w * depth_scale + depth_offset; + + // Potentially switch to W-Buffer + if (regs.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + + // W-Buffer (z * scale + w * offset = (z / w * scale + offset) * w) + depth *= interpolated_w_inverse.ToFloat32() * wsum; + } + + // Clamp the result + depth = MathUtil::Clamp(depth, 0.0f, 1.0f); + + // Convert float to integer unsigned num_bits = Regs::DepthBitsPerPixel(regs.framebuffer.depth_format); - u32 z = (u32)((v0.screenpos[2].ToFloat32() * w0 + - v1.screenpos[2].ToFloat32() * w1 + - v2.screenpos[2].ToFloat32() * w2) * ((1 << num_bits) - 1) / wsum); + u32 z = (u32)(depth * ((1 << num_bits) - 1)); if (output_merger.depth_test_enable) { u32 ref_z = GetDepth(x >> 4, y >> 4); diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp index 519d81aeb..ed2e2f3ae 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.cpp +++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp @@ -76,6 +76,9 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD1); glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD2); + glVertexAttribPointer(GLShader::ATTRIBUTE_TEXCOORD0_W, 1, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, tex_coord0_w)); + glEnableVertexAttribArray(GLShader::ATTRIBUTE_TEXCOORD0_W); + glVertexAttribPointer(GLShader::ATTRIBUTE_NORMQUAT, 4, GL_FLOAT, GL_FALSE, sizeof(HardwareVertex), (GLvoid*)offsetof(HardwareVertex, normquat)); glEnableVertexAttribArray(GLShader::ATTRIBUTE_NORMQUAT); @@ -93,7 +96,7 @@ RasterizerOpenGL::RasterizerOpenGL() : shader_dirty(true) { state.Apply(); for (size_t i = 0; i < lighting_luts.size(); ++i) { - 
glActiveTexture(GL_TEXTURE3 + i); + glActiveTexture(static_cast(GL_TEXTURE3 + i)); glTexImage1D(GL_TEXTURE_1D, 0, GL_RGBA32F, 256, 0, GL_RGBA, GL_FLOAT, nullptr); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); @@ -256,10 +259,15 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { // Depth modifiers case PICA_REG_INDEX(viewport_depth_range): - case PICA_REG_INDEX(viewport_depth_far_plane): + case PICA_REG_INDEX(viewport_depth_near_plane): SyncDepthModifiers(); break; + // Depth buffering + case PICA_REG_INDEX(depthmap_enable): + shader_dirty = true; + break; + // Blending case PICA_REG_INDEX(output_merger.alphablend_enable): SyncBlendEnabled(); @@ -314,6 +322,11 @@ void RasterizerOpenGL::NotifyPicaRegisterChanged(u32 id) { SyncLogicOp(); break; + // Texture 0 type + case PICA_REG_INDEX(texture0.type): + shader_dirty = true; + break; + // TEV stages case PICA_REG_INDEX(tev_stage0.color_source1): case PICA_REG_INDEX(tev_stage0.color_modifier1): @@ -910,10 +923,10 @@ void RasterizerOpenGL::SyncCullMode() { } void RasterizerOpenGL::SyncDepthModifiers() { - float depth_scale = -Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); - float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_far_plane).ToFloat32() / 2.0f; + float depth_scale = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_range).ToFloat32(); + float depth_offset = Pica::float24::FromRaw(Pica::g_state.regs.viewport_depth_near_plane).ToFloat32(); - // TODO: Implement scale modifier + uniform_block_data.data.depth_scale = depth_scale; uniform_block_data.data.depth_offset = depth_offset; uniform_block_data.dirty = true; } diff --git a/src/video_core/renderer_opengl/gl_rasterizer.h b/src/video_core/renderer_opengl/gl_rasterizer.h index 63ff7716d..eed00011a 100644 --- a/src/video_core/renderer_opengl/gl_rasterizer.h +++ 
b/src/video_core/renderer_opengl/gl_rasterizer.h @@ -39,139 +39,185 @@ struct ScreenInfo; * directly accessing Pica registers. This should reduce the risk of bugs in shader generation where * Pica state is not being captured in the shader cache key, thereby resulting in (what should be) * two separate shaders sharing the same key. + * + * We use a union because "implicitly-defined copy/move constructor for a union X copies the object representation of X." + * and "implicitly-defined copy assignment operator for a union X copies the object representation (3.9) of X." + * = Bytewise copy instead of memberwise copy. + * This is important because the padding bytes are included in the hash and comparison between objects. */ -struct PicaShaderConfig { +union PicaShaderConfig { + /// Construct a PicaShaderConfig with the current Pica register configuration. static PicaShaderConfig CurrentConfig() { PicaShaderConfig res; + + auto& state = res.state; + std::memset(&state, 0, sizeof(PicaShaderConfig::State)); + const auto& regs = Pica::g_state.regs; - res.alpha_test_func = regs.output_merger.alpha_test.enable ? + state.depthmap_enable = regs.depthmap_enable; + + state.alpha_test_func = regs.output_merger.alpha_test.enable ? regs.output_merger.alpha_test.func.Value() : Pica::Regs::CompareFunc::Always; - // Copy tev stages + state.texture0_type = regs.texture0.type; + + // Copy relevant tev stages fields. + // We don't sync const_color here because of the high variance, it is a + // shader uniform instead. 
const auto& tev_stages = regs.GetTevStages(); - DEBUG_ASSERT(res.tev_stages.size() == tev_stages.size()); + DEBUG_ASSERT(state.tev_stages.size() == tev_stages.size()); for (size_t i = 0; i < tev_stages.size(); i++) { const auto& tev_stage = tev_stages[i]; - res.tev_stages[i].sources_raw = tev_stage.sources_raw; - res.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; - res.tev_stages[i].ops_raw = tev_stage.ops_raw; - res.tev_stages[i].const_color = tev_stage.const_color; - res.tev_stages[i].scales_raw = tev_stage.scales_raw; + state.tev_stages[i].sources_raw = tev_stage.sources_raw; + state.tev_stages[i].modifiers_raw = tev_stage.modifiers_raw; + state.tev_stages[i].ops_raw = tev_stage.ops_raw; + state.tev_stages[i].scales_raw = tev_stage.scales_raw; } - res.combiner_buffer_input = + state.combiner_buffer_input = regs.tev_combiner_buffer_input.update_mask_rgb.Value() | regs.tev_combiner_buffer_input.update_mask_a.Value() << 4; // Fragment lighting - res.lighting.enable = !regs.lighting.disable; - res.lighting.src_num = regs.lighting.num_lights + 1; + state.lighting.enable = !regs.lighting.disable; + state.lighting.src_num = regs.lighting.num_lights + 1; - for (unsigned light_index = 0; light_index < res.lighting.src_num; ++light_index) { + for (unsigned light_index = 0; light_index < state.lighting.src_num; ++light_index) { unsigned num = regs.lighting.light_enable.GetNum(light_index); const auto& light = regs.lighting.light[num]; - res.lighting.light[light_index].num = num; - res.lighting.light[light_index].directional = light.directional != 0; - res.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; - res.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); - res.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); - res.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); + 
state.lighting.light[light_index].num = num; + state.lighting.light[light_index].directional = light.directional != 0; + state.lighting.light[light_index].two_sided_diffuse = light.two_sided_diffuse != 0; + state.lighting.light[light_index].dist_atten_enable = !regs.lighting.IsDistAttenDisabled(num); + state.lighting.light[light_index].dist_atten_bias = Pica::float20::FromRaw(light.dist_atten_bias).ToFloat32(); + state.lighting.light[light_index].dist_atten_scale = Pica::float20::FromRaw(light.dist_atten_scale).ToFloat32(); } - res.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; - res.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; - res.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); - res.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); + state.lighting.lut_d0.enable = regs.lighting.disable_lut_d0 == 0; + state.lighting.lut_d0.abs_input = regs.lighting.abs_lut_input.disable_d0 == 0; + state.lighting.lut_d0.type = regs.lighting.lut_input.d0.Value(); + state.lighting.lut_d0.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d0); - res.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; - res.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; - res.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); - res.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); + state.lighting.lut_d1.enable = regs.lighting.disable_lut_d1 == 0; + state.lighting.lut_d1.abs_input = regs.lighting.abs_lut_input.disable_d1 == 0; + state.lighting.lut_d1.type = regs.lighting.lut_input.d1.Value(); + state.lighting.lut_d1.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.d1); - res.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; - res.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; - res.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); - res.lighting.lut_fr.scale = 
regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); + state.lighting.lut_fr.enable = regs.lighting.disable_lut_fr == 0; + state.lighting.lut_fr.abs_input = regs.lighting.abs_lut_input.disable_fr == 0; + state.lighting.lut_fr.type = regs.lighting.lut_input.fr.Value(); + state.lighting.lut_fr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.fr); - res.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; - res.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; - res.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); - res.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); + state.lighting.lut_rr.enable = regs.lighting.disable_lut_rr == 0; + state.lighting.lut_rr.abs_input = regs.lighting.abs_lut_input.disable_rr == 0; + state.lighting.lut_rr.type = regs.lighting.lut_input.rr.Value(); + state.lighting.lut_rr.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rr); - res.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; - res.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; - res.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); - res.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); + state.lighting.lut_rg.enable = regs.lighting.disable_lut_rg == 0; + state.lighting.lut_rg.abs_input = regs.lighting.abs_lut_input.disable_rg == 0; + state.lighting.lut_rg.type = regs.lighting.lut_input.rg.Value(); + state.lighting.lut_rg.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rg); - res.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; - res.lighting.lut_rb.abs_input = regs.lighting.abs_lut_input.disable_rb == 0; - res.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); - res.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); + state.lighting.lut_rb.enable = regs.lighting.disable_lut_rb == 0; + state.lighting.lut_rb.abs_input 
= regs.lighting.abs_lut_input.disable_rb == 0; + state.lighting.lut_rb.type = regs.lighting.lut_input.rb.Value(); + state.lighting.lut_rb.scale = regs.lighting.lut_scale.GetScale(regs.lighting.lut_scale.rb); - res.lighting.config = regs.lighting.config; - res.lighting.fresnel_selector = regs.lighting.fresnel_selector; - res.lighting.bump_mode = regs.lighting.bump_mode; - res.lighting.bump_selector = regs.lighting.bump_selector; - res.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; - res.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; + state.lighting.config = regs.lighting.config; + state.lighting.fresnel_selector = regs.lighting.fresnel_selector; + state.lighting.bump_mode = regs.lighting.bump_mode; + state.lighting.bump_selector = regs.lighting.bump_selector; + state.lighting.bump_renorm = regs.lighting.disable_bump_renorm == 0; + state.lighting.clamp_highlights = regs.lighting.clamp_highlights != 0; return res; } bool TevStageUpdatesCombinerBufferColor(unsigned stage_index) const { - return (stage_index < 4) && (combiner_buffer_input & (1 << stage_index)); + return (stage_index < 4) && (state.combiner_buffer_input & (1 << stage_index)); } bool TevStageUpdatesCombinerBufferAlpha(unsigned stage_index) const { - return (stage_index < 4) && ((combiner_buffer_input >> 4) & (1 << stage_index)); + return (stage_index < 4) && ((state.combiner_buffer_input >> 4) & (1 << stage_index)); } bool operator ==(const PicaShaderConfig& o) const { - return std::memcmp(this, &o, sizeof(PicaShaderConfig)) == 0; + return std::memcmp(&state, &o.state, sizeof(PicaShaderConfig::State)) == 0; }; - Pica::Regs::CompareFunc alpha_test_func = Pica::Regs::CompareFunc::Never; - std::array tev_stages = {}; - u8 combiner_buffer_input = 0; + // NOTE: MSVC15 (Update 2) doesn't think `delete`'d constructors and operators are TC. + // This makes BitField not TC when used in a union or struct so we have to resort + // to this ugly hack. 
+ // Once that bug is fixed we can use Pica::Regs::TevStageConfig here. + // Doesn't include const_color because we don't sync it, see comment in CurrentConfig() + struct TevStageConfigRaw { + u32 sources_raw; + u32 modifiers_raw; + u32 ops_raw; + u32 scales_raw; + explicit operator Pica::Regs::TevStageConfig() const noexcept { + Pica::Regs::TevStageConfig stage; + stage.sources_raw = sources_raw; + stage.modifiers_raw = modifiers_raw; + stage.ops_raw = ops_raw; + stage.const_color = 0; + stage.scales_raw = scales_raw; + return stage; + } + }; - struct { - struct { - unsigned num = 0; - bool directional = false; - bool two_sided_diffuse = false; - bool dist_atten_enable = false; - GLfloat dist_atten_scale = 0.0f; - GLfloat dist_atten_bias = 0.0f; - } light[8]; + struct State { - bool enable = false; - unsigned src_num = 0; - Pica::Regs::LightingBumpMode bump_mode = Pica::Regs::LightingBumpMode::None; - unsigned bump_selector = 0; - bool bump_renorm = false; - bool clamp_highlights = false; + Pica::Regs::CompareFunc alpha_test_func; + Pica::Regs::TextureConfig::TextureType texture0_type; + std::array tev_stages; + u8 combiner_buffer_input; - Pica::Regs::LightingConfig config = Pica::Regs::LightingConfig::Config0; - Pica::Regs::LightingFresnelSelector fresnel_selector = Pica::Regs::LightingFresnelSelector::None; + Pica::Regs::DepthBuffering depthmap_enable; struct { - bool enable = false; - bool abs_input = false; - Pica::Regs::LightingLutInput type = Pica::Regs::LightingLutInput::NH; - float scale = 1.0f; - } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; - } lighting; + struct { + unsigned num; + bool directional; + bool two_sided_diffuse; + bool dist_atten_enable; + GLfloat dist_atten_scale; + GLfloat dist_atten_bias; + } light[8]; + + bool enable; + unsigned src_num; + Pica::Regs::LightingBumpMode bump_mode; + unsigned bump_selector; + bool bump_renorm; + bool clamp_highlights; + + Pica::Regs::LightingConfig config; + Pica::Regs::LightingFresnelSelector 
fresnel_selector; + + struct { + bool enable; + bool abs_input; + Pica::Regs::LightingLutInput type; + float scale; + } lut_d0, lut_d1, lut_fr, lut_rr, lut_rg, lut_rb; + } lighting; + + } state; }; +#if (__GNUC__ >= 5) || defined(__clang__) || defined(_MSC_VER) +static_assert(std::is_trivially_copyable::value, "PicaShaderConfig::State must be trivially copyable"); +#endif namespace std { template <> struct hash { size_t operator()(const PicaShaderConfig& k) const { - return Common::ComputeHash64(&k, sizeof(PicaShaderConfig)); + return Common::ComputeHash64(&k.state, sizeof(PicaShaderConfig::State)); } }; @@ -238,6 +284,7 @@ private: tex_coord1[1] = v.tc1.y.ToFloat32(); tex_coord2[0] = v.tc2.x.ToFloat32(); tex_coord2[1] = v.tc2.y.ToFloat32(); + tex_coord0_w = v.tc0_w.ToFloat32(); normquat[0] = v.quat.x.ToFloat32(); normquat[1] = v.quat.y.ToFloat32(); normquat[2] = v.quat.z.ToFloat32(); @@ -258,6 +305,7 @@ private: GLfloat tex_coord0[2]; GLfloat tex_coord1[2]; GLfloat tex_coord2[2]; + GLfloat tex_coord0_w; GLfloat normquat[4]; GLfloat view[3]; }; @@ -276,6 +324,7 @@ private: GLvec4 const_color[6]; GLvec4 tev_combiner_buffer_color; GLint alphatest_ref; + GLfloat depth_scale; GLfloat depth_offset; alignas(16) GLvec3 lighting_global_ambient; LightSrc light_src[8]; diff --git a/src/video_core/renderer_opengl/gl_shader_gen.cpp b/src/video_core/renderer_opengl/gl_shader_gen.cpp index 9011caa39..71d60e69c 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.cpp +++ b/src/video_core/renderer_opengl/gl_shader_gen.cpp @@ -32,8 +32,9 @@ static bool IsPassThroughTevStage(const TevStageConfig& stage) { } /// Writes the specified TEV stage source component(s) -static void AppendSource(std::string& out, TevStageConfig::Source source, +static void AppendSource(std::string& out, const PicaShaderConfig& config, TevStageConfig::Source source, const std::string& index_name) { + const auto& state = config.state; using Source = TevStageConfig::Source; switch (source) { case 
Source::PrimaryColor: @@ -46,7 +47,20 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, out += "secondary_fragment_color"; break; case Source::Texture0: - out += "texture(tex[0], texcoord[0])"; + // Only unit 0 respects the texturing type (according to 3DBrew) + switch(state.texture0_type) { + case Pica::Regs::TextureConfig::Texture2D: + out += "texture(tex[0], texcoord[0])"; + break; + case Pica::Regs::TextureConfig::Projection2D: + out += "textureProj(tex[0], vec3(texcoord[0], texcoord0_w))"; + break; + default: + out += "texture(tex[0], texcoord[0])"; + LOG_CRITICAL(HW_GPU, "Unhandled texture type %x", static_cast(state.texture0_type)); + UNIMPLEMENTED(); + break; + } break; case Source::Texture1: out += "texture(tex[1], texcoord[1])"; @@ -71,53 +85,53 @@ static void AppendSource(std::string& out, TevStageConfig::Source source, } /// Writes the color components to use for the specified TEV stage color modifier -static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier modifier, +static void AppendColorModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::ColorModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using ColorModifier = TevStageConfig::ColorModifier; switch (modifier) { case ColorModifier::SourceColor: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::OneMinusSourceColor: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rgb"; break; case ColorModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::OneMinusSourceAlpha: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".aaa"; break; case ColorModifier::SourceRed: - AppendSource(out, 
source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::OneMinusSourceRed: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".rrr"; break; case ColorModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::OneMinusSourceGreen: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".ggg"; break; case ColorModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; case ColorModifier::OneMinusSourceBlue: out += "vec3(1.0) - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".bbb"; break; default: @@ -128,44 +142,44 @@ static void AppendColorModifier(std::string& out, TevStageConfig::ColorModifier } /// Writes the alpha component to use for the specified TEV stage alpha modifier -static void AppendAlphaModifier(std::string& out, TevStageConfig::AlphaModifier modifier, +static void AppendAlphaModifier(std::string& out, const PicaShaderConfig& config, TevStageConfig::AlphaModifier modifier, TevStageConfig::Source source, const std::string& index_name) { using AlphaModifier = TevStageConfig::AlphaModifier; switch (modifier) { case AlphaModifier::SourceAlpha: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::OneMinusSourceAlpha: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".a"; break; case AlphaModifier::SourceRed: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::OneMinusSourceRed: out += "1.0 - "; - AppendSource(out, source, index_name); 
+ AppendSource(out, config, source, index_name); out += ".r"; break; case AlphaModifier::SourceGreen: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::OneMinusSourceGreen: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".g"; break; case AlphaModifier::SourceBlue: - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; case AlphaModifier::OneMinusSourceBlue: out += "1.0 - "; - AppendSource(out, source, index_name); + AppendSource(out, config, source, index_name); out += ".b"; break; default: @@ -287,16 +301,16 @@ static void AppendAlphaTestCondition(std::string& out, Regs::CompareFunc func) { /// Writes the code to emulate the specified TEV stage static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsigned index) { - auto& stage = config.tev_stages[index]; + const auto stage = static_cast(config.state.tev_stages[index]); if (!IsPassThroughTevStage(stage)) { std::string index_name = std::to_string(index); out += "vec3 color_results_" + index_name + "[3] = vec3[3]("; - AppendColorModifier(out, stage.color_modifier1, stage.color_source1, index_name); + AppendColorModifier(out, config, stage.color_modifier1, stage.color_source1, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier2, stage.color_source2, index_name); + AppendColorModifier(out, config, stage.color_modifier2, stage.color_source2, index_name); out += ", "; - AppendColorModifier(out, stage.color_modifier3, stage.color_source3, index_name); + AppendColorModifier(out, config, stage.color_modifier3, stage.color_source3, index_name); out += ");\n"; out += "vec3 color_output_" + index_name + " = "; @@ -304,11 +318,11 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi out += ";\n"; out += "float alpha_results_" + index_name + "[3] = 
float[3]("; - AppendAlphaModifier(out, stage.alpha_modifier1, stage.alpha_source1, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier1, stage.alpha_source1, index_name); out += ", "; - AppendAlphaModifier(out, stage.alpha_modifier2, stage.alpha_source2, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier2, stage.alpha_source2, index_name); out += ", "; - AppendAlphaModifier(out, stage.alpha_modifier3, stage.alpha_source3, index_name); + AppendAlphaModifier(out, config, stage.alpha_modifier3, stage.alpha_source3, index_name); out += ");\n"; out += "float alpha_output_" + index_name + " = "; @@ -331,6 +345,8 @@ static void WriteTevStage(std::string& out, const PicaShaderConfig& config, unsi /// Writes the code to emulate fragment lighting static void WriteLighting(std::string& out, const PicaShaderConfig& config) { + const auto& lighting = config.state.lighting; + // Define lighting globals out += "vec4 diffuse_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" "vec4 specular_sum = vec4(0.0, 0.0, 0.0, 1.0);\n" @@ -338,17 +354,17 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { "vec3 refl_value = vec3(0.0);\n"; // Compute fragment normals - if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { + if (lighting.bump_mode == Pica::Regs::LightingBumpMode::NormalMap) { // Bump mapping is enabled using a normal map, read perturbation vector from the selected texture - std::string bump_selector = std::to_string(config.lighting.bump_selector); + std::string bump_selector = std::to_string(lighting.bump_selector); out += "vec3 surface_normal = 2.0 * texture(tex[" + bump_selector + "], texcoord[" + bump_selector + "]).rgb - 1.0;\n"; // Recompute Z-component of perturbation if 'renorm' is enabled, this provides a higher precision result - if (config.lighting.bump_renorm) { + if (lighting.bump_renorm) { std::string val = "(1.0 - (surface_normal.x*surface_normal.x + surface_normal.y*surface_normal.y))"; out += 
"surface_normal.z = sqrt(max(" + val + ", 0.0));\n"; } - } else if (config.lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { + } else if (lighting.bump_mode == Pica::Regs::LightingBumpMode::TangentMap) { // Bump mapping is enabled using a tangent map LOG_CRITICAL(HW_GPU, "unimplemented bump mapping mode (tangent mapping)"); UNIMPLEMENTED(); @@ -361,7 +377,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { out += "vec3 normal = normalize(quaternion_rotate(normquat, surface_normal));\n"; // Gets the index into the specified lookup table for specular lighting - auto GetLutIndex = [config](unsigned light_num, Regs::LightingLutInput input, bool abs) { + auto GetLutIndex = [&lighting](unsigned light_num, Regs::LightingLutInput input, bool abs) { const std::string half_angle = "normalize(normalize(view) + light_vector)"; std::string index; switch (input) { @@ -389,7 +405,7 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { if (abs) { // LUT index is in the range of (0.0, 1.0) - index = config.lighting.light[light_num].two_sided_diffuse ? "abs(" + index + ")" : "max(" + index + ", 0.f)"; + index = lighting.light[light_num].two_sided_diffuse ? 
"abs(" + index + ")" : "max(" + index + ", 0.f)"; return "(FLOAT_255 * clamp(" + index + ", 0.0, 1.0))"; } else { // LUT index is in the range of (-1.0, 1.0) @@ -407,8 +423,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { }; // Write the code to emulate each enabled light - for (unsigned light_index = 0; light_index < config.lighting.src_num; ++light_index) { - const auto& light_config = config.lighting.light[light_index]; + for (unsigned light_index = 0; light_index < lighting.src_num; ++light_index) { + const auto& light_config = lighting.light[light_index]; std::string light_src = "light_src[" + std::to_string(light_config.num) + "]"; // Compute light vector (directional or positional) @@ -432,39 +448,39 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } // If enabled, clamp specular component if lighting result is negative - std::string clamp_highlights = config.lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 0.0 : 1.0)" : "1.0"; + std::string clamp_highlights = lighting.clamp_highlights ? "(dot(light_vector, normal) <= 0.0 ? 
0.0 : 1.0)" : "1.0"; // Specular 0 component std::string d0_lut_value = "1.0"; - if (config.lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution0)) { + if (lighting.lut_d0.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution0)) { // Lookup specular "distribution 0" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d0.type, config.lighting.lut_d0.abs_input); - d0_lut_value = "(" + std::to_string(config.lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d0.type, lighting.lut_d0.abs_input); + d0_lut_value = "(" + std::to_string(lighting.lut_d0.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution0, index) + ")"; } std::string specular_0 = "(" + d0_lut_value + " * " + light_src + ".specular_0)"; // If enabled, lookup ReflectRed value, otherwise, 1.0 is used - if (config.lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rr.type, config.lighting.lut_rr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; + if (lighting.lut_rr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectRed)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rr.type, lighting.lut_rr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rr.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectRed, index) + ")"; out += "refl_value.r = " + value + ";\n"; } else { out += "refl_value.r = 1.0;\n"; } // If enabled, lookup ReflectGreen value, otherwise, ReflectRed value is used - if 
(config.lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rg.type, config.lighting.lut_rg.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; + if (lighting.lut_rg.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectGreen)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rg.type, lighting.lut_rg.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rg.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectGreen, index) + ")"; out += "refl_value.g = " + value + ";\n"; } else { out += "refl_value.g = refl_value.r;\n"; } // If enabled, lookup ReflectBlue value, otherwise, ReflectRed value is used - if (config.lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { - std::string index = GetLutIndex(light_config.num, config.lighting.lut_rb.type, config.lighting.lut_rb.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; + if (lighting.lut_rb.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::ReflectBlue)) { + std::string index = GetLutIndex(light_config.num, lighting.lut_rb.type, lighting.lut_rb.abs_input); + std::string value = "(" + std::to_string(lighting.lut_rb.scale) + " * " + GetLutValue(Regs::LightingSampler::ReflectBlue, index) + ")"; out += "refl_value.b = " + value + ";\n"; } else { out += "refl_value.b = refl_value.r;\n"; @@ -472,27 +488,27 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { // Specular 1 component std::string d1_lut_value = "1.0"; - if 
(config.lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Distribution1)) { + if (lighting.lut_d1.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Distribution1)) { // Lookup specular "distribution 1" LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_d1.type, config.lighting.lut_d1.abs_input); - d1_lut_value = "(" + std::to_string(config.lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_d1.type, lighting.lut_d1.abs_input); + d1_lut_value = "(" + std::to_string(lighting.lut_d1.scale) + " * " + GetLutValue(Regs::LightingSampler::Distribution1, index) + ")"; } std::string specular_1 = "(" + d1_lut_value + " * refl_value * " + light_src + ".specular_1)"; // Fresnel - if (config.lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(config.lighting.config, Pica::Regs::LightingSampler::Fresnel)) { + if (lighting.lut_fr.enable && Pica::Regs::IsLightingSamplerSupported(lighting.config, Pica::Regs::LightingSampler::Fresnel)) { // Lookup fresnel LUT value - std::string index = GetLutIndex(light_config.num, config.lighting.lut_fr.type, config.lighting.lut_fr.abs_input); - std::string value = "(" + std::to_string(config.lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; + std::string index = GetLutIndex(light_config.num, lighting.lut_fr.type, lighting.lut_fr.abs_input); + std::string value = "(" + std::to_string(lighting.lut_fr.scale) + " * " + GetLutValue(Regs::LightingSampler::Fresnel, index) + ")"; // Enabled for difffuse lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::PrimaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == 
Pica::Regs::LightingFresnelSelector::PrimaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "diffuse_sum.a *= " + value + ";\n"; // Enabled for the specular lighting alpha component - if (config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || - config.lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) + if (lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::SecondaryAlpha || + lighting.fresnel_selector == Pica::Regs::LightingFresnelSelector::Both) out += "specular_sum.a *= " + value + ";\n"; } @@ -510,6 +526,8 @@ static void WriteLighting(std::string& out, const PicaShaderConfig& config) { } std::string GenerateFragmentShader(const PicaShaderConfig& config) { + const auto& state = config.state; + std::string out = R"( #version 330 core #define NUM_TEV_STAGES 6 @@ -519,6 +537,7 @@ std::string GenerateFragmentShader(const PicaShaderConfig& config) { in vec4 primary_color; in vec2 texcoord[3]; +in float texcoord0_w; in vec4 normquat; in vec3 view; @@ -536,6 +555,7 @@ layout (std140) uniform shader_data { vec4 const_color[NUM_TEV_STAGES]; vec4 tev_combiner_buffer_color; int alphatest_ref; + float depth_scale; float depth_offset; vec3 lighting_global_ambient; LightSrc light_src[NUM_LIGHTS]; @@ -555,29 +575,37 @@ vec4 secondary_fragment_color = vec4(0.0); )"; // Do not do any sort of processing if it's obvious we're not going to pass the alpha test - if (config.alpha_test_func == Regs::CompareFunc::Never) { + if (state.alpha_test_func == Regs::CompareFunc::Never) { out += "discard; }"; return out; } - if (config.lighting.enable) + if (state.lighting.enable) WriteLighting(out, config); out += "vec4 combiner_buffer = vec4(0.0);\n"; out += "vec4 next_combiner_buffer = tev_combiner_buffer_color;\n"; out += "vec4 last_tex_env_out = vec4(0.0);\n"; - for (size_t index = 0; index < config.tev_stages.size(); ++index) + for (size_t index = 0; index < 
state.tev_stages.size(); ++index) WriteTevStage(out, config, (unsigned)index); - if (config.alpha_test_func != Regs::CompareFunc::Always) { + if (state.alpha_test_func != Regs::CompareFunc::Always) { out += "if ("; - AppendAlphaTestCondition(out, config.alpha_test_func); + AppendAlphaTestCondition(out, state.alpha_test_func); out += ") discard;\n"; } out += "color = last_tex_env_out;\n"; - out += "gl_FragDepth = gl_FragCoord.z + depth_offset;\n}"; + + out += "float z_over_w = 1.0 - gl_FragCoord.z * 2.0;\n"; + out += "float depth = z_over_w * depth_scale + depth_offset;\n"; + if (state.depthmap_enable == Pica::Regs::DepthBuffering::WBuffering) { + out += "depth /= gl_FragCoord.w;\n"; + } + out += "gl_FragDepth = depth;\n"; + + out += "}"; return out; } @@ -585,17 +613,19 @@ vec4 secondary_fragment_color = vec4(0.0); std::string GenerateVertexShader() { std::string out = "#version 330 core\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; - out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_POSITION) + ") in vec4 vert_position;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_COLOR) + ") in vec4 vert_color;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0) + ") in vec2 vert_texcoord0;\n"; + out += "layout(location = " + 
std::to_string((int)ATTRIBUTE_TEXCOORD1) + ") in vec2 vert_texcoord1;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD2) + ") in vec2 vert_texcoord2;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_TEXCOORD0_W) + ") in float vert_texcoord0_w;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_NORMQUAT) + ") in vec4 vert_normquat;\n"; + out += "layout(location = " + std::to_string((int)ATTRIBUTE_VIEW) + ") in vec3 vert_view;\n"; out += R"( out vec4 primary_color; out vec2 texcoord[3]; +out float texcoord0_w; out vec4 normquat; out vec3 view; @@ -604,6 +634,7 @@ void main() { texcoord[0] = vert_texcoord0; texcoord[1] = vert_texcoord1; texcoord[2] = vert_texcoord2; + texcoord0_w = vert_texcoord0_w; normquat = vert_normquat; view = vert_view; gl_Position = vec4(vert_position.x, vert_position.y, -vert_position.z, vert_position.w); diff --git a/src/video_core/renderer_opengl/gl_shader_gen.h b/src/video_core/renderer_opengl/gl_shader_gen.h index 3eb07d57a..bef3249cf 100644 --- a/src/video_core/renderer_opengl/gl_shader_gen.h +++ b/src/video_core/renderer_opengl/gl_shader_gen.h @@ -6,7 +6,7 @@ #include -struct PicaShaderConfig; +union PicaShaderConfig; namespace GLShader { diff --git a/src/video_core/renderer_opengl/gl_shader_util.h b/src/video_core/renderer_opengl/gl_shader_util.h index 097242f6f..f59912f79 100644 --- a/src/video_core/renderer_opengl/gl_shader_util.h +++ b/src/video_core/renderer_opengl/gl_shader_util.h @@ -14,6 +14,7 @@ enum Attributes { ATTRIBUTE_TEXCOORD0, ATTRIBUTE_TEXCOORD1, ATTRIBUTE_TEXCOORD2, + ATTRIBUTE_TEXCOORD0_W, ATTRIBUTE_NORMQUAT, ATTRIBUTE_VIEW, }; diff --git a/src/video_core/renderer_opengl/renderer_opengl.cpp b/src/video_core/renderer_opengl/renderer_opengl.cpp index 0e9a0be8b..8f424a435 100644 --- a/src/video_core/renderer_opengl/renderer_opengl.cpp +++ b/src/video_core/renderer_opengl/renderer_opengl.cpp @@ -192,7 +192,7 @@ void RendererOpenGL::LoadFBToScreenInfo(const 
GPU::Regs::FramebufferConfig& fram // only allows rows to have a memory alignement of 4. ASSERT(pixel_stride % 4 == 0); - if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, pixel_stride, screen_info)) { + if (!Rasterizer()->AccelerateDisplay(framebuffer, framebuffer_addr, static_cast(pixel_stride), screen_info)) { // Reset the screen info's display texture to its own permanent texture screen_info.display_texture = screen_info.texture.resource.handle; screen_info.display_texcoords = MathUtil::Rectangle(0.f, 0.f, 1.f, 1.f); @@ -473,12 +473,6 @@ static void DebugHandler(GLenum source, GLenum type, GLuint id, GLenum severity, bool RendererOpenGL::Init() { render_window->MakeCurrent(); - // TODO: Make frontends initialize this, so they can use gladLoadGLLoader with their own loaders - if (!gladLoadGL()) { - LOG_CRITICAL(Render_OpenGL, "Failed to initialize GL functions! Exiting..."); - exit(-1); - } - if (GLAD_GL_KHR_debug) { glEnable(GL_DEBUG_OUTPUT); glDebugMessageCallback(DebugHandler, nullptr); diff --git a/src/video_core/shader/shader.cpp b/src/video_core/shader/shader.cpp index 65dcc9156..449fc703f 100644 --- a/src/video_core/shader/shader.cpp +++ b/src/video_core/shader/shader.cpp @@ -35,7 +35,13 @@ static std::unordered_map> shader_map; static const JitShader* jit_shader; #endif // ARCHITECTURE_x86_64 -void Setup() { +void ClearCache() { +#ifdef ARCHITECTURE_x86_64 + shader_map.clear(); +#endif // ARCHITECTURE_x86_64 +} + +void ShaderSetup::Setup() { #ifdef ARCHITECTURE_x86_64 if (VideoCore::g_shader_jit_enabled) { u64 cache_key = (Common::ComputeHash64(&g_state.vs.program_code, sizeof(g_state.vs.program_code)) ^ @@ -54,18 +60,12 @@ void Setup() { #endif // ARCHITECTURE_x86_64 } -void Shutdown() { -#ifdef ARCHITECTURE_x86_64 - shader_map.clear(); -#endif // ARCHITECTURE_x86_64 -} +MICROPROFILE_DEFINE(GPU_Shader, "GPU", "Shader", MP_RGB(50, 50, 240)); -MICROPROFILE_DEFINE(GPU_VertexShader, "GPU", "Vertex Shader", MP_RGB(50, 50, 240)); - 
-OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes) { +OutputVertex ShaderSetup::Run(UnitState& state, const InputVertex& input, int num_attributes) { auto& config = g_state.regs.vs; - MICROPROFILE_SCOPE(GPU_VertexShader); + MICROPROFILE_SCOPE(GPU_Shader); state.program_counter = config.main_offset; state.debug.max_offset = 0; @@ -140,7 +140,7 @@ OutputVertex Run(UnitState& state, const InputVertex& input, int num_attr return ret; } -DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { +DebugData ShaderSetup::ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup) { UnitState state; state.program_counter = config.main_offset; diff --git a/src/video_core/shader/shader.h b/src/video_core/shader/shader.h index 56b83bfeb..7f417675a 100644 --- a/src/video_core/shader/shader.h +++ b/src/video_core/shader/shader.h @@ -43,7 +43,8 @@ struct OutputVertex { Math::Vec4 color; Math::Vec2 tc0; Math::Vec2 tc1; - INSERT_PADDING_WORDS(2); + float24 tc0_w; + INSERT_PADDING_WORDS(1); Math::Vec3 view; INSERT_PADDING_WORDS(1); Math::Vec2 tc2; @@ -83,23 +84,6 @@ struct OutputVertex { static_assert(std::is_pod::value, "Structure is not POD"); static_assert(sizeof(OutputVertex) == 32 * sizeof(float), "OutputVertex has invalid size"); -/// Vertex shader memory -struct ShaderSetup { - struct { - // The float uniforms are accessed by the shader JIT using SSE instructions, and are - // therefore required to be 16-byte aligned. 
- alignas(16) Math::Vec4 f[96]; - - std::array b; - std::array, 4> i; - } uniforms; - - Math::Vec4 default_attributes[16]; - - std::array program_code; - std::array swizzle_data; -}; - // Helper structure used to keep track of data useful for inspection of shader emulation template struct DebugData; @@ -342,33 +326,51 @@ struct UnitState { } }; -/** - * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per - * vertex, which would happen within the `Run` function). - */ -void Setup(); +/// Clears the shader cache +void ClearCache(); -/// Performs any cleanup when the emulator is shutdown -void Shutdown(); +struct ShaderSetup { -/** - * Runs the currently setup shader - * @param state Shader unit state, must be setup per shader and per shader unit - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @return The output vertex, after having been processed by the vertex shader - */ -OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); + struct { + // The float uniforms are accessed by the shader JIT using SSE instructions, and are + // therefore required to be 16-byte aligned. 
+ alignas(16) Math::Vec4 f[96]; -/** - * Produce debug information based on the given shader and input vertex - * @param input Input vertex into the shader - * @param num_attributes The number of vertex shader attributes - * @param config Configuration object for the shader pipeline - * @param setup Setup object for the shader pipeline - * @return Debug information for this shader with regards to the given vertex - */ -DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + std::array b; + std::array, 4> i; + } uniforms; + + Math::Vec4 default_attributes[16]; + + std::array program_code; + std::array swizzle_data; + + /** + * Performs any shader unit setup that only needs to happen once per shader (as opposed to once per + * vertex, which would happen within the `Run` function). + */ + void Setup(); + + /** + * Runs the currently setup shader + * @param state Shader unit state, must be setup per shader and per shader unit + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @return The output vertex, after having been processed by the vertex shader + */ + OutputVertex Run(UnitState& state, const InputVertex& input, int num_attributes); + + /** + * Produce debug information based on the given shader and input vertex + * @param input Input vertex into the shader + * @param num_attributes The number of vertex shader attributes + * @param config Configuration object for the shader pipeline + * @param setup Setup object for the shader pipeline + * @return Debug information for this shader with regards to the given vertex + */ + DebugData ProduceDebugInfo(const InputVertex& input, int num_attributes, const Regs::ShaderConfig& config, const ShaderSetup& setup); + +}; } // namespace Shader