From d7459354f58d1b71fc0c5ec48de9242e6a2fd00c Mon Sep 17 00:00:00 2001 From: Subv Date: Mon, 25 Sep 2017 13:06:42 -0500 Subject: [PATCH 1/8] Audio: Use std::deque instead of std::vector for the audio buffer type (StereoBuffer16). The current code inserts and deletes elements from the beginning of the audio buffer, which is very inefficient in an std::vector. Profiling was done using VisualStudio2017's Performance Analyzer in Super Mario 3D Land. Before this change: AudioInterp::Linear had 14.14% of the runtime (inclusive) and most of that time was spent in std::vector's insert implementation. After this change: AudioInterp::Linear has 0.36% of the runtime (inclusive) --- src/audio_core/codec.cpp | 4 +++- src/audio_core/codec.h | 4 ++-- src/audio_core/hle/source.h | 2 +- src/audio_core/interpolate.cpp | 2 +- src/audio_core/interpolate.h | 4 ++-- 5 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/audio_core/codec.cpp b/src/audio_core/codec.cpp index 7a3bd7eb3..6fba9fdae 100644 --- a/src/audio_core/codec.cpp +++ b/src/audio_core/codec.cpp @@ -117,7 +117,9 @@ StereoBuffer16 DecodePCM16(const unsigned num_channels, const u8* const data, ret[i].fill(sample); } } else { - std::memcpy(ret.data(), data, sample_count * 2 * sizeof(u16)); + for (size_t i = 0; i < sample_count; ++i) { + std::memcpy(&ret[i], data + i * sizeof(s16) * 2, 2 * sizeof(s16)); + } } return ret; diff --git a/src/audio_core/codec.h b/src/audio_core/codec.h index 2b0c395e6..877b2202d 100644 --- a/src/audio_core/codec.h +++ b/src/audio_core/codec.h @@ -5,13 +5,13 @@ #pragma once #include <array> -#include <vector> +#include <deque> #include "common/common_types.h" namespace Codec { /// A variable length buffer of signed PCM16 stereo samples. 
-using StereoBuffer16 = std::vector<std::array<s16, 2>>; +using StereoBuffer16 = std::deque<std::array<s16, 2>>; /// See: Codec::DecodeADPCM struct ADPCMState { diff --git a/src/audio_core/hle/source.h b/src/audio_core/hle/source.h index ccb7f064f..c4d2debc2 100644 --- a/src/audio_core/hle/source.h +++ b/src/audio_core/hle/source.h @@ -108,7 +108,7 @@ private: u32 current_sample_number = 0; u32 next_sample_number = 0; - std::vector<std::array<s16, 2>> current_buffer; + AudioInterp::StereoBuffer16 current_buffer; // buffer_id state diff --git a/src/audio_core/interpolate.cpp b/src/audio_core/interpolate.cpp index 16e68bc5c..83573d772 100644 --- a/src/audio_core/interpolate.cpp +++ b/src/audio_core/interpolate.cpp @@ -47,7 +47,7 @@ static void StepOverSamples(State& state, StereoBuffer16& input, float rate, state.xn1 = input[inputi + 1]; state.fposition = fposition - inputi * scale_factor; - input.erase(input.begin(), input.begin() + inputi + 2); + input.erase(input.begin(), std::next(input.begin(), inputi + 2)); } void None(State& state, StereoBuffer16& input, float rate, DSP::HLE::StereoFrame16& output, diff --git a/src/audio_core/interpolate.h b/src/audio_core/interpolate.h index 59f59bc14..8dff6111a 100644 --- a/src/audio_core/interpolate.h +++ b/src/audio_core/interpolate.h @@ -5,14 +5,14 @@ #pragma once #include <array> -#include <vector> +#include <deque> #include "audio_core/hle/common.h" #include "common/common_types.h" namespace AudioInterp { /// A variable length buffer of signed PCM16 stereo samples. -using StereoBuffer16 = std::vector<std::array<s16, 2>>; +using StereoBuffer16 = std::deque<std::array<s16, 2>>; struct State { /// Two historical samples. From 41f6c9f87f3cd231954cd401be39653c4f78740a Mon Sep 17 00:00:00 2001 From: Subv Date: Sun, 24 Sep 2017 20:52:46 -0500 Subject: [PATCH 2/8] Memory/RasterizerCache: Ignore unmapped memory regions when caching physical regions. Not all physical regions need to be mapped into the address space of every process, for example, system modules do not have a VRAM mapping. This fixes a crash when loading applets and system modules. 
--- src/core/memory.cpp | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index 68a6b1ac2..2f5cdcefe 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -316,8 +316,15 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { for (unsigned i = 0; i < num_pages; ++i, paddr += PAGE_SIZE) { boost::optional<VAddr> maybe_vaddr = PhysicalToVirtualAddress(paddr); - if (!maybe_vaddr) + // While the physical <-> virtual mapping is 1:1 for the regions supported by the cache, + // some games (like Pokemon Super Mystery Dungeon) will try to use textures that go beyond + // the end address of VRAM, causing the Physical->Virtual translation to fail when flushing + // parts of the texture. + if (!maybe_vaddr) { + LOG_ERROR(HW_Memory, + "Trying to flush a cached region to an invalid physical address %08X", paddr); continue; + } VAddr vaddr = *maybe_vaddr; u8& res_count = current_page_table->cached_res_count[vaddr >> PAGE_BITS]; @@ -329,6 +336,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { if (res_count == 0) { PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. + break; case PageType::Memory: page_type = PageType::RasterizerCachedMemory; current_page_table->pointers[vaddr >> PAGE_BITS] = nullptr; @@ -347,6 +358,10 @@ void RasterizerMarkRegionCached(PAddr start, u32 size, int count_delta) { if (res_count == 0) { PageType& page_type = current_page_table->attributes[vaddr >> PAGE_BITS]; switch (page_type) { + case PageType::Unmapped: + // It is not necessary for a process to have this region mapped into its address + // space, for example, a system module need not have a VRAM mapping. 
+ break; case PageType::RasterizerCachedMemory: { u8* pointer = GetPointerFromVMA(vaddr & ~PAGE_MASK); if (pointer == nullptr) { From 35da7f57efd5153be37a05ffcbb57412da74265a Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 26 Sep 2017 17:27:44 -0500 Subject: [PATCH 3/8] Memory: Allow IsValidVirtualAddress to be called with a specific process parameter. There is still an overload of IsValidVirtualAddress that only takes the VAddr and will default to the current process. --- src/core/memory.cpp | 25 ++++++++++++++++++------- src/core/memory.h | 7 +++++++ 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/src/core/memory.cpp b/src/core/memory.cpp index a6b5f6c99..c42f4326b 100644 --- a/src/core/memory.cpp +++ b/src/core/memory.cpp @@ -110,8 +110,8 @@ static u8* GetPointerFromVMA(VAddr vaddr) { /** * This function should only be called for virtual addreses with attribute `PageType::Special`. */ -static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { - for (const auto& region : current_page_table->special_regions) { +static MMIORegionPointer GetMMIOHandler(const PageTable& page_table, VAddr vaddr) { + for (const auto& region : page_table.special_regions) { if (vaddr >= region.base && vaddr < (region.base + region.size)) { return region.handler; } @@ -120,6 +120,11 @@ static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { return nullptr; // Should never happen } +static MMIORegionPointer GetMMIOHandler(VAddr vaddr) { + const PageTable& page_table = Kernel::g_current_process->vm_manager.page_table; + return GetMMIOHandler(page_table, vaddr); +} + template T ReadMMIO(MMIORegionPointer mmio_handler, VAddr addr); @@ -204,18 +209,20 @@ void Write(const VAddr vaddr, const T data) { } } -bool IsValidVirtualAddress(const VAddr vaddr) { - const u8* page_pointer = current_page_table->pointers[vaddr >> PAGE_BITS]; +bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr) { + auto& page_table = process.vm_manager.page_table; + + const u8* page_pointer 
= page_table.pointers[vaddr >> PAGE_BITS]; if (page_pointer) return true; - if (current_page_table->attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) + if (page_table.attributes[vaddr >> PAGE_BITS] == PageType::RasterizerCachedMemory) return true; - if (current_page_table->attributes[vaddr >> PAGE_BITS] != PageType::Special) + if (page_table.attributes[vaddr >> PAGE_BITS] != PageType::Special) return false; - MMIORegionPointer mmio_region = GetMMIOHandler(vaddr); + MMIORegionPointer mmio_region = GetMMIOHandler(page_table, vaddr); if (mmio_region) { return mmio_region->IsValidAddress(vaddr); } @@ -223,6 +230,10 @@ bool IsValidVirtualAddress(const VAddr vaddr) { return false; } +bool IsValidVirtualAddress(const VAddr vaddr) { + return IsValidVirtualAddress(*Kernel::g_current_process, vaddr); +} + bool IsValidPhysicalAddress(const PAddr paddr) { return GetPhysicalPointer(paddr) != nullptr; } diff --git a/src/core/memory.h b/src/core/memory.h index 1865bfea0..347c08c78 100644 --- a/src/core/memory.h +++ b/src/core/memory.h @@ -12,6 +12,10 @@ #include "common/common_types.h" #include "core/mmio.h" +namespace Kernel { +class Process; +} + namespace Memory { /** @@ -185,7 +189,10 @@ enum : VAddr { void SetCurrentPageTable(PageTable* page_table); PageTable* GetCurrentPageTable(); +/// Determines if the given VAddr is valid for the specified process. 
+bool IsValidVirtualAddress(const Kernel::Process& process, const VAddr vaddr); bool IsValidVirtualAddress(const VAddr addr); + bool IsValidPhysicalAddress(const PAddr addr); u8 Read8(VAddr addr); From c102e3ae282ae849667ae91f5f0213a80adf474f Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 26 Sep 2017 17:29:06 -0500 Subject: [PATCH 4/8] Tests: Fixed ARM VFP tests --- src/tests/core/arm/arm_test_common.cpp | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/src/tests/core/arm/arm_test_common.cpp b/src/tests/core/arm/arm_test_common.cpp index cfe0d503a..484713a92 100644 --- a/src/tests/core/arm/arm_test_common.cpp +++ b/src/tests/core/arm/arm_test_common.cpp @@ -3,30 +3,34 @@ // Refer to the license.txt file included. #include "core/core.h" +#include "core/hle/kernel/process.h" #include "core/memory.h" #include "core/memory_setup.h" #include "tests/core/arm/arm_test_common.h" namespace ArmTests { -static Memory::PageTable page_table; +static Memory::PageTable* page_table = nullptr; TestEnvironment::TestEnvironment(bool mutable_memory_) : mutable_memory(mutable_memory_), test_memory(std::make_shared(this)) { - page_table.pointers.fill(nullptr); - page_table.attributes.fill(Memory::PageType::Unmapped); - page_table.cached_res_count.fill(0); + Kernel::g_current_process = Kernel::Process::Create(Kernel::CodeSet::Create("", 0)); + page_table = &Kernel::g_current_process->vm_manager.page_table; - Memory::MapIoRegion(page_table, 0x00000000, 0x80000000, test_memory); - Memory::MapIoRegion(page_table, 0x80000000, 0x80000000, test_memory); + page_table->pointers.fill(nullptr); + page_table->attributes.fill(Memory::PageType::Unmapped); + page_table->cached_res_count.fill(0); - Memory::SetCurrentPageTable(&page_table); + Memory::MapIoRegion(*page_table, 0x00000000, 0x80000000, test_memory); + Memory::MapIoRegion(*page_table, 0x80000000, 0x80000000, test_memory); + + Memory::SetCurrentPageTable(page_table); } 
TestEnvironment::~TestEnvironment() { - Memory::UnmapRegion(page_table, 0x80000000, 0x80000000); - Memory::UnmapRegion(page_table, 0x00000000, 0x80000000); + Memory::UnmapRegion(*page_table, 0x80000000, 0x80000000); + Memory::UnmapRegion(*page_table, 0x00000000, 0x80000000); } void TestEnvironment::SetMemory64(VAddr vaddr, u64 value) { From a8d2f5787f998e08eccaa66c2fe0cdab997c2253 Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 26 Sep 2017 17:31:50 -0500 Subject: [PATCH 5/8] Tests: Added Memory::IsValidVirtualAddress tests. --- src/tests/CMakeLists.txt | 1 + src/tests/core/memory/memory.cpp | 56 ++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 src/tests/core/memory/memory.cpp diff --git a/src/tests/CMakeLists.txt b/src/tests/CMakeLists.txt index 5e9c4c2bf..1aac0daa2 100644 --- a/src/tests/CMakeLists.txt +++ b/src/tests/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRCS core/arm/dyncom/arm_dyncom_vfp_tests.cpp core/file_sys/path_parser.cpp core/hle/kernel/hle_ipc.cpp + core/memory/memory.cpp glad.cpp tests.cpp ) diff --git a/src/tests/core/memory/memory.cpp b/src/tests/core/memory/memory.cpp new file mode 100644 index 000000000..a01b896f7 --- /dev/null +++ b/src/tests/core/memory/memory.cpp @@ -0,0 +1,56 @@ +// Copyright 2017 Citra Emulator Project +// Licensed under GPLv2 or any later version +// Refer to the license.txt file included. 
+ +#include +#include "core/hle/kernel/memory.h" +#include "core/hle/kernel/process.h" +#include "core/memory.h" + +TEST_CASE("Memory::IsValidVirtualAddress", "[core][memory]") { + SECTION("these regions should not be mapped on an empty process") { + auto process = Kernel::Process::Create(Kernel::CodeSet::Create("", 0)); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::PROCESS_IMAGE_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::HEAP_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::LINEAR_HEAP_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == false); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::TLS_AREA_VADDR) == false); + } + + SECTION("CONFIG_MEMORY_VADDR and SHARED_PAGE_VADDR should be valid after mapping them") { + auto process = Kernel::Process::Create(Kernel::CodeSet::Create("", 0)); + Kernel::MapSharedPages(process->vm_manager); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == true); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::SHARED_PAGE_VADDR) == true); + } + + SECTION("special regions should be valid after mapping them") { + auto process = Kernel::Process::Create(Kernel::CodeSet::Create("", 0)); + SECTION("VRAM") { + Kernel::HandleSpecialMapping(process->vm_manager, + {Memory::VRAM_VADDR, Memory::VRAM_SIZE, false, false}); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::VRAM_VADDR) == true); + } + + SECTION("IO (Not yet implemented)") { + Kernel::HandleSpecialMapping( + process->vm_manager, {Memory::IO_AREA_VADDR, Memory::IO_AREA_SIZE, false, false}); + CHECK_FALSE(Memory::IsValidVirtualAddress(*process, Memory::IO_AREA_VADDR) == true); + } + + SECTION("DSP") { + Kernel::HandleSpecialMapping( + process->vm_manager, 
{Memory::DSP_RAM_VADDR, Memory::DSP_RAM_SIZE, false, false}); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::DSP_RAM_VADDR) == true); + } + } + + SECTION("Unmapping a VAddr should make it invalid") { + auto process = Kernel::Process::Create(Kernel::CodeSet::Create("", 0)); + Kernel::MapSharedPages(process->vm_manager); + process->vm_manager.UnmapRange(Memory::CONFIG_MEMORY_VADDR, Memory::CONFIG_MEMORY_SIZE); + CHECK(Memory::IsValidVirtualAddress(*process, Memory::CONFIG_MEMORY_VADDR) == false); + } +} From 3165466b665185ecbc3e33b02b0b90e25e7248ba Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 26 Sep 2017 17:40:49 -0500 Subject: [PATCH 6/8] Kernel/Thread: Allow specifying which process a thread belongs to when creating it. Don't automatically assume that Thread::Create will only be called when the parent process is currently scheduled. This assumption will be broken when applets or system modules are loaded. --- src/core/hle/kernel/process.cpp | 2 +- src/core/hle/kernel/thread.cpp | 17 +++++++++-------- src/core/hle/kernel/thread.h | 15 +++++++++------ src/core/hle/svc.cpp | 5 +++-- 4 files changed, 22 insertions(+), 17 deletions(-) diff --git a/src/core/hle/kernel/process.cpp b/src/core/hle/kernel/process.cpp index 522ad2333..cf3163e0f 100644 --- a/src/core/hle/kernel/process.cpp +++ b/src/core/hle/kernel/process.cpp @@ -147,7 +147,7 @@ void Process::Run(s32 main_thread_priority, u32 stack_size) { } vm_manager.LogLayout(Log::Level::Debug); - Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority); + Kernel::SetupMainThread(codeset->entrypoint, main_thread_priority, this); } VAddr Process::GetLinearHeapAreaAddress() const { diff --git a/src/core/hle/kernel/thread.cpp b/src/core/hle/kernel/thread.cpp index 61378211f..1033f8552 100644 --- a/src/core/hle/kernel/thread.cpp +++ b/src/core/hle/kernel/thread.cpp @@ -361,7 +361,8 @@ static void ResetThreadContext(ARM_Interface::ThreadContext& context, u32 stack_ } ResultVal> Thread::Create(std::string 
name, VAddr entry_point, u32 priority, - u32 arg, s32 processor_id, VAddr stack_top) { + u32 arg, s32 processor_id, VAddr stack_top, + SharedPtr owner_process) { // Check if priority is in ranged. Lowest priority -> highest priority id. if (priority > THREADPRIO_LOWEST) { LOG_ERROR(Kernel_SVC, "Invalid thread priority: %d", priority); @@ -375,7 +376,7 @@ ResultVal> Thread::Create(std::string name, VAddr entry_point, // TODO(yuriks): Other checks, returning 0xD9001BEA - if (!Memory::IsValidVirtualAddress(entry_point)) { + if (!Memory::IsValidVirtualAddress(*owner_process, entry_point)) { LOG_ERROR(Kernel_SVC, "(name=%s): invalid entry %08x", name.c_str(), entry_point); // TODO: Verify error return ResultCode(ErrorDescription::InvalidAddress, ErrorModule::Kernel, @@ -399,10 +400,10 @@ ResultVal> Thread::Create(std::string name, VAddr entry_point, thread->wait_address = 0; thread->name = std::move(name); thread->callback_handle = wakeup_callback_handle_table.Create(thread).Unwrap(); - thread->owner_process = g_current_process; + thread->owner_process = owner_process; // Find the next available TLS index, and mark it as used - auto& tls_slots = Kernel::g_current_process->tls_slots; + auto& tls_slots = owner_process->tls_slots; bool needs_allocation = true; u32 available_page; // Which allocated page has free space u32 available_slot; // Which slot within the page is free @@ -426,13 +427,13 @@ ResultVal> Thread::Create(std::string name, VAddr entry_point, // Allocate some memory from the end of the linear heap for this region. 
linheap_memory->insert(linheap_memory->end(), Memory::PAGE_SIZE, 0); memory_region->used += Memory::PAGE_SIZE; - Kernel::g_current_process->linear_heap_used += Memory::PAGE_SIZE; + owner_process->linear_heap_used += Memory::PAGE_SIZE; tls_slots.emplace_back(0); // The page is completely available at the start available_page = tls_slots.size() - 1; available_slot = 0; // Use the first slot in the new page - auto& vm_manager = Kernel::g_current_process->vm_manager; + auto& vm_manager = owner_process->vm_manager; vm_manager.RefreshMemoryBlockMappings(linheap_memory.get()); // Map the page to the current process' address space. @@ -486,10 +487,10 @@ void Thread::BoostPriority(s32 priority) { current_priority = priority; } -SharedPtr SetupMainThread(u32 entry_point, s32 priority) { +SharedPtr SetupMainThread(u32 entry_point, s32 priority, SharedPtr owner_process) { // Initialize new "main" thread auto thread_res = Thread::Create("main", entry_point, priority, 0, THREADPROCESSORID_0, - Memory::HEAP_VADDR_END); + Memory::HEAP_VADDR_END, owner_process); SharedPtr thread = std::move(thread_res).Unwrap(); diff --git a/src/core/hle/kernel/thread.h b/src/core/hle/kernel/thread.h index 6a3566f15..ddc0d15c5 100644 --- a/src/core/hle/kernel/thread.h +++ b/src/core/hle/kernel/thread.h @@ -56,10 +56,12 @@ public: * @param arg User data to pass to the thread * @param processor_id The ID(s) of the processors on which the thread is desired to be run * @param stack_top The address of the thread's stack top + * @param owner_process The parent process for the thread * @return A shared pointer to the newly created thread */ static ResultVal> Create(std::string name, VAddr entry_point, u32 priority, - u32 arg, s32 processor_id, VAddr stack_top); + u32 arg, s32 processor_id, VAddr stack_top, + SharedPtr owner_process); std::string GetName() const override { return name; @@ -116,9 +118,9 @@ public: void ResumeFromWait(); /** - * Schedules an event to wake up the specified thread after the 
specified delay - * @param nanoseconds The time this thread will be allowed to sleep for - */ + * Schedules an event to wake up the specified thread after the specified delay + * @param nanoseconds The time this thread will be allowed to sleep for + */ void WakeAfterDelay(s64 nanoseconds); /** @@ -214,9 +216,10 @@ private: * Sets up the primary application thread * @param entry_point The address at which the thread should start execution * @param priority The priority to give the main thread + * @param owner_process The parent process for the main thread * @return A shared pointer to the main thread */ -SharedPtr SetupMainThread(u32 entry_point, s32 priority); +SharedPtr SetupMainThread(u32 entry_point, s32 priority, SharedPtr owner_process); /** * Returns whether there are any threads that are ready to run. @@ -276,4 +279,4 @@ void ThreadingShutdown(); */ const std::vector>& GetThreadList(); -} // namespace +} // namespace Kernel diff --git a/src/core/hle/svc.cpp b/src/core/hle/svc.cpp index dfc36748c..05c6897bf 100644 --- a/src/core/hle/svc.cpp +++ b/src/core/hle/svc.cpp @@ -656,8 +656,9 @@ static ResultCode CreateThread(Kernel::Handle* out_handle, u32 priority, u32 ent "Newly created thread must run in the SysCore (Core1), unimplemented."); } - CASCADE_RESULT(SharedPtr thread, Kernel::Thread::Create(name, entry_point, priority, - arg, processor_id, stack_top)); + CASCADE_RESULT(SharedPtr thread, + Kernel::Thread::Create(name, entry_point, priority, arg, processor_id, stack_top, + Kernel::g_current_process)); thread->context.fpscr = FPSCR_DEFAULT_NAN | FPSCR_FLUSH_TO_ZERO | FPSCR_ROUND_TOZERO; // 0x03C00000 From 7f48aa8d2580da6b3b83a389e31804e493aba69f Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 26 Sep 2017 18:17:47 -0500 Subject: [PATCH 7/8] Loaders: Don't automatically set the current process every time we load an application. 
The loaders will now just create a Kernel::Process, construct it and return it to the caller, which is responsible for setting it as the current process and configuring the global page table. --- src/core/core.cpp | 6 ++++-- src/core/loader/3dsx.cpp | 15 +++++++-------- src/core/loader/3dsx.h | 2 +- src/core/loader/elf.cpp | 15 +++++++-------- src/core/loader/elf.h | 2 +- src/core/loader/loader.h | 13 ++++++++----- src/core/loader/ncch.cpp | 19 +++++++++---------- src/core/loader/ncch.h | 5 +++-- 8 files changed, 40 insertions(+), 37 deletions(-) diff --git a/src/core/core.cpp b/src/core/core.cpp index 59b8768e7..0c7a72987 100644 --- a/src/core/core.cpp +++ b/src/core/core.cpp @@ -13,6 +13,7 @@ #include "core/core_timing.h" #include "core/gdbstub/gdbstub.h" #include "core/hle/kernel/kernel.h" +#include "core/hle/kernel/process.h" #include "core/hle/kernel/thread.h" #include "core/hle/service/service.h" #include "core/hw/hw.h" @@ -100,7 +101,7 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file return init_result; } - const Loader::ResultStatus load_result{app_loader->Load()}; + const Loader::ResultStatus load_result{app_loader->Load(Kernel::g_current_process)}; if (Loader::ResultStatus::Success != load_result) { LOG_CRITICAL(Core, "Failed to load ROM (Error %i)!", load_result); System::Shutdown(); @@ -114,6 +115,7 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file return ResultStatus::ErrorLoader; } } + Memory::SetCurrentPageTable(&Kernel::g_current_process->vm_manager.page_table); status = ResultStatus::Success; return status; } @@ -196,4 +198,4 @@ void System::Shutdown() { LOG_DEBUG(Core, "Shutdown OK"); } -} // namespace +} // namespace Core diff --git a/src/core/loader/3dsx.cpp b/src/core/loader/3dsx.cpp index 5ad5c5287..918038f1e 100644 --- a/src/core/loader/3dsx.cpp +++ b/src/core/loader/3dsx.cpp @@ -91,8 +91,8 @@ static u32 TranslateAddr(u32 addr, const THREEloadinfo* loadinfo, u32* offsets) 
return loadinfo->seg_addrs[2] + addr - offsets[1]; } -using Kernel::SharedPtr; using Kernel::CodeSet; +using Kernel::SharedPtr; static THREEDSX_Error Load3DSXFile(FileUtil::IOFile& file, u32 base_addr, SharedPtr* out_codeset) { @@ -255,7 +255,7 @@ FileType AppLoader_THREEDSX::IdentifyType(FileUtil::IOFile& file) { return FileType::Error; } -ResultStatus AppLoader_THREEDSX::Load() { +ResultStatus AppLoader_THREEDSX::Load(Kernel::SharedPtr& process) { if (is_loaded) return ResultStatus::ErrorAlreadyLoaded; @@ -267,16 +267,15 @@ ResultStatus AppLoader_THREEDSX::Load() { return ResultStatus::Error; codeset->name = filename; - Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); - Kernel::g_current_process->svc_access_mask.set(); - Kernel::g_current_process->address_mappings = default_address_mappings; - Memory::SetCurrentPageTable(&Kernel::g_current_process->vm_manager.page_table); + process = Kernel::Process::Create(std::move(codeset)); + process->svc_access_mask.set(); + process->address_mappings = default_address_mappings; // Attach the default resource limit (APPLICATION) to the process - Kernel::g_current_process->resource_limit = + process->resource_limit = Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION); - Kernel::g_current_process->Run(48, Kernel::DEFAULT_STACK_SIZE); + process->Run(48, Kernel::DEFAULT_STACK_SIZE); Service::FS::RegisterSelfNCCH(*this); diff --git a/src/core/loader/3dsx.h b/src/core/loader/3dsx.h index 3f376778a..1e59bbb9d 100644 --- a/src/core/loader/3dsx.h +++ b/src/core/loader/3dsx.h @@ -31,7 +31,7 @@ public: return IdentifyType(file); } - ResultStatus Load() override; + ResultStatus Load(Kernel::SharedPtr& process) override; ResultStatus ReadIcon(std::vector& buffer) override; diff --git a/src/core/loader/elf.cpp b/src/core/loader/elf.cpp index 2de1f4e81..e36e42120 100644 --- a/src/core/loader/elf.cpp +++ b/src/core/loader/elf.cpp @@ -13,8 +13,8 @@ #include "core/loader/elf.h" #include 
"core/memory.h" -using Kernel::SharedPtr; using Kernel::CodeSet; +using Kernel::SharedPtr; //////////////////////////////////////////////////////////////////////////////////////////////////// // ELF Header Constants @@ -375,7 +375,7 @@ FileType AppLoader_ELF::IdentifyType(FileUtil::IOFile& file) { return FileType::Error; } -ResultStatus AppLoader_ELF::Load() { +ResultStatus AppLoader_ELF::Load(Kernel::SharedPtr& process) { if (is_loaded) return ResultStatus::ErrorAlreadyLoaded; @@ -394,16 +394,15 @@ ResultStatus AppLoader_ELF::Load() { SharedPtr codeset = elf_reader.LoadInto(Memory::PROCESS_IMAGE_VADDR); codeset->name = filename; - Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); - Kernel::g_current_process->svc_access_mask.set(); - Kernel::g_current_process->address_mappings = default_address_mappings; - Memory::SetCurrentPageTable(&Kernel::g_current_process->vm_manager.page_table); + process = Kernel::Process::Create(std::move(codeset)); + process->svc_access_mask.set(); + process->address_mappings = default_address_mappings; // Attach the default resource limit (APPLICATION) to the process - Kernel::g_current_process->resource_limit = + process->resource_limit = Kernel::ResourceLimit::GetForCategory(Kernel::ResourceLimitCategory::APPLICATION); - Kernel::g_current_process->Run(48, Kernel::DEFAULT_STACK_SIZE); + process->Run(48, Kernel::DEFAULT_STACK_SIZE); is_loaded = true; return ResultStatus::Success; diff --git a/src/core/loader/elf.h b/src/core/loader/elf.h index 862aa90d8..113da5917 100644 --- a/src/core/loader/elf.h +++ b/src/core/loader/elf.h @@ -30,7 +30,7 @@ public: return IdentifyType(file); } - ResultStatus Load() override; + ResultStatus Load(Kernel::SharedPtr& process) override; private: std::string filename; diff --git a/src/core/loader/loader.h b/src/core/loader/loader.h index 3160fd2fd..82b2be6a3 100644 --- a/src/core/loader/loader.h +++ b/src/core/loader/loader.h @@ -13,10 +13,12 @@ #include #include 
"common/common_types.h" #include "common/file_util.h" +#include "core/hle/kernel/kernel.h" namespace Kernel { struct AddressMapping; -} +class Process; +} // namespace Kernel //////////////////////////////////////////////////////////////////////////////////////////////////// // Loader namespace @@ -92,10 +94,11 @@ public: virtual FileType GetFileType() = 0; /** - * Load the application - * @return ResultStatus result of function + * Load the application and return the created Process instance + * @param process The newly created process. + * @return The status result of the operation. */ - virtual ResultStatus Load() = 0; + virtual ResultStatus Load(Kernel::SharedPtr& process) = 0; /** * Loads the system mode that this application needs. @@ -206,4 +209,4 @@ extern const std::initializer_list default_address_mappi */ std::unique_ptr GetLoader(const std::string& filename); -} // namespace +} // namespace Loader diff --git a/src/core/loader/ncch.cpp b/src/core/loader/ncch.cpp index 5107135f9..66bc5823d 100644 --- a/src/core/loader/ncch.cpp +++ b/src/core/loader/ncch.cpp @@ -67,9 +67,9 @@ std::pair, ResultStatus> AppLoader_NCCH::LoadKernelSystemMo ResultStatus::Success); } -ResultStatus AppLoader_NCCH::LoadExec() { - using Kernel::SharedPtr; +ResultStatus AppLoader_NCCH::LoadExec(Kernel::SharedPtr& process) { using Kernel::CodeSet; + using Kernel::SharedPtr; if (!is_loaded) return ResultStatus::ErrorNotLoaded; @@ -107,16 +107,15 @@ ResultStatus AppLoader_NCCH::LoadExec() { codeset->entrypoint = codeset->code.addr; codeset->memory = std::make_shared>(std::move(code)); - Kernel::g_current_process = Kernel::Process::Create(std::move(codeset)); - Memory::SetCurrentPageTable(&Kernel::g_current_process->vm_manager.page_table); + process = Kernel::Process::Create(std::move(codeset)); // Attach a resource limit to the process based on the resource limit category - Kernel::g_current_process->resource_limit = + process->resource_limit = 
Kernel::ResourceLimit::GetForCategory(static_cast( overlay_ncch->exheader_header.arm11_system_local_caps.resource_limit_category)); // Set the default CPU core for this process - Kernel::g_current_process->ideal_processor = + process->ideal_processor = overlay_ncch->exheader_header.arm11_system_local_caps.ideal_processor; // Copy data while converting endianness @@ -124,11 +123,11 @@ ResultStatus AppLoader_NCCH::LoadExec() { kernel_caps; std::copy_n(overlay_ncch->exheader_header.arm11_kernel_caps.descriptors, kernel_caps.size(), begin(kernel_caps)); - Kernel::g_current_process->ParseKernelCaps(kernel_caps.data(), kernel_caps.size()); + process->ParseKernelCaps(kernel_caps.data(), kernel_caps.size()); s32 priority = overlay_ncch->exheader_header.arm11_system_local_caps.priority; u32 stack_size = overlay_ncch->exheader_header.codeset_info.stack_size; - Kernel::g_current_process->Run(priority, stack_size); + process->Run(priority, stack_size); return ResultStatus::Success; } return ResultStatus::Error; @@ -151,7 +150,7 @@ void AppLoader_NCCH::ParseRegionLockoutInfo() { } } -ResultStatus AppLoader_NCCH::Load() { +ResultStatus AppLoader_NCCH::Load(Kernel::SharedPtr& process) { u64_le ncch_program_id; if (is_loaded) @@ -183,7 +182,7 @@ ResultStatus AppLoader_NCCH::Load() { is_loaded = true; // Set state to loaded - result = LoadExec(); // Load the executable into memory for booting + result = LoadExec(process); // Load the executable into memory for booting if (ResultStatus::Success != result) return result; diff --git a/src/core/loader/ncch.h b/src/core/loader/ncch.h index 9b56465cb..09230ae33 100644 --- a/src/core/loader/ncch.h +++ b/src/core/loader/ncch.h @@ -33,7 +33,7 @@ public: return IdentifyType(file); } - ResultStatus Load() override; + ResultStatus Load(Kernel::SharedPtr& process) override; /** * Loads the Exheader and returns the system mode for this application. 
@@ -62,9 +62,10 @@ public: private: /** * Loads .code section into memory for booting + * @param process The newly created process * @return ResultStatus result of function */ - ResultStatus LoadExec(); + ResultStatus LoadExec(Kernel::SharedPtr& process); /// Reads the region lockout info in the SMDH and send it to CFG service void ParseRegionLockoutInfo(); From a321bce37834c1f3034bd87df14fc71c13e6b84a Mon Sep 17 00:00:00 2001 From: Subv Date: Tue, 29 Aug 2017 12:59:54 -0500 Subject: [PATCH 8/8] Disable unary operator- on Math::Vec2/Vec3/Vec4 for unsigned types. It is unlikely we will ever use this without first doing a Cast to a signed type. Fixes 9 "unary minus operator applied to unsigned type, result still unsigned" warnings on MSVC2017.3 --- src/common/vector_math.h | 12 ++++++++---- src/video_core/swrasterizer/clipper.cpp | 2 +- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/common/vector_math.h b/src/common/vector_math.h index 6e2a5ad60..2b05f66ee 100644 --- a/src/common/vector_math.h +++ b/src/common/vector_math.h @@ -31,6 +31,7 @@ #pragma once #include +#include namespace Math { @@ -90,7 +91,8 @@ public: y -= other.y; } - Vec2 operator-() const { + template + Vec2::value, U>> operator-() const { return MakeVec(-x, -y); } Vec2 operator*(const Vec2& other) const { @@ -247,7 +249,8 @@ public: z -= other.z; } - Vec3 operator-() const { + template + Vec3::value, U>> operator-() const { return MakeVec(-x, -y, -z); } Vec3 operator*(const Vec3& other) const { @@ -462,7 +465,8 @@ public: w -= other.w; } - Vec4 operator-() const { + template + Vec4::value, U>> operator-() const { return MakeVec(-x, -y, -z, -w); } Vec4 operator*(const Vec4& other) const { @@ -720,4 +724,4 @@ static inline Vec4 MakeVec(const T& x, const Vec3& yzw) { return MakeVec(x, yzw[0], yzw[1], yzw[2]); } -} // namespace +} // namespace Math diff --git a/src/video_core/swrasterizer/clipper.cpp b/src/video_core/swrasterizer/clipper.cpp index a52129eb7..c1ed48398 100644 --- 
a/src/video_core/swrasterizer/clipper.cpp +++ b/src/video_core/swrasterizer/clipper.cpp @@ -98,7 +98,7 @@ void ProcessTriangle(const OutputVertex& v0, const OutputVertex& v1, const Outpu auto FlipQuaternionIfOpposite = [](auto& a, const auto& b) { if (Math::Dot(a, b) < float24::Zero()) - a = -a; + a = a * float24::FromFloat32(-1.0f); }; // Flip the quaternions if they are opposite to prevent interpolating them over the wrong