core: Create a thread for each CPU core, keep in lock-step with a barrier.
This commit is contained in:
		| @@ -27,6 +27,13 @@ namespace Core { | ||||
|  | ||||
| System::~System() = default; | ||||
|  | ||||
| /** | ||||
|  * Entry point for a host thread that drives one emulated CPU core. | ||||
|  * Repeatedly calls Cpu::RunLoop on the given core for as long as the system reports | ||||
|  * that it is powered on. | ||||
|  * @param cpu_state Core to run; held by value so the core stays alive while this loop runs. | ||||
|  */ | ||||
| static void RunCpuCore(std::shared_ptr<Cpu> cpu_state) { | ||||
|     // Reach the instance accessor through the class name. The previous spelling, | ||||
|     // `Core::System().GetInstance()`, value-constructed (and destroyed) a throwaway System | ||||
|     // object on every iteration just to call the accessor on it. | ||||
|     // NOTE(review): assumes System::GetInstance() is a static member - confirm in core.h. | ||||
|     while (Core::System::GetInstance().IsPoweredOn()) { | ||||
|         cpu_state->RunLoop(true); | ||||
|     } | ||||
| } | ||||
|  | ||||
| System::ResultStatus System::RunLoop(bool tight_loop) { | ||||
|     status = ResultStatus::Success; | ||||
|  | ||||
| @@ -109,7 +116,7 @@ System::ResultStatus System::Load(EmuWindow* emu_window, const std::string& file | ||||
| } | ||||
|  | ||||
| /// Requests a reschedule on the CPU core that belongs to the calling host thread. | ||||
| void System::PrepareReschedule() { | ||||
|     // Only the caller's own core is targeted; the former hard-coded cpu_cores[0] request | ||||
|     // is dropped so that threads running other cores no longer touch core 0's state. | ||||
|     CurrentCpuCore().PrepareReschedule(); | ||||
| } | ||||
|  | ||||
| PerfStats::Results System::GetAndResetPerfStats() { | ||||
| @@ -123,14 +130,13 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) { | ||||
|  | ||||
|     current_process = Kernel::Process::Create("main"); | ||||
|  | ||||
|     for (auto& cpu_core : cpu_cores) { | ||||
|         cpu_core = std::make_unique<Cpu>(); | ||||
|     cpu_barrier = std::make_shared<CpuBarrier>(); | ||||
|     for (size_t index = 0; index < cpu_cores.size(); ++index) { | ||||
|         cpu_cores[index] = std::make_shared<Cpu>(cpu_barrier, index); | ||||
|     } | ||||
|  | ||||
|     gpu_core = std::make_unique<Tegra::GPU>(); | ||||
|  | ||||
|     telemetry_session = std::make_unique<Core::TelemetrySession>(); | ||||
|  | ||||
|     service_manager = std::make_shared<Service::SM::ServiceManager>(); | ||||
|  | ||||
|     HW::Init(); | ||||
| @@ -142,6 +148,14 @@ System::ResultStatus System::Init(EmuWindow* emu_window, u32 system_mode) { | ||||
|         return ResultStatus::ErrorVideoCore; | ||||
|     } | ||||
|  | ||||
|     // Create threads for CPU cores 1-3, and build thread_to_cpu map | ||||
|     // CPU core 0 is run on the main thread | ||||
|     thread_to_cpu[std::this_thread::get_id()] = cpu_cores[0]; | ||||
|     for (size_t index = 0; index < cpu_core_threads.size(); ++index) { | ||||
|         cpu_core_threads[index] = std::make_unique<std::thread>(RunCpuCore, cpu_cores[index + 1]); | ||||
|         thread_to_cpu[cpu_core_threads[index]->get_id()] = cpu_cores[index + 1]; | ||||
|     } | ||||
|  | ||||
|     NGLOG_DEBUG(Core, "Initialized OK"); | ||||
|  | ||||
|     // Reset counters and set time origin to current frame | ||||
| @@ -171,9 +185,15 @@ void System::Shutdown() { | ||||
|     telemetry_session.reset(); | ||||
|     gpu_core.reset(); | ||||
|  | ||||
|     // Close all CPU/threading state | ||||
|     thread_to_cpu.clear(); | ||||
|     for (auto& cpu_core : cpu_cores) { | ||||
|         cpu_core.reset(); | ||||
|     } | ||||
|     for (auto& thread : cpu_core_threads) { | ||||
|         thread->join(); | ||||
|         thread.reset(); | ||||
|     } | ||||
|  | ||||
|     CoreTiming::Shutdown(); | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
| #include <array> | ||||
| #include <memory> | ||||
| #include <string> | ||||
| #include <thread> | ||||
| #include "common/common_types.h" | ||||
| #include "core/core_cpu.h" | ||||
| #include "core/hle/kernel/kernel.h" | ||||
| @@ -112,7 +113,7 @@ public: | ||||
|      * @returns A reference to the emulated CPU. | ||||
|      */ | ||||
|     ARM_Interface& CPU() { | ||||
|         // Resolve the core from the calling host thread. The stale `cpu_cores[0]` return | ||||
|         // that preceded this one made it unreachable and always handed out core 0. | ||||
|         return CurrentCpuCore().CPU(); | ||||
|     } | ||||
|  | ||||
|     Tegra::GPU& GPU() { | ||||
| @@ -120,7 +121,7 @@ public: | ||||
|     } | ||||
|  | ||||
|     /// Returns the scheduler of the CPU core that belongs to the calling host thread. | ||||
|     Kernel::Scheduler& Scheduler() { | ||||
|         // The stale `cpu_cores[0]` return that preceded this one made it unreachable and | ||||
|         // always handed out core 0's scheduler, whichever thread asked. | ||||
|         return CurrentCpuCore().Scheduler(); | ||||
|     } | ||||
|  | ||||
|     Kernel::SharedPtr<Kernel::Process>& CurrentProcess() { | ||||
| @@ -157,6 +158,14 @@ public: | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     /** | ||||
|      * Looks up the CPU core registered in thread_to_cpu for the calling host thread. | ||||
|      * Asserts that the thread has an entry and that the entry holds a core. | ||||
|      * @returns A reference to that CPU core. | ||||
|      */ | ||||
|     Cpu& CurrentCpuCore() { | ||||
|         const std::thread::id host_thread_id = std::this_thread::get_id(); | ||||
|         const auto search = thread_to_cpu.find(host_thread_id); | ||||
|  | ||||
|         // The caller must be one of the threads registered in the map | ||||
|         ASSERT(search != thread_to_cpu.end()); | ||||
|         // ... and its slot must still hold a live core | ||||
|         ASSERT(search->second); | ||||
|  | ||||
|         return *search->second; | ||||
|     } | ||||
|  | ||||
|     /** | ||||
|      * Initialize the emulated system. | ||||
|      * @param emu_window Pointer to the host-system window used for video output and keyboard input. | ||||
| @@ -167,14 +176,12 @@ private: | ||||
|  | ||||
|     /// AppLoader used to load the current executing application | ||||
|     std::unique_ptr<Loader::AppLoader> app_loader; | ||||
|  | ||||
|     std::array<std::unique_ptr<Cpu>, 4> cpu_cores; | ||||
|     std::unique_ptr<Tegra::GPU> gpu_core; | ||||
|     std::shared_ptr<Tegra::DebugContext> debug_context; | ||||
|     Kernel::SharedPtr<Kernel::Process> current_process; | ||||
|  | ||||
|     /// When true, signals that a reschedule should happen | ||||
|     bool reschedule_pending{}; | ||||
|     std::shared_ptr<CpuBarrier> cpu_barrier; | ||||
|     std::array<std::shared_ptr<Cpu>, NUM_CPU_CORES> cpu_cores; | ||||
|     std::array<std::unique_ptr<std::thread>, NUM_CPU_CORES - 1> cpu_core_threads; | ||||
|  | ||||
|     /// Service manager | ||||
|     std::shared_ptr<Service::SM::ServiceManager> service_manager; | ||||
| @@ -186,6 +193,9 @@ private: | ||||
|  | ||||
|     ResultStatus status = ResultStatus::Success; | ||||
|     std::string status_details = ""; | ||||
|  | ||||
|     /// Map of host threads to CPU cores | ||||
|     std::map<std::thread::id, std::shared_ptr<Cpu>> thread_to_cpu; | ||||
| }; | ||||
|  | ||||
| inline ARM_Interface& CPU() { | ||||
|   | ||||
| @@ -2,6 +2,9 @@ | ||||
| // Licensed under GPLv2 or any later version | ||||
| // Refer to the license.txt file included. | ||||
|  | ||||
| #include <condition_variable> | ||||
| #include <mutex> | ||||
|  | ||||
| #include "common/logging/log.h" | ||||
| #ifdef ARCHITECTURE_x86_64 | ||||
| #include "core/arm/dynarmic/arm_dynarmic.h" | ||||
| @@ -16,7 +19,9 @@ | ||||
|  | ||||
| namespace Core { | ||||
|  | ||||
| Cpu::Cpu() { | ||||
| Cpu::Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index) | ||||
|     : cpu_barrier{std::move(cpu_barrier)}, core_index{core_index} { | ||||
|  | ||||
|     if (Settings::values.use_cpu_jit) { | ||||
| #ifdef ARCHITECTURE_x86_64 | ||||
|         arm_interface = std::make_shared<ARM_Dynarmic>(); | ||||
| @@ -32,15 +37,25 @@ Cpu::Cpu() { | ||||
| } | ||||
|  | ||||
| void Cpu::RunLoop(bool tight_loop) { | ||||
|     // Wait for all other CPU cores to complete the previous slice, such that they run in lock-step | ||||
|     cpu_barrier->Rendezvous(); | ||||
|  | ||||
|     // If we don't have a currently active thread then don't execute instructions, | ||||
|     // instead advance to the next event and try to yield to the next thread | ||||
|     if (Kernel::GetCurrentThread() == nullptr) { | ||||
|         NGLOG_TRACE(Core, "Idling"); | ||||
|         CoreTiming::Idle(); | ||||
|         CoreTiming::Advance(); | ||||
|         NGLOG_TRACE(Core, "Core-{} idling", core_index); | ||||
|  | ||||
|         if (IsMainCore()) { | ||||
|             CoreTiming::Idle(); | ||||
|             CoreTiming::Advance(); | ||||
|         } | ||||
|  | ||||
|         PrepareReschedule(); | ||||
|     } else { | ||||
|         CoreTiming::Advance(); | ||||
|         if (IsMainCore()) { | ||||
|             CoreTiming::Advance(); | ||||
|         } | ||||
|  | ||||
|         if (tight_loop) { | ||||
|             arm_interface->Run(); | ||||
|         } else { | ||||
|   | ||||
| @@ -4,7 +4,9 @@ | ||||
|  | ||||
| #pragma once | ||||
|  | ||||
| #include <condition_variable> | ||||
| #include <memory> | ||||
| #include <mutex> | ||||
| #include <string> | ||||
| #include "common/common_types.h" | ||||
|  | ||||
| @@ -16,9 +18,32 @@ class Scheduler; | ||||
|  | ||||
| namespace Core { | ||||
|  | ||||
| constexpr unsigned NUM_CPU_CORES{4}; | ||||
|  | ||||
| /** | ||||
|  * Reusable barrier shared by the CPU cores. Each participant calls Rendezvous() once per | ||||
|  * round and is held there until NUM_CPU_CORES participants have arrived. | ||||
|  */ | ||||
| class CpuBarrier { | ||||
| public: | ||||
|     /// Blocks the caller until NUM_CPU_CORES callers have reached the barrier for this round. | ||||
|     void Rendezvous() { | ||||
|         std::unique_lock<std::mutex> lock(mutex); | ||||
|  | ||||
|         --cores_waiting; | ||||
|         if (!cores_waiting) { | ||||
|             // Last arrival: re-arm the counter for the next round, advance the round number | ||||
|             // and release everyone parked below. | ||||
|             cores_waiting = NUM_CPU_CORES; | ||||
|             ++generation; | ||||
|             condition.notify_all(); | ||||
|             return; | ||||
|         } | ||||
|  | ||||
|         // Wait on a predicate rather than on the bare notification: condition variables may | ||||
|         // wake spuriously, and returning early would let this core run ahead of the others. | ||||
|         // Unsigned wrap-around of the round number is harmless because only inequality is | ||||
|         // tested. | ||||
|         const unsigned arrival_generation = generation; | ||||
|         condition.wait(lock, [this, arrival_generation] { | ||||
|             return generation != arrival_generation; | ||||
|         }); | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     unsigned cores_waiting{NUM_CPU_CORES}; ///< Arrivals still missing in the current round | ||||
|     unsigned generation{};                 ///< Incremented each time a round completes | ||||
|     std::mutex mutex; | ||||
|     std::condition_variable condition; | ||||
| }; | ||||
|  | ||||
| class Cpu { | ||||
| public: | ||||
|     Cpu(); | ||||
|     Cpu(std::shared_ptr<CpuBarrier> cpu_barrier, size_t core_index); | ||||
|  | ||||
|     void RunLoop(bool tight_loop = true); | ||||
|  | ||||
| @@ -34,13 +59,19 @@ public: | ||||
|         return *scheduler; | ||||
|     } | ||||
|  | ||||
|     /// Reports whether this is core 0, the "main" core. | ||||
|     bool IsMainCore() const { | ||||
|         constexpr size_t main_core_index{0}; | ||||
|         return core_index == main_core_index; | ||||
|     } | ||||
|  | ||||
| private: | ||||
|     void Reschedule(); | ||||
|  | ||||
|     std::shared_ptr<ARM_Interface> arm_interface; | ||||
|     std::shared_ptr<CpuBarrier> cpu_barrier; | ||||
|     std::unique_ptr<Kernel::Scheduler> scheduler; | ||||
|  | ||||
|     bool reschedule_pending{}; | ||||
|     size_t core_index; | ||||
| }; | ||||
|  | ||||
| } // namespace Core | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei