Merge pull request #31 from neobrain/gpu_framebuffer
GPU framebuffer emulation improvements
This commit is contained in:
		| @@ -28,22 +28,24 @@ QVariant GPUCommandStreamItemModel::data(const QModelIndex& index, int role) con | |||||||
|     const GSP_GPU::GXCommand& command = GetDebugger()->ReadGXCommandHistory(command_index); |     const GSP_GPU::GXCommand& command = GetDebugger()->ReadGXCommandHistory(command_index); | ||||||
|     if (role == Qt::DisplayRole) |     if (role == Qt::DisplayRole) | ||||||
|     { |     { | ||||||
|         std::map<GSP_GPU::GXCommandId, const char*> command_names; |         std::map<GSP_GPU::GXCommandId, const char*> command_names = { | ||||||
|         command_names[GSP_GPU::GXCommandId::REQUEST_DMA] = "REQUEST_DMA"; |             { GSP_GPU::GXCommandId::REQUEST_DMA, "REQUEST_DMA" }, | ||||||
|         command_names[GSP_GPU::GXCommandId::SET_COMMAND_LIST_FIRST] = "SET_COMMAND_LIST_FIRST"; |             { GSP_GPU::GXCommandId::SET_COMMAND_LIST_FIRST, "SET_COMMAND_LIST_FIRST" }, | ||||||
|         command_names[GSP_GPU::GXCommandId::SET_MEMORY_FILL] = "SET_MEMORY_FILL"; |             { GSP_GPU::GXCommandId::SET_MEMORY_FILL, "SET_MEMORY_FILL" }, | ||||||
|         command_names[GSP_GPU::GXCommandId::SET_DISPLAY_TRANSFER] = "SET_DISPLAY_TRANSFER"; |             { GSP_GPU::GXCommandId::SET_DISPLAY_TRANSFER, "SET_DISPLAY_TRANSFER" }, | ||||||
|         command_names[GSP_GPU::GXCommandId::SET_TEXTURE_COPY] = "SET_TEXTURE_COPY"; |             { GSP_GPU::GXCommandId::SET_TEXTURE_COPY, "SET_TEXTURE_COPY" }, | ||||||
|         command_names[GSP_GPU::GXCommandId::SET_COMMAND_LIST_LAST] = "SET_COMMAND_LIST_LAST"; |             { GSP_GPU::GXCommandId::SET_COMMAND_LIST_LAST, "SET_COMMAND_LIST_LAST" } | ||||||
|         QString str = QString("%1 %2 %3 %4 %5 %6 %7 %8 %9").arg(command_names[static_cast<GSP_GPU::GXCommandId>(command.id)]) |         }; | ||||||
|                         .arg(command.data[0], 8, 16, QLatin1Char('0')) |         const u32* command_data = reinterpret_cast<const u32*>(&command); | ||||||
|                         .arg(command.data[1], 8, 16, QLatin1Char('0')) |         QString str = QString("%1 %2 %3 %4 %5 %6 %7 %8 %9").arg(command_names[command.id]) | ||||||
|                         .arg(command.data[2], 8, 16, QLatin1Char('0')) |                         .arg(command_data[0], 8, 16, QLatin1Char('0')) | ||||||
|                         .arg(command.data[3], 8, 16, QLatin1Char('0')) |                         .arg(command_data[1], 8, 16, QLatin1Char('0')) | ||||||
|                         .arg(command.data[4], 8, 16, QLatin1Char('0')) |                         .arg(command_data[2], 8, 16, QLatin1Char('0')) | ||||||
|                         .arg(command.data[5], 8, 16, QLatin1Char('0')) |                         .arg(command_data[3], 8, 16, QLatin1Char('0')) | ||||||
|                         .arg(command.data[6], 8, 16, QLatin1Char('0')) |                         .arg(command_data[4], 8, 16, QLatin1Char('0')) | ||||||
|                         .arg(command.data[7], 8, 16, QLatin1Char('0')); |                         .arg(command_data[5], 8, 16, QLatin1Char('0')) | ||||||
|  |                         .arg(command_data[6], 8, 16, QLatin1Char('0')) | ||||||
|  |                         .arg(command_data[7], 8, 16, QLatin1Char('0')); | ||||||
|         return QVariant(str); |         return QVariant(str); | ||||||
|     } |     } | ||||||
|     else |     else | ||||||
|   | |||||||
| @@ -34,7 +34,7 @@ | |||||||
| /* | /* | ||||||
|  * Standardized way to define a group of registers and corresponding data structures. To define |  * Standardized way to define a group of registers and corresponding data structures. To define | ||||||
|  * a new register set, first define a struct containing an enumeration called "Id" containing |  * a new register set, first define a struct containing an enumeration called "Id" containing | ||||||
|  * all register IDs and a template union called "Struct". Specialize the Struct union for any |  * all register IDs and a template struct called "Struct". Specialize the Struct struct for any | ||||||
|  * register ID which needs to be accessed in a specialized way. You can then declare the object |  * register ID which needs to be accessed in a specialized way. You can then declare the object | ||||||
|  * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where |  * containing all register values using the RegisterSet<BaseType, DefiningStruct> type, where | ||||||
|  * BaseType is the underlying type of each register (e.g. u32). |  * BaseType is the underlying type of each register (e.g. u32). | ||||||
| @@ -54,7 +54,7 @@ | |||||||
|  * |  * | ||||||
|  *         // declare register definition structures |  *         // declare register definition structures | ||||||
|  *         template<Id id> |  *         template<Id id> | ||||||
|  *         union Struct; |  *         struct Struct; | ||||||
|  *     }; |  *     }; | ||||||
|  * |  * | ||||||
|  *     // Define register set object |  *     // Define register set object | ||||||
| @@ -62,10 +62,12 @@ | |||||||
|  * |  * | ||||||
|  *     // define register definition structures |  *     // define register definition structures | ||||||
|  *     template<> |  *     template<> | ||||||
|  *     union Regs::Struct<Regs::Value1> { |  *     struct Regs::Struct<Regs::Value1> { | ||||||
|  |  *         union { | ||||||
|  *             BitField<0, 4, u32> some_field; |  *             BitField<0, 4, u32> some_field; | ||||||
|  *             BitField<4, 3, u32> some_other_field; |  *             BitField<4, 3, u32> some_other_field; | ||||||
|  *         }; |  *         }; | ||||||
|  |  *     }; | ||||||
|  * |  * | ||||||
|  * Usage in external code (within SomeNamespace scope): |  * Usage in external code (within SomeNamespace scope): | ||||||
|  * |  * | ||||||
| @@ -77,7 +79,7 @@ | |||||||
|  * |  * | ||||||
|  * |  * | ||||||
|  * @tparam BaseType Base type used for storing individual registers, e.g. u32 |  * @tparam BaseType Base type used for storing individual registers, e.g. u32 | ||||||
|  * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> union, as described above. |  * @tparam RegDefinition Class defining an enumeration called "Id" and a template<Id id> struct, as described above. | ||||||
|  * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated. |  * @note RegDefinition::Id needs to have an enum value called NumIds defining the number of registers to be allocated. | ||||||
|  */ |  */ | ||||||
| template<typename BaseType, typename RegDefinition> | template<typename BaseType, typename RegDefinition> | ||||||
|   | |||||||
| @@ -16,6 +16,6 @@ | |||||||
| namespace ConfigMem { | namespace ConfigMem { | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 addr); | void Read(T &var, const u32 addr); | ||||||
|  |  | ||||||
| } // namespace | } // namespace | ||||||
|   | |||||||
| @@ -47,11 +47,6 @@ Handle g_shared_memory = 0; | |||||||
|  |  | ||||||
| u32 g_thread_id = 0; | u32 g_thread_id = 0; | ||||||
|  |  | ||||||
| enum { |  | ||||||
|     REG_FRAMEBUFFER_1   = 0x00400468, |  | ||||||
|     REG_FRAMEBUFFER_2   = 0x00400494, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| /// Gets a pointer to the start (header) of a command buffer in GSP shared memory | /// Gets a pointer to the start (header) of a command buffer in GSP shared memory | ||||||
| static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) { | static inline u8* GX_GetCmdBufferPointer(u32 thread_id, u32 offset=0) { | ||||||
|     return Kernel::GetSharedMemoryPointer(g_shared_memory, 0x800 + (thread_id * 0x200) + offset); |     return Kernel::GetSharedMemoryPointer(g_shared_memory, 0x800 + (thread_id * 0x200) + offset); | ||||||
| @@ -67,38 +62,62 @@ void GX_FinishCommand(u32 thread_id) { | |||||||
|     // TODO: Increment header->index? |     // TODO: Increment header->index? | ||||||
| } | } | ||||||
|  |  | ||||||
| /// Read a GSP GPU hardware register | /// Write a GSP GPU hardware register | ||||||
| void ReadHWRegs(Service::Interface* self) { | void WriteHWRegs(Service::Interface* self) { | ||||||
|     static const u32 framebuffer_1[] = {GPU::PADDR_VRAM_TOP_LEFT_FRAME1, GPU::PADDR_VRAM_TOP_RIGHT_FRAME1}; |  | ||||||
|     static const u32 framebuffer_2[] = {GPU::PADDR_VRAM_TOP_LEFT_FRAME2, GPU::PADDR_VRAM_TOP_RIGHT_FRAME2}; |  | ||||||
|  |  | ||||||
|     u32* cmd_buff = Service::GetCommandBuffer(); |     u32* cmd_buff = Service::GetCommandBuffer(); | ||||||
|     u32 reg_addr = cmd_buff[1]; |     u32 reg_addr = cmd_buff[1]; | ||||||
|     u32 size = cmd_buff[2]; |     u32 size = cmd_buff[2]; | ||||||
|     u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]); |  | ||||||
|  |  | ||||||
|     switch (reg_addr) { |     // TODO: Return proper error codes | ||||||
|  |     if (reg_addr + size >= 0x420000) { | ||||||
|     // NOTE: Calling SetFramebufferLocation here is a hack... Not sure the correct way yet to set  |         ERROR_LOG(GPU, "Write address out of range! (address=0x%08x, size=0x%08x)", reg_addr, size); | ||||||
|     // whether the framebuffers should be in VRAM or GSP heap, but from what I understand, if the  |         return; | ||||||
|     // user application is reading from either of these registers, then it's going to be in VRAM. |  | ||||||
|  |  | ||||||
|     // Top framebuffer 1 addresses |  | ||||||
|     case REG_FRAMEBUFFER_1: |  | ||||||
|         GPU::SetFramebufferLocation(GPU::FRAMEBUFFER_LOCATION_VRAM); |  | ||||||
|         memcpy(dst, framebuffer_1, size); |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     // Top framebuffer 2 addresses |  | ||||||
|     case REG_FRAMEBUFFER_2: |  | ||||||
|         GPU::SetFramebufferLocation(GPU::FRAMEBUFFER_LOCATION_VRAM); |  | ||||||
|         memcpy(dst, framebuffer_2, size); |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     default: |  | ||||||
|         ERROR_LOG(GSP, "unknown register read at address %08X", reg_addr); |  | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     // size should be word-aligned | ||||||
|  |     if ((size % 4) != 0) { | ||||||
|  |         ERROR_LOG(GPU, "Invalid size 0x%08x", size); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32* src = (u32*)Memory::GetPointer(cmd_buff[0x4]); | ||||||
|  |  | ||||||
|  |     while (size > 0) { | ||||||
|  |         GPU::Write<u32>(reg_addr + 0x1EB00000, *src); | ||||||
|  |  | ||||||
|  |         size -= 4; | ||||||
|  |         ++src; | ||||||
|  |         reg_addr += 4; | ||||||
|  |     } | ||||||
|  | } | ||||||
|  |  | ||||||
|  | /// Read a GSP GPU hardware register | ||||||
|  | void ReadHWRegs(Service::Interface* self) { | ||||||
|  |     u32* cmd_buff = Service::GetCommandBuffer(); | ||||||
|  |     u32 reg_addr = cmd_buff[1]; | ||||||
|  |     u32 size = cmd_buff[2]; | ||||||
|  |  | ||||||
|  |     // TODO: Return proper error codes | ||||||
|  |     if (reg_addr + size >= 0x420000) { | ||||||
|  |         ERROR_LOG(GPU, "Read address out of range! (address=0x%08x, size=0x%08x)", reg_addr, size); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // size should be word-aligned | ||||||
|  |     if ((size % 4) != 0) { | ||||||
|  |         ERROR_LOG(GPU, "Invalid size 0x%08x", size); | ||||||
|  |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     u32* dst = (u32*)Memory::GetPointer(cmd_buff[0x41]); | ||||||
|  |  | ||||||
|  |     while (size > 0) { | ||||||
|  |         GPU::Read<u32>(*dst, reg_addr + 0x1EB00000); | ||||||
|  |  | ||||||
|  |         size -= 4; | ||||||
|  |         ++dst; | ||||||
|  |         reg_addr += 4; | ||||||
|  |     } | ||||||
| } | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
| @@ -134,52 +153,92 @@ void RegisterInterruptRelayQueue(Service::Interface* self) { | |||||||
|  |  | ||||||
| /// This triggers handling of the GX command written to the command buffer in shared memory. | /// This triggers handling of the GX command written to the command buffer in shared memory. | ||||||
| void TriggerCmdReqQueue(Service::Interface* self) { | void TriggerCmdReqQueue(Service::Interface* self) { | ||||||
|     GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); |  | ||||||
|     u32* cmd_buff = (u32*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); |  | ||||||
|  |  | ||||||
|     switch (static_cast<GXCommandId>(cmd_buff[0])) { |     // Utility function to convert register ID to address | ||||||
|  |     auto WriteGPURegister = [](u32 id, u32 data) { | ||||||
|  |         GPU::Write<u32>(0x1EF00000 + 4 * id, data); | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     GX_CmdBufferHeader* header = (GX_CmdBufferHeader*)GX_GetCmdBufferPointer(g_thread_id); | ||||||
|  |     auto& command = *(const GXCommand*)GX_GetCmdBufferPointer(g_thread_id, 0x20 + (header->index * 0x20)); | ||||||
|  |  | ||||||
|  |     switch (command.id) { | ||||||
|  |  | ||||||
|     // GX request DMA - typically used for copying memory from GSP heap to VRAM |     // GX request DMA - typically used for copying memory from GSP heap to VRAM | ||||||
|     case GXCommandId::REQUEST_DMA: |     case GXCommandId::REQUEST_DMA: | ||||||
|         memcpy(Memory::GetPointer(cmd_buff[2]), Memory::GetPointer(cmd_buff[1]), cmd_buff[3]); |         memcpy(Memory::GetPointer(command.dma_request.dest_address), | ||||||
|  |                Memory::GetPointer(command.dma_request.source_address), | ||||||
|  |                command.dma_request.size); | ||||||
|         break; |         break; | ||||||
|  |  | ||||||
|  |     // ctrulib homebrew sends all relevant command list data with this command, | ||||||
|  |     // hence we do all "interesting" stuff here and do nothing in SET_COMMAND_LIST_FIRST. | ||||||
|  |     // TODO: This will need some rework in the future. | ||||||
|     case GXCommandId::SET_COMMAND_LIST_LAST: |     case GXCommandId::SET_COMMAND_LIST_LAST: | ||||||
|         GPU::Write<u32>(GPU::Registers::CommandListAddress, cmd_buff[1] >> 3); |     { | ||||||
|         GPU::Write<u32>(GPU::Registers::CommandListSize, cmd_buff[2] >> 3); |         auto& params = command.set_command_list_last; | ||||||
|         GPU::Write<u32>(GPU::Registers::ProcessCommandList, 1); // TODO: Not sure if we are supposed to always write this |         WriteGPURegister(GPU::Regs::CommandProcessor + 2, params.address >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::CommandProcessor, params.size >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::CommandProcessor + 4, 1); // TODO: Not sure if we are supposed to always write this .. seems to trigger processing though | ||||||
|  |  | ||||||
|         // TODO: Move this to GPU |         // TODO: Move this to GPU | ||||||
|         // TODO: Not sure what units the size is measured in |         // TODO: Not sure what units the size is measured in | ||||||
|         g_debugger.CommandListCalled(cmd_buff[1], (u32*)Memory::GetPointer(cmd_buff[1]), cmd_buff[2]); |         g_debugger.CommandListCalled(params.address, | ||||||
|  |                                      (u32*)Memory::GetPointer(params.address), | ||||||
|  |                                      params.size); | ||||||
|         break; |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // It's assumed that the two "blocks" behave equivalently. | ||||||
|  |     // Presumably this is done simply to allow two memory fills to run in parallel. | ||||||
|     case GXCommandId::SET_MEMORY_FILL: |     case GXCommandId::SET_MEMORY_FILL: | ||||||
|         break; |     { | ||||||
|  |         auto& params = command.memory_fill; | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill, params.start1 >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 1, params.end1 >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 2, params.end1 - params.start1); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 3, params.value1); | ||||||
|  |  | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 4, params.start2 >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 5, params.end2 >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 6, params.end2 - params.start2); | ||||||
|  |         WriteGPURegister(GPU::Regs::MemoryFill + 7, params.value2); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // TODO: Check if texture copies are implemented correctly.. | ||||||
|     case GXCommandId::SET_DISPLAY_TRANSFER: |     case GXCommandId::SET_DISPLAY_TRANSFER: | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case GXCommandId::SET_TEXTURE_COPY: |     case GXCommandId::SET_TEXTURE_COPY: | ||||||
|         break; |     { | ||||||
|  |         auto& params = command.image_copy; | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer, params.in_buffer_address >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer + 1, params.out_buffer_address >> 3); | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer + 3, params.in_buffer_size); | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer + 2, params.out_buffer_size); | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer + 4, params.flags); | ||||||
|  |  | ||||||
|  |         // TODO: Should this only be ORed with 1 for texture copies? | ||||||
|  |         // trigger transfer | ||||||
|  |         WriteGPURegister(GPU::Regs::DisplayTransfer + 6, 1); | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     // TODO: Figure out what exactly SET_COMMAND_LIST_FIRST and SET_COMMAND_LIST_LAST | ||||||
|  |     //       are supposed to do. | ||||||
|     case GXCommandId::SET_COMMAND_LIST_FIRST: |     case GXCommandId::SET_COMMAND_LIST_FIRST: | ||||||
|     { |     { | ||||||
|         //u32* buf0_data = (u32*)Memory::GetPointer(cmd_buff[1]); |  | ||||||
|         //u32* buf1_data = (u32*)Memory::GetPointer(cmd_buff[3]); |  | ||||||
|         //u32* buf2_data = (u32*)Memory::GetPointer(cmd_buff[5]); |  | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     default: |     default: | ||||||
|         ERROR_LOG(GSP, "unknown command 0x%08X", cmd_buff[0]); |         ERROR_LOG(GSP, "unknown command 0x%08X", (int)command.id.Value()); | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     GX_FinishCommand(g_thread_id); |     GX_FinishCommand(g_thread_id); | ||||||
| } | } | ||||||
|  |  | ||||||
| const Interface::FunctionInfo FunctionTable[] = { | const Interface::FunctionInfo FunctionTable[] = { | ||||||
|     {0x00010082, nullptr,                       "WriteHWRegs"}, |     {0x00010082, WriteHWRegs,                   "WriteHWRegs"}, | ||||||
|     {0x00020084, nullptr,                       "WriteHWRegsWithMask"}, |     {0x00020084, nullptr,                       "WriteHWRegsWithMask"}, | ||||||
|     {0x00030082, nullptr,                       "WriteHWRegRepeat"}, |     {0x00030082, nullptr,                       "WriteHWRegRepeat"}, | ||||||
|     {0x00040080, ReadHWRegs,                    "ReadHWRegs"}, |     {0x00040080, ReadHWRegs,                    "ReadHWRegs"}, | ||||||
|   | |||||||
| @@ -4,6 +4,7 @@ | |||||||
|  |  | ||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
|  | #include "common/bit_field.h" | ||||||
| #include "core/hle/service/service.h" | #include "core/hle/service/service.h" | ||||||
|  |  | ||||||
| //////////////////////////////////////////////////////////////////////////////////////////////////// | //////////////////////////////////////////////////////////////////////////////////////////////////// | ||||||
| @@ -12,21 +13,58 @@ | |||||||
| namespace GSP_GPU { | namespace GSP_GPU { | ||||||
|  |  | ||||||
| enum class GXCommandId : u32 { | enum class GXCommandId : u32 { | ||||||
|     REQUEST_DMA            = 0x00000000, |     REQUEST_DMA            = 0x00, | ||||||
|     SET_COMMAND_LIST_LAST  = 0x00000001, |     SET_COMMAND_LIST_LAST  = 0x01, | ||||||
|     SET_MEMORY_FILL        = 0x00000002, // TODO: Confirm? (lictru uses 0x01000102) |  | ||||||
|     SET_DISPLAY_TRANSFER   = 0x00000003, |     // Fills a given memory range with a particular value | ||||||
|     SET_TEXTURE_COPY       = 0x00000004, |     SET_MEMORY_FILL        = 0x02, | ||||||
|     SET_COMMAND_LIST_FIRST = 0x00000005, |  | ||||||
|  |     // Copies an image and optionally performs color-conversion or scaling. | ||||||
|  |     // This is highly similar to the GameCube's EFB copy feature | ||||||
|  |     SET_DISPLAY_TRANSFER   = 0x03, | ||||||
|  |  | ||||||
|  |     // Conceptually similar to SET_DISPLAY_TRANSFER and presumably uses the same hardware path | ||||||
|  |     SET_TEXTURE_COPY       = 0x04, | ||||||
|  |  | ||||||
|  |     SET_COMMAND_LIST_FIRST = 0x05, | ||||||
| }; | }; | ||||||
|  |  | ||||||
| union GXCommand { | struct GXCommand { | ||||||
|  |     BitField<0, 8, GXCommandId> id; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|         struct { |         struct { | ||||||
|         GXCommandId id; |             u32 source_address; | ||||||
|     }; |             u32 dest_address; | ||||||
|  |             u32 size; | ||||||
|  |         } dma_request; | ||||||
|  |  | ||||||
|     u32 data[0x20]; |         struct { | ||||||
|  |             u32 address; | ||||||
|  |             u32 size; | ||||||
|  |         } set_command_list_last; | ||||||
|  |  | ||||||
|  |         struct { | ||||||
|  |             u32 start1; | ||||||
|  |             u32 value1; | ||||||
|  |             u32 end1; | ||||||
|  |             u32 start2; | ||||||
|  |             u32 value2; | ||||||
|  |             u32 end2; | ||||||
|  |         } memory_fill; | ||||||
|  |  | ||||||
|  |         struct { | ||||||
|  |             u32 in_buffer_address; | ||||||
|  |             u32 out_buffer_address; | ||||||
|  |             u32 in_buffer_size; | ||||||
|  |             u32 out_buffer_size; | ||||||
|  |             u32 flags; | ||||||
|  |         } image_copy; | ||||||
|  |  | ||||||
|  |         u8 raw_data[0x1C]; | ||||||
|     }; |     }; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(GXCommand) == 0x20, "GXCommand struct has incorrect size"); | ||||||
|  |  | ||||||
| /// Interface to "srv:" service | /// Interface to "srv:" service | ||||||
| class Interface : public Service::Interface { | class Interface : public Service::Interface { | ||||||
|   | |||||||
| @@ -15,7 +15,7 @@ | |||||||
|  |  | ||||||
| namespace GPU { | namespace GPU { | ||||||
|  |  | ||||||
| Registers g_regs; | RegisterSet<u32, Regs> g_regs; | ||||||
|  |  | ||||||
| u64 g_last_ticks = 0; ///< Last CPU ticks | u64 g_last_ticks = 0; ///< Last CPU ticks | ||||||
|  |  | ||||||
| @@ -26,37 +26,47 @@ u64 g_last_ticks = 0; ///< Last CPU ticks | |||||||
| void SetFramebufferLocation(const FramebufferLocation mode) { | void SetFramebufferLocation(const FramebufferLocation mode) { | ||||||
|     switch (mode) { |     switch (mode) { | ||||||
|     case FRAMEBUFFER_LOCATION_FCRAM: |     case FRAMEBUFFER_LOCATION_FCRAM: | ||||||
|         g_regs.framebuffer_top_left_1   = PADDR_TOP_LEFT_FRAME1; |     { | ||||||
|         g_regs.framebuffer_top_left_2   = PADDR_TOP_LEFT_FRAME2; |         auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||||||
|         g_regs.framebuffer_top_right_1  = PADDR_TOP_RIGHT_FRAME1; |         auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||||||
|         g_regs.framebuffer_top_right_2  = PADDR_TOP_RIGHT_FRAME2; |  | ||||||
|         g_regs.framebuffer_sub_left_1   = PADDR_SUB_FRAME1; |         framebuffer_top.address_left1  = PADDR_TOP_LEFT_FRAME1; | ||||||
|         //g_regs.framebuffer_sub_left_2  = unknown; |         framebuffer_top.address_left2  = PADDR_TOP_LEFT_FRAME2; | ||||||
|         g_regs.framebuffer_sub_right_1  = PADDR_SUB_FRAME2; |         framebuffer_top.address_right1 = PADDR_TOP_RIGHT_FRAME1; | ||||||
|         //g_regs.framebufferr_sub_right_2 = unknown; |         framebuffer_top.address_right2 = PADDR_TOP_RIGHT_FRAME2; | ||||||
|  |         framebuffer_sub.address_left1  = PADDR_SUB_FRAME1; | ||||||
|  |         //framebuffer_sub.address_left2  = unknown; | ||||||
|  |         framebuffer_sub.address_right1 = PADDR_SUB_FRAME2; | ||||||
|  |         //framebuffer_sub.address_right2 = unknown; | ||||||
|         break; |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     case FRAMEBUFFER_LOCATION_VRAM: |     case FRAMEBUFFER_LOCATION_VRAM: | ||||||
|         g_regs.framebuffer_top_left_1   = PADDR_VRAM_TOP_LEFT_FRAME1; |     { | ||||||
|         g_regs.framebuffer_top_left_2   = PADDR_VRAM_TOP_LEFT_FRAME2; |         auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||||||
|         g_regs.framebuffer_top_right_1  = PADDR_VRAM_TOP_RIGHT_FRAME1; |         auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||||||
|         g_regs.framebuffer_top_right_2  = PADDR_VRAM_TOP_RIGHT_FRAME2; |  | ||||||
|         g_regs.framebuffer_sub_left_1   = PADDR_VRAM_SUB_FRAME1; |         framebuffer_top.address_left1  = PADDR_VRAM_TOP_LEFT_FRAME1; | ||||||
|         //g_regs.framebuffer_sub_left_2  = unknown; |         framebuffer_top.address_left2  = PADDR_VRAM_TOP_LEFT_FRAME2; | ||||||
|         g_regs.framebuffer_sub_right_1  = PADDR_VRAM_SUB_FRAME2; |         framebuffer_top.address_right1 = PADDR_VRAM_TOP_RIGHT_FRAME1; | ||||||
|         //g_regs.framebufferr_sub_right_2 = unknown; |         framebuffer_top.address_right2 = PADDR_VRAM_TOP_RIGHT_FRAME2; | ||||||
|  |         framebuffer_sub.address_left1  = PADDR_VRAM_SUB_FRAME1; | ||||||
|  |         //framebuffer_sub.address_left2  = unknown; | ||||||
|  |         framebuffer_sub.address_right1 = PADDR_VRAM_SUB_FRAME2; | ||||||
|  |         //framebuffer_sub.address_right2 = unknown; | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
|     } |     } | ||||||
|  | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Gets the location of the framebuffers |  * Gets the location of the framebuffers | ||||||
|  * @return Location of framebuffers as FramebufferLocation enum |  * @return Location of framebuffers as FramebufferLocation enum | ||||||
|  */ |  */ | ||||||
| const FramebufferLocation GetFramebufferLocation() { | FramebufferLocation GetFramebufferLocation(u32 address) { | ||||||
|     if ((g_regs.framebuffer_top_right_1 & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { |     if ((address & ~Memory::VRAM_MASK) == Memory::VRAM_PADDR) { | ||||||
|         return FRAMEBUFFER_LOCATION_VRAM; |         return FRAMEBUFFER_LOCATION_VRAM; | ||||||
|     } else if ((g_regs.framebuffer_top_right_1 & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { |     } else if ((address & ~Memory::FCRAM_MASK) == Memory::FCRAM_PADDR) { | ||||||
|         return FRAMEBUFFER_LOCATION_FCRAM; |         return FRAMEBUFFER_LOCATION_FCRAM; | ||||||
|     } else { |     } else { | ||||||
|         ERROR_LOG(GPU, "unknown framebuffer location!"); |         ERROR_LOG(GPU, "unknown framebuffer location!"); | ||||||
| @@ -64,91 +74,161 @@ const FramebufferLocation GetFramebufferLocation() { | |||||||
|     return FRAMEBUFFER_LOCATION_UNKNOWN; |     return FRAMEBUFFER_LOCATION_UNKNOWN; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | u32 GetFramebufferAddr(const u32 address) { | ||||||
|  |     switch (GetFramebufferLocation(address)) { | ||||||
|  |     case FRAMEBUFFER_LOCATION_FCRAM: | ||||||
|  |         return Memory::VirtualAddressFromPhysical_FCRAM(address); | ||||||
|  |     case FRAMEBUFFER_LOCATION_VRAM: | ||||||
|  |         return Memory::VirtualAddressFromPhysical_VRAM(address); | ||||||
|  |     default: | ||||||
|  |         ERROR_LOG(GPU, "unknown framebuffer location"); | ||||||
|  |     } | ||||||
|  |     return 0; | ||||||
|  | } | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Gets a read-only pointer to a framebuffer in memory |  * Gets a read-only pointer to a framebuffer in memory | ||||||
|  * @param address Physical address of framebuffer |  * @param address Physical address of framebuffer | ||||||
|  * @return Returns const pointer to raw framebuffer |  * @return Returns const pointer to raw framebuffer | ||||||
|  */ |  */ | ||||||
| const u8* GetFramebufferPointer(const u32 address) { | const u8* GetFramebufferPointer(const u32 address) { | ||||||
|     switch (GetFramebufferLocation()) { |     u32 addr = GetFramebufferAddr(address); | ||||||
|     case FRAMEBUFFER_LOCATION_FCRAM: |     return (addr != 0) ? Memory::GetPointer(addr) : nullptr; | ||||||
|         return (const u8*)Memory::GetPointer(Memory::VirtualAddressFromPhysical_FCRAM(address)); |  | ||||||
|     case FRAMEBUFFER_LOCATION_VRAM: |  | ||||||
|         return (const u8*)Memory::GetPointer(Memory::VirtualAddressFromPhysical_VRAM(address)); |  | ||||||
|     default: |  | ||||||
|         ERROR_LOG(GPU, "unknown framebuffer location"); |  | ||||||
|     } |  | ||||||
|     return NULL; |  | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 addr) { | inline void Read(T &var, const u32 raw_addr) { | ||||||
|     switch (addr) { |     u32 addr = raw_addr - 0x1EF00000; | ||||||
|     case Registers::FramebufferTopLeft1: |     int index = addr / 4; | ||||||
|         var = g_regs.framebuffer_top_left_1; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::FramebufferTopLeft2: |     // Reads other than u32 are untested, so I'd rather have them abort than silently fail | ||||||
|         var = g_regs.framebuffer_top_left_2; |     if (index >= Regs::NumIds || !std::is_same<T,u32>::value) | ||||||
|         break; |     { | ||||||
|  |  | ||||||
|     case Registers::FramebufferTopRight1: |  | ||||||
|         var = g_regs.framebuffer_top_right_1; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::FramebufferTopRight2: |  | ||||||
|         var = g_regs.framebuffer_top_right_2; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::FramebufferSubLeft1: |  | ||||||
|         var = g_regs.framebuffer_sub_left_1; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::FramebufferSubRight1: |  | ||||||
|         var = g_regs.framebuffer_sub_right_1; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::CommandListSize: |  | ||||||
|         var = g_regs.command_list_size; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::CommandListAddress: |  | ||||||
|         var = g_regs.command_list_address; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::ProcessCommandList: |  | ||||||
|         var = g_regs.command_processing_enabled; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     default: |  | ||||||
|         ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); |         ERROR_LOG(GPU, "unknown Read%d @ 0x%08X", sizeof(var) * 8, addr); | ||||||
|         break; |         return; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     var = g_regs[static_cast<Regs::Id>(addr / 4)]; | ||||||
| } | } | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Write(u32 addr, const T data) { | inline void Write(u32 addr, const T data) { | ||||||
|     switch (static_cast<Registers::Id>(addr)) { |     addr -= 0x1EF00000; | ||||||
|     case Registers::CommandListSize: |     int index = addr / 4; | ||||||
|         g_regs.command_list_size = data; |  | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::CommandListAddress: |     // Writes other than u32 are untested, so I'd rather have them abort than silently fail | ||||||
|         g_regs.command_list_address = data; |     if (index >= Regs::NumIds || !std::is_same<T,u32>::value) | ||||||
|         break; |  | ||||||
|  |  | ||||||
|     case Registers::ProcessCommandList: |  | ||||||
|         g_regs.command_processing_enabled = data; |  | ||||||
|         if (g_regs.command_processing_enabled & 1) |  | ||||||
|     { |     { | ||||||
|             // u32* buffer = (u32*)Memory::GetPointer(g_regs.command_list_address << 3); |         ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); | ||||||
|             ERROR_LOG(GPU, "Beginning %x bytes of commands from address %x", g_regs.command_list_size, g_regs.command_list_address << 3); |         return; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     g_regs[static_cast<Regs::Id>(index)] = data; | ||||||
|  |  | ||||||
|  |     switch (static_cast<Regs::Id>(index)) { | ||||||
|  |  | ||||||
|  |     // Memory fills are triggered once the fill value is written. | ||||||
|  |     // NOTE: This is not verified. | ||||||
|  |     case Regs::MemoryFill + 3: | ||||||
|  |     case Regs::MemoryFill + 7: | ||||||
|  |     { | ||||||
|  |         const auto& config = g_regs.Get<Regs::MemoryFill>(static_cast<Regs::Id>(index - 3)); | ||||||
|  |  | ||||||
|  |         // TODO: Not sure if this check should be done at GSP level instead | ||||||
|  |         if (config.address_start) { | ||||||
|  |             // TODO: Not sure if this algorithm is correct, particularly because it doesn't use the size member at all | ||||||
|  |             u32* start = (u32*)Memory::GetPointer(config.GetStartAddress()); | ||||||
|  |             u32* end = (u32*)Memory::GetPointer(config.GetEndAddress()); | ||||||
|  |             for (u32* ptr = start; ptr < end; ++ptr) | ||||||
|  |                 *ptr = bswap32(config.value); // TODO: This is just a workaround to missing framebuffer format emulation | ||||||
|  |  | ||||||
|  |             DEBUG_LOG(GPU, "MemoryFill from 0x%08x to 0x%08x", config.GetStartAddress(), config.GetEndAddress()); | ||||||
|  |         } | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     case Regs::DisplayTransfer + 6: | ||||||
|  |     { | ||||||
|  |         const auto& config = g_regs.Get<Regs::DisplayTransfer>(); | ||||||
|  |         if (config.trigger & 1) { | ||||||
|  |             u8* source_pointer = Memory::GetPointer(config.GetPhysicalInputAddress()); | ||||||
|  |             u8* dest_pointer = Memory::GetPointer(config.GetPhysicalOutputAddress()); | ||||||
|  |  | ||||||
|  |             for (int y = 0; y < config.output_height; ++y) { | ||||||
|  |                 // TODO: Why does the register seem to hold twice the framebuffer width? | ||||||
|  |                 for (int x = 0; x < config.output_width / 2; ++x) { | ||||||
|  |                     struct { | ||||||
|  |                         int r, g, b, a; | ||||||
|  |                     } source_color = { 0, 0, 0, 0 }; | ||||||
|  |  | ||||||
|  |                     switch (config.input_format) { | ||||||
|  |                     case Regs::FramebufferFormat::RGBA8: | ||||||
|  |                     { | ||||||
|  |                         // TODO: Most likely got the component order messed up. | ||||||
|  |                         u8* srcptr = source_pointer + x * 4 + y * config.input_width * 4 / 2; | ||||||
|  |                         source_color.r = srcptr[0]; // blue | ||||||
|  |                         source_color.g = srcptr[1]; // green | ||||||
|  |                         source_color.b = srcptr[2]; // red | ||||||
|  |                         source_color.a = srcptr[3]; // alpha | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     default: | ||||||
|  |                         ERROR_LOG(GPU, "Unknown source framebuffer format %x", config.input_format.Value()); | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     switch (config.output_format) { | ||||||
|  |                     /*case Regs::FramebufferFormat::RGBA8: | ||||||
|  |                     { | ||||||
|  |                         // TODO: Untested | ||||||
|  |                         u8* dstptr = (u32*)(dest_pointer + x * 4 + y * config.output_width * 4); | ||||||
|  |                         dstptr[0] = source_color.r; | ||||||
|  |                         dstptr[1] = source_color.g; | ||||||
|  |                         dstptr[2] = source_color.b; | ||||||
|  |                         dstptr[3] = source_color.a; | ||||||
|  |                         break; | ||||||
|  |                     }*/ | ||||||
|  |  | ||||||
|  |                     case Regs::FramebufferFormat::RGB8: | ||||||
|  |                     { | ||||||
|  |                         // TODO: Most likely got the component order messed up. | ||||||
|  |                         u8* dstptr = dest_pointer + x * 3 + y * config.output_width * 3 / 2; | ||||||
|  |                         dstptr[0] = source_color.r; // blue | ||||||
|  |                         dstptr[1] = source_color.g; // green | ||||||
|  |                         dstptr[2] = source_color.b; // red | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |  | ||||||
|  |                     default: | ||||||
|  |                         ERROR_LOG(GPU, "Unknown destination framebuffer format %x", config.output_format.Value()); | ||||||
|  |                         break; | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |  | ||||||
|  |             DEBUG_LOG(GPU, "DisplayTriggerTransfer: 0x%08x bytes from 0x%08x(%dx%d)-> 0x%08x(%dx%d), dst format %x", | ||||||
|  |                       config.output_height * config.output_width * 4, | ||||||
|  |                       config.GetPhysicalInputAddress(), (int)config.input_width, (int)config.input_height, | ||||||
|  |                       config.GetPhysicalOutputAddress(), (int)config.output_width, (int)config.output_height, | ||||||
|  |                       config.output_format.Value()); | ||||||
|  |         } | ||||||
|  |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     case Regs::CommandProcessor + 4: | ||||||
|  |     { | ||||||
|  |         const auto& config = g_regs.Get<Regs::CommandProcessor>(); | ||||||
|  |         if (config.trigger & 1) | ||||||
|  |         { | ||||||
|  |             // u32* buffer = (u32*)Memory::GetPointer(config.address << 3); | ||||||
|  |             ERROR_LOG(GPU, "Beginning 0x%08x bytes of commands from address 0x%08x", config.size, config.address << 3); | ||||||
|             // TODO: Process command list! |             // TODO: Process command list! | ||||||
|         } |         } | ||||||
|         break; |         break; | ||||||
|  |     } | ||||||
|  |  | ||||||
|     default: |     default: | ||||||
|         ERROR_LOG(GPU, "unknown Write%d 0x%08X @ 0x%08X", sizeof(data) * 8, data, addr); |  | ||||||
|         break; |         break; | ||||||
|     } |     } | ||||||
| } | } | ||||||
| @@ -180,7 +260,24 @@ void Update() { | |||||||
| /// Initialize hardware | /// Initialize hardware | ||||||
| void Init() { | void Init() { | ||||||
|     g_last_ticks = Core::g_app_core->GetTicks(); |     g_last_ticks = Core::g_app_core->GetTicks(); | ||||||
|     SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); | //    SetFramebufferLocation(FRAMEBUFFER_LOCATION_FCRAM); | ||||||
|  |     SetFramebufferLocation(FRAMEBUFFER_LOCATION_VRAM); | ||||||
|  |  | ||||||
|  |     auto& framebuffer_top = g_regs.Get<Regs::FramebufferTop>(); | ||||||
|  |     auto& framebuffer_sub = g_regs.Get<Regs::FramebufferBottom>(); | ||||||
|  |     // TODO: Width should be 240 instead? | ||||||
|  |     framebuffer_top.width = 480; | ||||||
|  |     framebuffer_top.height = 400; | ||||||
|  |     framebuffer_top.stride = 480*3; | ||||||
|  |     framebuffer_top.color_format = Regs::FramebufferFormat::RGB8; | ||||||
|  |     framebuffer_top.active_fb = 0; | ||||||
|  |  | ||||||
|  |     framebuffer_sub.width = 480; | ||||||
|  |     framebuffer_sub.height = 400; | ||||||
|  |     framebuffer_sub.stride = 480*3; | ||||||
|  |     framebuffer_sub.color_format = Regs::FramebufferFormat::RGB8; | ||||||
|  |     framebuffer_sub.active_fb = 0; | ||||||
|  |  | ||||||
|     NOTICE_LOG(GPU, "initialized OK"); |     NOTICE_LOG(GPU, "initialized OK"); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -5,43 +5,168 @@ | |||||||
| #pragma once | #pragma once | ||||||
|  |  | ||||||
| #include "common/common_types.h" | #include "common/common_types.h" | ||||||
|  | #include "common/bit_field.h" | ||||||
|  | #include "common/register_set.h" | ||||||
|  |  | ||||||
| namespace GPU { | namespace GPU { | ||||||
|  |  | ||||||
| static const u32 kFrameCycles   = 268123480 / 60;   ///< 268MHz / 60 frames per second | static const u32 kFrameCycles   = 268123480 / 60;   ///< 268MHz / 60 frames per second | ||||||
| static const u32 kFrameTicks    = kFrameCycles / 3; ///< Approximate number of instructions/frame | static const u32 kFrameTicks    = kFrameCycles / 3; ///< Approximate number of instructions/frame | ||||||
|  |  | ||||||
| struct Registers { | // MMIO region 0x1EFxxxxx | ||||||
|  | struct Regs { | ||||||
|     enum Id : u32 { |     enum Id : u32 { | ||||||
|         FramebufferTopLeft1     = 0x1EF00468,   // Main LCD, first framebuffer for 3D left |         MemoryFill                = 0x00004, // + 5,6,7; second block at 8-11 | ||||||
|         FramebufferTopLeft2     = 0x1EF0046C,   // Main LCD, second framebuffer for 3D left |  | ||||||
|         FramebufferTopRight1    = 0x1EF00494,   // Main LCD, first framebuffer for 3D right |  | ||||||
|         FramebufferTopRight2    = 0x1EF00498,   // Main LCD, second framebuffer for 3D right |  | ||||||
|         FramebufferSubLeft1     = 0x1EF00568,   // Sub LCD, first framebuffer |  | ||||||
|         FramebufferSubLeft2     = 0x1EF0056C,   // Sub LCD, second framebuffer |  | ||||||
|         FramebufferSubRight1    = 0x1EF00594,   // Sub LCD, unused first framebuffer |  | ||||||
|         FramebufferSubRight2    = 0x1EF00598,   // Sub LCD, unused second framebuffer |  | ||||||
|  |  | ||||||
|         CommandListSize         = 0x1EF018E0, |         FramebufferTop            = 0x00117, // + 11a,11b,11c,11d(?),11e...126 | ||||||
|         CommandListAddress      = 0x1EF018E8, |         FramebufferBottom         = 0x00157, // + 15a,15b,15c,15d(?),15e...166 | ||||||
|         ProcessCommandList      = 0x1EF018F0, |  | ||||||
|  |         DisplayTransfer           = 0x00300, // + 301,302,303,304,305,306 | ||||||
|  |  | ||||||
|  |         CommandProcessor          = 0x00638, // + 63a,63c | ||||||
|  |  | ||||||
|  |         NumIds                    = 0x01000 | ||||||
|     }; |     }; | ||||||
|  |  | ||||||
|     u32 framebuffer_top_left_1; |     template<Id id> | ||||||
|     u32 framebuffer_top_left_2; |     struct Struct; | ||||||
|     u32 framebuffer_top_right_1; |  | ||||||
|     u32 framebuffer_top_right_2; |  | ||||||
|     u32 framebuffer_sub_left_1; |  | ||||||
|     u32 framebuffer_sub_left_2; |  | ||||||
|     u32 framebuffer_sub_right_1; |  | ||||||
|     u32 framebuffer_sub_right_2; |  | ||||||
|  |  | ||||||
|     u32 command_list_size; |     enum class FramebufferFormat : u32 { | ||||||
|     u32 command_list_address; |         RGBA8  = 0, | ||||||
|     u32 command_processing_enabled; |         RGB8   = 1, | ||||||
|  |         RGB565 = 2, | ||||||
|  |         RGB5A1 = 3, | ||||||
|  |         RGBA4  = 4, | ||||||
|  |     }; | ||||||
| }; | }; | ||||||
|  |  | ||||||
| extern Registers g_regs; | template<> | ||||||
|  | struct Regs::Struct<Regs::MemoryFill> { | ||||||
|  |     u32 address_start; | ||||||
|  |     u32 address_end; // ? | ||||||
|  |     u32 size; | ||||||
|  |     u32 value; // ? | ||||||
|  |  | ||||||
|  |     inline u32 GetStartAddress() const { | ||||||
|  |         return address_start * 8; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     inline u32 GetEndAddress() const { | ||||||
|  |         return address_end * 8; | ||||||
|  |     } | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Regs::Struct<Regs::MemoryFill>) == 0x10, "Structure size and register block length don't match"); | ||||||
|  |  | ||||||
|  | template<> | ||||||
|  | struct Regs::Struct<Regs::FramebufferTop> { | ||||||
|  |     using Format = Regs::FramebufferFormat; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 size; | ||||||
|  |  | ||||||
|  |         BitField< 0, 16, u32> width; | ||||||
|  |         BitField<16, 16, u32> height; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 pad0[2]; | ||||||
|  |  | ||||||
|  |     u32 address_left1; | ||||||
|  |     u32 address_left2; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 format; | ||||||
|  |  | ||||||
|  |         BitField< 0, 3, Format> color_format; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 pad1; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 active_fb; | ||||||
|  |  | ||||||
|  |         // 0: Use parameters ending with "1" | ||||||
|  |         // 1: Use parameters ending with "2" | ||||||
|  |         BitField<0, 1, u32> second_fb_active; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 pad2[5]; | ||||||
|  |  | ||||||
|  |     // Distance between two pixel rows, in bytes | ||||||
|  |     u32 stride; | ||||||
|  |  | ||||||
|  |     u32 address_right1; | ||||||
|  |     u32 address_right2; | ||||||
|  | }; | ||||||
|  |  | ||||||
|  | template<> | ||||||
|  | struct Regs::Struct<Regs::FramebufferBottom> : public Regs::Struct<Regs::FramebufferTop> { | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Regs::Struct<Regs::FramebufferTop>) == 0x40, "Structure size and register block length don't match"); | ||||||
|  |  | ||||||
|  | template<> | ||||||
|  | struct Regs::Struct<Regs::DisplayTransfer> { | ||||||
|  |     using Format = Regs::FramebufferFormat; | ||||||
|  |  | ||||||
|  |     u32 input_address; | ||||||
|  |     u32 output_address; | ||||||
|  |  | ||||||
|  |     inline u32 GetPhysicalInputAddress() const { | ||||||
|  |         return input_address * 8; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     inline u32 GetPhysicalOutputAddress() const { | ||||||
|  |         return output_address * 8; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 output_size; | ||||||
|  |  | ||||||
|  |         BitField< 0, 16, u32> output_width; | ||||||
|  |         BitField<16, 16, u32> output_height; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 input_size; | ||||||
|  |  | ||||||
|  |         BitField< 0, 16, u32> input_width; | ||||||
|  |         BitField<16, 16, u32> input_height; | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     union { | ||||||
|  |         u32 flags; | ||||||
|  |  | ||||||
|  |         BitField< 0, 1, u32> flip_data;        // flips input data horizontally (TODO) if true | ||||||
|  |         BitField< 8, 3, Format> input_format; | ||||||
|  |         BitField<12, 3, Format> output_format; | ||||||
|  |         BitField<16, 1, u32> output_tiled;     // stores output in a tiled format | ||||||
|  |     }; | ||||||
|  |  | ||||||
|  |     u32 unknown; | ||||||
|  |  | ||||||
|  |     // it seems that writing to this field triggers the display transfer | ||||||
|  |     u32 trigger; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Regs::Struct<Regs::DisplayTransfer>) == 0x1C, "Structure size and register block length don't match"); | ||||||
|  |  | ||||||
|  | template<> | ||||||
|  | struct Regs::Struct<Regs::CommandProcessor> { | ||||||
|  |     // command list size | ||||||
|  |     u32 size; | ||||||
|  |  | ||||||
|  |     u32 pad0; | ||||||
|  |  | ||||||
|  |     // command list address | ||||||
|  |     u32 address; | ||||||
|  |  | ||||||
|  |     u32 pad1; | ||||||
|  |  | ||||||
|  |     // it seems that writing to this field triggers command list processing | ||||||
|  |     u32 trigger; | ||||||
|  | }; | ||||||
|  | static_assert(sizeof(Regs::Struct<Regs::CommandProcessor>) == 0x14, "Structure size and register block length don't match"); | ||||||
|  |  | ||||||
|  |  | ||||||
|  | extern RegisterSet<u32, Regs> g_regs; | ||||||
|  |  | ||||||
| enum { | enum { | ||||||
|     TOP_ASPECT_X        = 0x5, |     TOP_ASPECT_X        = 0x5, | ||||||
| @@ -51,23 +176,35 @@ enum { | |||||||
|     TOP_WIDTH           = 400, |     TOP_WIDTH           = 400, | ||||||
|     BOTTOM_WIDTH        = 320, |     BOTTOM_WIDTH        = 320, | ||||||
|  |  | ||||||
|     // Physical addresses in FCRAM used by ARM9 applications - these are correct for real hardware  |     // Physical addresses in FCRAM (chosen arbitrarily) | ||||||
|     PADDR_FRAMEBUFFER_SEL       = 0x20184E59, |     PADDR_TOP_LEFT_FRAME1       = 0x201D4C00, | ||||||
|     PADDR_TOP_LEFT_FRAME1       = 0x20184E60, |     PADDR_TOP_LEFT_FRAME2       = 0x202D4C00, | ||||||
|  |     PADDR_TOP_RIGHT_FRAME1      = 0x203D4C00, | ||||||
|  |     PADDR_TOP_RIGHT_FRAME2      = 0x204D4C00, | ||||||
|  |     PADDR_SUB_FRAME1            = 0x205D4C00, | ||||||
|  |     PADDR_SUB_FRAME2            = 0x206D4C00, | ||||||
|  |     // Physical addresses in FCRAM used by ARM9 applications | ||||||
|  | /*    PADDR_TOP_LEFT_FRAME1       = 0x20184E60, | ||||||
|     PADDR_TOP_LEFT_FRAME2       = 0x201CB370, |     PADDR_TOP_LEFT_FRAME2       = 0x201CB370, | ||||||
|     PADDR_TOP_RIGHT_FRAME1      = 0x20282160, |     PADDR_TOP_RIGHT_FRAME1      = 0x20282160, | ||||||
|     PADDR_TOP_RIGHT_FRAME2      = 0x202C8670, |     PADDR_TOP_RIGHT_FRAME2      = 0x202C8670, | ||||||
|     PADDR_SUB_FRAME1            = 0x202118E0, |     PADDR_SUB_FRAME1            = 0x202118E0, | ||||||
|     PADDR_SUB_FRAME2            = 0x20249CF0, |     PADDR_SUB_FRAME2            = 0x20249CF0,*/ | ||||||
|  |  | ||||||
|     // Physical addresses in VRAM - I'm not sure how these are actually allocated (so not real) |     // Physical addresses in VRAM | ||||||
|     PADDR_VRAM_FRAMEBUFFER_SEL  = 0x18184E59, |     // TODO: These should just be deduced from the ones above | ||||||
|     PADDR_VRAM_TOP_LEFT_FRAME1  = 0x18184E60, |     PADDR_VRAM_TOP_LEFT_FRAME1  = 0x181D4C00, | ||||||
|     PADDR_VRAM_TOP_LEFT_FRAME2  = 0x181CB370, |     PADDR_VRAM_TOP_LEFT_FRAME2  = 0x182D4C00, | ||||||
|  |     PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x183D4C00, | ||||||
|  |     PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x184D4C00, | ||||||
|  |     PADDR_VRAM_SUB_FRAME1       = 0x185D4C00, | ||||||
|  |     PADDR_VRAM_SUB_FRAME2       = 0x186D4C00, | ||||||
|  |     // Physical addresses in VRAM used by ARM9 applications | ||||||
|  | /*    PADDR_VRAM_TOP_LEFT_FRAME2  = 0x181CB370, | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160, |     PADDR_VRAM_TOP_RIGHT_FRAME1 = 0x18282160, | ||||||
|     PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670, |     PADDR_VRAM_TOP_RIGHT_FRAME2 = 0x182C8670, | ||||||
|     PADDR_VRAM_SUB_FRAME1       = 0x182118E0, |     PADDR_VRAM_SUB_FRAME1       = 0x182118E0, | ||||||
|     PADDR_VRAM_SUB_FRAME2       = 0x18249CF0, |     PADDR_VRAM_SUB_FRAME2       = 0x18249CF0,*/ | ||||||
| }; | }; | ||||||
|  |  | ||||||
| /// Framebuffer location | /// Framebuffer location | ||||||
| @@ -90,16 +227,18 @@ void SetFramebufferLocation(const FramebufferLocation mode); | |||||||
|  */ |  */ | ||||||
| const u8* GetFramebufferPointer(const u32 address); | const u8* GetFramebufferPointer(const u32 address); | ||||||
|  |  | ||||||
|  | u32 GetFramebufferAddr(const u32 address); | ||||||
|  |  | ||||||
| /** | /** | ||||||
|  * Gets the location of the framebuffers |  * Gets the location of the framebuffers | ||||||
|  */ |  */ | ||||||
| const FramebufferLocation GetFramebufferLocation(); | FramebufferLocation GetFramebufferLocation(u32 address); | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 addr); | void Read(T &var, const u32 addr); | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Write(u32 addr, const T data); | void Write(u32 addr, const T data); | ||||||
|  |  | ||||||
| /// Update hardware | /// Update hardware | ||||||
| void Update(); | void Update(); | ||||||
|   | |||||||
| @@ -9,10 +9,10 @@ | |||||||
| namespace HW { | namespace HW { | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Read(T &var, const u32 addr); | void Read(T &var, const u32 addr); | ||||||
|  |  | ||||||
| template <typename T> | template <typename T> | ||||||
| inline void Write(u32 addr, const T data); | void Write(u32 addr, const T data); | ||||||
|  |  | ||||||
| /// Update hardware | /// Update hardware | ||||||
| void Update(); | void Update(); | ||||||
|   | |||||||
| @@ -50,7 +50,7 @@ public: | |||||||
|         virtual void GXCommandProcessed(int total_command_count) |         virtual void GXCommandProcessed(int total_command_count) | ||||||
|         { |         { | ||||||
|             const GSP_GPU::GXCommand& cmd = observed->ReadGXCommandHistory(total_command_count-1); |             const GSP_GPU::GXCommand& cmd = observed->ReadGXCommandHistory(total_command_count-1); | ||||||
|             ERROR_LOG(GSP, "Received command: id=%x", cmd.id); |             ERROR_LOG(GSP, "Received command: id=%x", (int)cmd.id.Value()); | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         /** |         /** | ||||||
| @@ -78,11 +78,13 @@ public: | |||||||
|  |  | ||||||
|     void GXCommandProcessed(u8* command_data) |     void GXCommandProcessed(u8* command_data) | ||||||
|     { |     { | ||||||
|  |         if (observers.empty()) | ||||||
|  |             return; | ||||||
|  |  | ||||||
|         gx_command_history.push_back(GSP_GPU::GXCommand()); |         gx_command_history.push_back(GSP_GPU::GXCommand()); | ||||||
|         GSP_GPU::GXCommand& cmd = gx_command_history[gx_command_history.size()-1]; |         GSP_GPU::GXCommand& cmd = gx_command_history[gx_command_history.size()-1]; | ||||||
|  |  | ||||||
|         const int cmd_length = sizeof(GSP_GPU::GXCommand); |         memcpy(&cmd, command_data, sizeof(GSP_GPU::GXCommand)); | ||||||
|         memcpy(cmd.data, command_data, cmd_length); |  | ||||||
|  |  | ||||||
|         ForEachObserver([this](DebuggerObserver* observer) { |         ForEachObserver([this](DebuggerObserver* observer) { | ||||||
|                           observer->GXCommandProcessed(this->gx_command_history.size()); |                           observer->GXCommandProcessed(this->gx_command_history.size()); | ||||||
| @@ -91,6 +93,9 @@ public: | |||||||
|  |  | ||||||
|     void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) |     void CommandListCalled(u32 address, u32* command_list, u32 size_in_words) | ||||||
|     { |     { | ||||||
|  |         if (observers.empty()) | ||||||
|  |             return; | ||||||
|  |  | ||||||
|         PicaCommandList cmdlist; |         PicaCommandList cmdlist; | ||||||
|         for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) |         for (u32* parse_pointer = command_list; parse_pointer < command_list + size_in_words;) | ||||||
|         { |         { | ||||||
|   | |||||||
| @@ -61,10 +61,11 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { | |||||||
|     int in_coord = 0; |     int in_coord = 0; | ||||||
|     for (int x = 0; x < VideoCore::kScreenTopWidth; x++) { |     for (int x = 0; x < VideoCore::kScreenTopWidth; x++) { | ||||||
|         for (int y = VideoCore::kScreenTopHeight-1; y >= 0; y--) { |         for (int y = VideoCore::kScreenTopHeight-1; y >= 0; y--) { | ||||||
|  |             // TODO: Properly support other framebuffer formats | ||||||
|             int out_coord = (x + y * VideoCore::kScreenTopWidth) * 3; |             int out_coord = (x + y * VideoCore::kScreenTopWidth) * 3; | ||||||
|             out[out_coord] = in[in_coord]; |             out[out_coord] = in[in_coord];         // blue? | ||||||
|             out[out_coord + 1] = in[in_coord + 1]; |             out[out_coord + 1] = in[in_coord + 1]; // green? | ||||||
|             out[out_coord + 2] = in[in_coord + 2]; |             out[out_coord + 2] = in[in_coord + 2]; // red? | ||||||
|             in_coord+=3; |             in_coord+=3; | ||||||
|         } |         } | ||||||
|     } |     } | ||||||
| @@ -77,8 +78,23 @@ void RendererOpenGL::FlipFramebuffer(const u8* in, u8* out) { | |||||||
|  */ |  */ | ||||||
| void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { | void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& dst_rect) { | ||||||
|  |  | ||||||
|     FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_top_left_1), m_xfb_top_flipped); |     const auto& framebuffer_top = GPU::g_regs.Get<GPU::Regs::FramebufferTop>(); | ||||||
|     FlipFramebuffer(GPU::GetFramebufferPointer(GPU::g_regs.framebuffer_sub_left_1), m_xfb_bottom_flipped); |     const auto& framebuffer_sub = GPU::g_regs.Get<GPU::Regs::FramebufferBottom>(); | ||||||
|  |     const u32 active_fb_top = (framebuffer_top.active_fb == 1) | ||||||
|  |                                 ? framebuffer_top.address_left2 | ||||||
|  |                                 : framebuffer_top.address_left1; | ||||||
|  |     const u32 active_fb_sub = (framebuffer_sub.active_fb == 1) | ||||||
|  |                                 ? framebuffer_sub.address_left2 | ||||||
|  |                                 : framebuffer_sub.address_left1; | ||||||
|  |  | ||||||
|  |     DEBUG_LOG(GPU, "RenderXFB: 0x%08x bytes from 0x%08x(%dx%d), fmt %x", | ||||||
|  |               framebuffer_top.stride * framebuffer_top.height, | ||||||
|  |               GPU::GetFramebufferAddr(active_fb_top), (int)framebuffer_top.width, | ||||||
|  |               (int)framebuffer_top.height, (int)framebuffer_top.format); | ||||||
|  |  | ||||||
|  |     // TODO: This should consider the GPU registers for framebuffer width, height and stride. | ||||||
|  |     FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_top), m_xfb_top_flipped); | ||||||
|  |     FlipFramebuffer(GPU::GetFramebufferPointer(active_fb_sub), m_xfb_bottom_flipped); | ||||||
|  |  | ||||||
|     // Blit the top framebuffer |     // Blit the top framebuffer | ||||||
|     // ------------------------ |     // ------------------------ | ||||||
| @@ -110,7 +126,7 @@ void RendererOpenGL::RenderXFB(const common::Rect& src_rect, const common::Rect& | |||||||
|     // Update textures with contents of XFB in RAM - bottom |     // Update textures with contents of XFB in RAM - bottom | ||||||
|     glBindTexture(GL_TEXTURE_2D, m_xfb_texture_bottom); |     glBindTexture(GL_TEXTURE_2D, m_xfb_texture_bottom); | ||||||
|     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight, |     glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, VideoCore::kScreenTopWidth, VideoCore::kScreenTopHeight, | ||||||
|         GL_RGB, GL_UNSIGNED_BYTE, m_xfb_bottom_flipped); |         GL_BGR, GL_UNSIGNED_BYTE, m_xfb_bottom_flipped); | ||||||
|     glBindTexture(GL_TEXTURE_2D, 0); |     glBindTexture(GL_TEXTURE_2D, 0); | ||||||
|  |  | ||||||
|     // Render target is destination framebuffer |     // Render target is destination framebuffer | ||||||
|   | |||||||
| @@ -84,7 +84,6 @@ private: | |||||||
|     // "Flipped" framebuffers translate scanlines from native 3DS left-to-right to top-to-bottom |     // "Flipped" framebuffers translate scanlines from native 3DS left-to-right to top-to-bottom | ||||||
|     // as OpenGL expects them in a texture. There probably is a more efficient way of doing this: |     // as OpenGL expects them in a texture. There probably is a more efficient way of doing this: | ||||||
|  |  | ||||||
|     u8 m_xfb_top_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopWidth * 4];  |     u8 m_xfb_top_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopHeight * 4]; | ||||||
|     u8 m_xfb_bottom_flipped[VideoCore::kScreenTopWidth * VideoCore::kScreenTopWidth * 4];    |     u8 m_xfb_bottom_flipped[VideoCore::kScreenBottomWidth * VideoCore::kScreenBottomHeight * 4]; | ||||||
|  |  | ||||||
| }; | }; | ||||||
		Reference in New Issue
	
	Block a user
	 bunnei
					bunnei