Remade Vertex Cache

This commit is contained in:
Fernando Sahmkow 2017-01-28 22:32:14 -05:00
parent 372db835f4
commit 43a425038c
3 changed files with 90 additions and 25 deletions

View File

@ -44,6 +44,7 @@ set(HEADERS
shader/shader_interpreter.h
swrasterizer.h
utils.h
vertex_cache.h
vertex_loader.h
video_core.h
)

View File

@ -23,6 +23,7 @@
#include "video_core/rasterizer_interface.h"
#include "video_core/renderer_base.h"
#include "video_core/shader/shader.h"
#include "video_core/vertex_cache.h"
#include "video_core/vertex_loader.h"
#include "video_core/video_core.h"
@ -235,16 +236,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
DebugUtils::MemoryAccessTracker memory_accesses;
// Simple circular-replacement vertex cache
// The size has been tuned for optimal balance between hit-rate and the cost of lookup
const size_t VERTEX_CACHE_SIZE = 32;
std::array<u16, VERTEX_CACHE_SIZE> vertex_cache_ids;
std::array<Shader::OutputVertex, VERTEX_CACHE_SIZE> vertex_cache;
Shader::OutputVertex output_vertex;
unsigned int vertex_cache_pos = 0;
vertex_cache_ids.fill(-1);
auto* shader_engine = Shader::GetEngine();
Shader::UnitState shader_unit;
@ -260,25 +253,15 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
// the PICA supports it, and it would mess up the caching, guard against it here.
ASSERT(vertex != -1);
bool vertex_cache_hit = false;
if (is_indexed) {
if (g_debug_context && Pica::g_debug_context->recorder) {
int size = index_u16 ? 2 : 1;
memory_accesses.AddAccess(base_address + index_info.offset + size * index,
size);
}
for (unsigned int i = 0; i < VERTEX_CACHE_SIZE; ++i) {
if (vertex == vertex_cache_ids[i]) {
output_vertex = vertex_cache[i];
vertex_cache_hit = true;
break;
}
}
}
if (!vertex_cache_hit) {
if (!VertexCache::contains(vertex)) {
// Initialize data for the current vertex
Shader::InputVertex input;
loader.LoadVertex(base_address, index, vertex, input, memory_accesses);
@ -294,12 +277,10 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
output_vertex = Shader::OutputVertex::FromRegisters(shader_unit.registers.output,
regs, regs.vs.output_mask);
if (is_indexed) {
vertex_cache[vertex_cache_pos] = output_vertex;
vertex_cache_ids[vertex_cache_pos] = vertex;
vertex_cache_pos = (vertex_cache_pos + 1) % VERTEX_CACHE_SIZE;
}
}
if (is_indexed)
VertexCache::store(vertex, output_vertex);
} else
output_vertex = VertexCache::obtain(vertex);
// Send to renderer
using Pica::Shader::OutputVertex;
@ -316,6 +297,8 @@ static void WritePicaReg(u32 id, u32 value, u32 mask) {
range.second, range.first);
}
if (is_indexed)
VertexCache::clear();
break;
}

View File

@ -0,0 +1,81 @@
#pragma once
#include <algorithm>
#include <array>
#include "common/common_types.h"
#include "video_core/shader/shader.h"
// This little module emulates PICA's vertex shader's post transform vertex cache.
namespace VertexCache {
namespace {
constexpr size_t cache_max_size = 256 * 256;
// testing has proven that this size produces few misses in most games.
// in total the cache size is 64Kb
constexpr size_t cache_size = 512;
// this is used to find if a vertex is in the cache
static std::array<bool, cache_max_size> index_table = {};
#ifdef _DEBUG
static std::array<bool, cache_max_size> seen_table = {};
u32 cache_misses = 0;
// The number of misses we can tolerate.
const u32 tolerated_misses = 16;
#endif
// le cache
static std::array<Pica::Shader::OutputVertex, cache_size> cache;
// stores positions that are cached;
static std::array<u16, cache_size> remapper = {};
// used for clearing the cache;
u32 min_index = cache_size;
u32 max_index = 0;
} // Anonymous namespace
inline bool contains(u32 position) {
#ifdef _DEBUG
if (seen_table[position] != index_table[position])
cache_misses++;
#endif
return index_table[position];
}
inline Pica::Shader::OutputVertex& obtain(u32 position) {
return cache[position % cache_size];
}
inline void store(u32 position, Pica::Shader::OutputVertex& vertex) {
const u32 remap_index = position % cache_size;
index_table[remapper[remap_index]] = false;
remapper[remap_index] = position;
index_table[position] = true;
cache[remap_index] = vertex;
max_index = std::max(max_index, remap_index);
min_index = std::min(min_index, remap_index);
#ifdef _DEBUG
seen_table[position] = true;
#endif
}
void clear() {
max_index++;
auto result = std::minmax_element(remapper.begin() + min_index, remapper.begin() + max_index);
const u32 min = *result.first;
const u32 max = *result.second;
std::fill(index_table.begin() + min, index_table.begin() + max + 1, 0);
max_index = 0;
min_index = cache_size;
#ifdef _DEBUG
seen_table.fill(0);
if (cache_misses > tolerated_misses)
LOG_TRACE(HW_GPU, "The vertex cache had %d misses", cache_misses);
cache_misses = 0;
#endif
}
} // VertexCache