diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.cpp b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
index b4277ef730..1219ca6ead 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.cpp
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.cpp
@@ -7,90 +7,165 @@
 #include <utility>
 
 #include "common/alignment.h"
+#include "common/assert.h"
 #include "core/core.h"
 #include "video_core/memory_manager.h"
 #include "video_core/renderer_opengl/gl_buffer_cache.h"
 #include "video_core/renderer_opengl/gl_rasterizer.h"
+#include "video_core/renderer_opengl/gl_resource_manager.h"
 
 namespace OpenGL {
 
-CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size,
-                                     std::size_t alignment, GLuint buffer, GLintptr offset)
-    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr}, size{size}, alignment{alignment},
-      buffer{buffer}, offset{offset} {}
+namespace {
+
+constexpr GLuint EmptyBuffer = 0;
+constexpr GLintptr CachedBufferOffset = 0;
+
+OGLBuffer CreateBuffer(std::size_t size, GLenum usage) {
+    OGLBuffer buffer;
+    buffer.Create();
+    glNamedBufferData(buffer.handle, size, nullptr, usage);
+    return buffer;
+}
+
+} // Anonymous namespace
+
+CachedBufferEntry::CachedBufferEntry(VAddr cpu_addr, u8* host_ptr)
+    : RasterizerCacheObject{host_ptr}, cpu_addr{cpu_addr} {}
 
 OGLBufferCache::OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size)
     : RasterizerCache{rasterizer}, stream_buffer(size, true) {}
 
-std::pair<GLuint, GLintptr> OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size,
-                                                         std::size_t alignment, bool cache) {
+OGLBufferCache::~OGLBufferCache() = default;
+
+void OGLBufferCache::Unregister(const std::shared_ptr<CachedBufferEntry>& entry) {
+    std::lock_guard lock{mutex};
+    // Cache-side bookkeeping first, then hand over to the base class implementation.
+    if (entry->IsInternalized()) {
+        internalized_entries.erase(entry->GetCacheAddr());
+    }
+    ReserveBuffer(entry); // Keep the entry in buffer_reserve, keyed by its cache address.
+    RasterizerCache<std::shared_ptr<CachedBufferEntry>>::Unregister(entry);
+}
+
+OGLBufferCache::BufferInfo OGLBufferCache::UploadMemory(GPUVAddr gpu_addr, std::size_t size,
+                                                        std::size_t alignment, bool internalize) {
     std::lock_guard lock{mutex};
 
     auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
-
-    const auto& host_ptr{memory_manager.GetPointer(gpu_addr)};
+    const auto host_ptr{memory_manager.GetPointer(gpu_addr)};
+    const auto cache_addr{ToCacheAddr(host_ptr)};
     if (!host_ptr) {
-        // Return a dummy buffer when host_ptr is invalid.
-        return {0, 0};
+        return {EmptyBuffer, 0};
     }
 
     // Cache management is a big overhead, so only cache entries with a given size.
     // TODO: Figure out which size is the best for given games.
-    cache &= size >= 2048;
-
-    if (cache) {
-        if (auto entry = TryGet(host_ptr); entry) {
-            if (entry->GetSize() >= size && entry->GetAlignment() == alignment) {
-                return {entry->GetBuffer(), entry->GetOffset()};
-            }
-            Unregister(entry);
-        }
+    if (!internalize && size < 0x800 &&
+        internalized_entries.find(cache_addr) == internalized_entries.end()) {
+        return StreamBufferUpload(host_ptr, size, alignment);
     }
 
-    AlignBuffer(alignment);
-    const GLintptr uploaded_offset = buffer_offset;
-
-    std::memcpy(buffer_ptr, host_ptr, size);
-    buffer_ptr += size;
-    buffer_offset += size;
-
-    const GLuint buffer = stream_buffer.GetHandle();
-    if (cache) {
-        const VAddr cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr);
-        Register(std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr, size, alignment, buffer,
-                                                     uploaded_offset));
+    auto entry = TryGet(host_ptr);
+    if (!entry) {
+        return FixedBufferUpload(gpu_addr, host_ptr, size, internalize);
     }
 
-    return {buffer, uploaded_offset};
+    if (entry->GetSize() < size) {
+        GrowBuffer(entry, size);
+    }
+    return {entry->GetBuffer(), CachedBufferOffset};
 }
 
-std::pair<GLuint, GLintptr> OGLBufferCache::UploadHostMemory(const void* raw_pointer,
-                                                             std::size_t size,
-                                                             std::size_t alignment) {
-    std::lock_guard lock{mutex};
+OGLBufferCache::BufferInfo OGLBufferCache::UploadHostMemory(const void* raw_pointer,
+                                                            std::size_t size,
+                                                            std::size_t alignment) {
+    return StreamBufferUpload(raw_pointer, size, alignment);
+}
+
+bool OGLBufferCache::Map(std::size_t max_size) {
+    const auto max_size_ = static_cast<GLsizeiptr>(max_size);
+    bool invalidate;
+    std::tie(buffer_ptr, buffer_offset_base, invalidate) = stream_buffer.Map(max_size_, 4);
+    buffer_offset = buffer_offset_base;
+    return invalidate;
+}
+
+void OGLBufferCache::Unmap() {
+    stream_buffer.Unmap(buffer_offset - buffer_offset_base);
+}
+
+OGLBufferCache::BufferInfo OGLBufferCache::StreamBufferUpload(const void* raw_pointer,
+                                                              std::size_t size,
+                                                              std::size_t alignment) {
     AlignBuffer(alignment);
-    std::memcpy(buffer_ptr, raw_pointer, size);
     const GLintptr uploaded_offset = buffer_offset;
+    std::memcpy(buffer_ptr, raw_pointer, size);
 
     buffer_ptr += size;
     buffer_offset += size;
     return {stream_buffer.GetHandle(), uploaded_offset};
 }
 
-bool OGLBufferCache::Map(std::size_t max_size) {
-    bool invalidate;
-    std::tie(buffer_ptr, buffer_offset_base, invalidate) =
-        stream_buffer.Map(static_cast<GLsizeiptr>(max_size), 4);
-    buffer_offset = buffer_offset_base;
-
-    if (invalidate) {
-        InvalidateAll();
+OGLBufferCache::BufferInfo OGLBufferCache::FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr,
+                                                             std::size_t size, bool internalize) {
+    if (internalize) {
+        internalized_entries.emplace(ToCacheAddr(host_ptr));
     }
-    return invalidate;
+    auto& memory_manager = Core::System::GetInstance().GPU().MemoryManager();
+    const auto cpu_addr = *memory_manager.GpuToCpuAddress(gpu_addr);
+    auto entry = GetUncachedBuffer(cpu_addr, host_ptr);
+    entry->SetSize(size);
+    entry->SetInternalState(internalize);
+    Register(entry);
+
+    if (entry->GetCapacity() < size) {
+        entry->SetCapacity(CreateBuffer(size, GL_STATIC_DRAW), size);
+    }
+    glNamedBufferSubData(entry->GetBuffer(), 0, static_cast<GLintptr>(size), host_ptr);
+    return {entry->GetBuffer(), CachedBufferOffset};
 }
 
-void OGLBufferCache::Unmap() {
-    stream_buffer.Unmap(buffer_offset - buffer_offset_base);
+void OGLBufferCache::GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size) {
+    const auto old_size = static_cast<GLintptr>(entry->GetSize());
+    if (entry->GetCapacity() < new_size) {
+        OGLBuffer new_buffer = CreateBuffer(new_size, GL_STATIC_COPY);
+
+        // Copy bits from the old buffer to the new buffer.
+        glCopyNamedBufferSubData(entry->GetBuffer(), new_buffer.handle, 0, 0, old_size);
+        entry->SetCapacity(std::move(new_buffer), new_size);
+    }
+    // Upload the new bits.
+    const auto size_diff = static_cast<GLintptr>(new_size - old_size);
+    glNamedBufferSubData(entry->GetBuffer(), old_size, size_diff, entry->GetHostPtr() + old_size);
+
+    // Re-register under the new size through the base Unregister: the entry stays alive, so it
+    // must not be reserved or lose its internalized mark, and must leave with its old size.
+    RasterizerCache<std::shared_ptr<CachedBufferEntry>>::Unregister(entry);
+    entry->SetSize(new_size);
+    Register(entry);
+}
+
+std::shared_ptr<CachedBufferEntry> OGLBufferCache::GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr) {
+    if (auto entry = TryGetReservedBuffer(host_ptr); entry) { // Reuse a reserved entry if any.
+        return entry;
+    }
+    return std::make_shared<CachedBufferEntry>(cpu_addr, host_ptr); // Otherwise start fresh.
+}
+
+std::shared_ptr<CachedBufferEntry> OGLBufferCache::TryGetReservedBuffer(u8* host_ptr) {
+    const auto it = buffer_reserve.find(ToCacheAddr(host_ptr));
+    // Buckets stay in the map after being drained, so an empty one must count as a miss.
+    if (it == buffer_reserve.end() || it->second.empty()) {
+        return {};
+    }
+    auto entry = std::move(it->second.back());
+    it->second.pop_back();
+    return entry;
+}
+
+void OGLBufferCache::ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry) {
+    buffer_reserve[entry->GetCacheAddr()].push_back(std::move(entry));
 }
 
 void OGLBufferCache::AlignBuffer(std::size_t alignment) {
diff --git a/src/video_core/renderer_opengl/gl_buffer_cache.h b/src/video_core/renderer_opengl/gl_buffer_cache.h
index 4a055035a6..00bc6008ab 100644
--- a/src/video_core/renderer_opengl/gl_buffer_cache.h
+++ b/src/video_core/renderer_opengl/gl_buffer_cache.h
@@ -5,9 +5,13 @@
 #pragma once
 
 #include <cstddef>
+#include <map>
 #include <memory>
 #include <tuple>
+#include <unordered_map>
+#include <unordered_set>
 #include <utility>
+#include <vector>
 
 #include "common/common_types.h"
 #include "video_core/rasterizer_cache.h"
@@ -20,8 +24,7 @@ class RasterizerOpenGL;
 
 class CachedBufferEntry final : public RasterizerCacheObject {
 public:
-    explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr, std::size_t size,
-                               std::size_t alignment, GLuint buffer, GLintptr offset);
+    explicit CachedBufferEntry(VAddr cpu_addr, u8* host_ptr);
 
     VAddr GetCpuAddr() const override {
         return cpu_addr;
@@ -35,55 +38,87 @@ public:
         return size;
     }
 
-    std::size_t GetAlignment() const {
-        return alignment;
+    std::size_t GetCapacity() const {
+        return capacity;
+    }
+
+    bool IsInternalized() const {
+        return is_internal;
     }
 
     GLuint GetBuffer() const {
-        return buffer;
+        return buffer.handle;
     }
 
-    GLintptr GetOffset() const {
-        return offset;
+    void SetSize(std::size_t new_size) {
+        size = new_size;
+    }
+
+    void SetInternalState(bool is_internal_) {
+        is_internal = is_internal_;
+    }
+
+    void SetCapacity(OGLBuffer&& new_buffer, std::size_t new_capacity) {
+        capacity = new_capacity;
+        buffer = std::move(new_buffer);
     }
 
 private:
     VAddr cpu_addr{};
     std::size_t size{};
-    std::size_t alignment{};
-
-    GLuint buffer{};
-    GLintptr offset{};
+    std::size_t capacity{};
+    bool is_internal{};
+    OGLBuffer buffer;
 };
 
 class OGLBufferCache final : public RasterizerCache<std::shared_ptr<CachedBufferEntry>> {
+    using BufferInfo = std::pair<GLuint, GLintptr>;
+
 public:
     explicit OGLBufferCache(RasterizerOpenGL& rasterizer, std::size_t size);
+    ~OGLBufferCache();
+
+    void Unregister(const std::shared_ptr<CachedBufferEntry>& entry) override;
 
     /// Uploads data from a guest GPU address. Returns the OpenGL buffer where it's located and its
     /// offset.
-    std::pair<GLuint, GLintptr> UploadMemory(GPUVAddr gpu_addr, std::size_t size,
-                                             std::size_t alignment = 4, bool cache = true);
+    BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
+                            bool internalize = false);
 
     /// Uploads from a host memory. Returns the OpenGL buffer where it's located and its offset.
-    std::pair<GLuint, GLintptr> UploadHostMemory(const void* raw_pointer, std::size_t size,
-                                                 std::size_t alignment = 4);
+    BufferInfo UploadHostMemory(const void* raw_pointer, std::size_t size,
+                                std::size_t alignment = 4);
 
     bool Map(std::size_t max_size);
     void Unmap();
 
 protected:
-    void AlignBuffer(std::size_t alignment);
-
     // We do not have to flush this cache as things in it are never modified by us.
     void FlushObjectInner(const std::shared_ptr<CachedBufferEntry>& object) override {}
 
 private:
-    OGLStreamBuffer stream_buffer;
+    BufferInfo StreamBufferUpload(const void* raw_pointer, std::size_t size, std::size_t alignment);
+
+    BufferInfo FixedBufferUpload(GPUVAddr gpu_addr, u8* host_ptr, std::size_t size,
+                                 bool internalize);
+
+    void GrowBuffer(std::shared_ptr<CachedBufferEntry>& entry, std::size_t new_size);
+
+    std::shared_ptr<CachedBufferEntry> GetUncachedBuffer(VAddr cpu_addr, u8* host_ptr);
+
+    std::shared_ptr<CachedBufferEntry> TryGetReservedBuffer(u8* host_ptr);
+
+    void ReserveBuffer(std::shared_ptr<CachedBufferEntry> entry);
+
+    void AlignBuffer(std::size_t alignment);
 
     u8* buffer_ptr = nullptr;
     GLintptr buffer_offset = 0;
     GLintptr buffer_offset_base = 0;
+
+    OGLStreamBuffer stream_buffer;
+    std::unordered_set<CacheAddr> internalized_entries;
+    std::unordered_map<CacheAddr, std::vector<std::shared_ptr<CachedBufferEntry>>> buffer_reserve;
 };
 
 } // namespace OpenGL
diff --git a/src/video_core/renderer_opengl/gl_rasterizer.cpp b/src/video_core/renderer_opengl/gl_rasterizer.cpp
index d694dacfb7..e216163e10 100644
--- a/src/video_core/renderer_opengl/gl_rasterizer.cpp
+++ b/src/video_core/renderer_opengl/gl_rasterizer.cpp
@@ -790,7 +790,7 @@ void RasterizerOpenGL::SetupConstBuffer(const Tegra::Engines::ConstBufferInfo& b
     size = Common::AlignUp(size, sizeof(GLvec4));
     ASSERT_MSG(size <= MaxConstbufferSize, "Constant buffer is too big");
 
-    const std::size_t alignment = device.GetUniformBufferAlignment();
+    const auto alignment = device.GetUniformBufferAlignment();
     const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment);
     bind_ubo_pushbuffer.Push(cbuf, offset, size);
 }