Mirror of https://github.com/PCSX2/pcsx2.git
GS/VK/GL/DX12/DX11: Use default buffer instead of upload buffer for accurate prims data.
Should hopefully give better performance. Also refactor some upload/staging buffer handling in VK/DX12.
This commit is contained in:
parent 16a7cfebdd
commit cbd4a9c92f
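The overall pattern this commit moves to is: keep the shader-visible accurate-prims buffer in GPU-local memory (a DEFAULT heap in D3D12, device-local memory in Vulkan) and fill it by copying from a transient upload/staging buffer, instead of having the pixel shader read CPU-visible upload memory directly. Below is a minimal, hypothetical D3D12 sketch of that pattern; it is not PCSX2 code, and all names and sizes are illustrative.

```cpp
// Hypothetical sketch, not PCSX2 code: fill a DEFAULT-heap buffer via an UPLOAD-heap
// staging buffer, then transition it for pixel-shader reads.
#include <cstring>
#include <d3d12.h>
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;

static D3D12_RESOURCE_DESC BufferDesc(UINT64 size)
{
    D3D12_RESOURCE_DESC rd = {};
    rd.Dimension = D3D12_RESOURCE_DIMENSION_BUFFER;
    rd.Width = size;
    rd.Height = 1;
    rd.DepthOrArraySize = 1;
    rd.MipLevels = 1;
    rd.Format = DXGI_FORMAT_UNKNOWN;
    rd.SampleDesc = {1, 0};
    rd.Layout = D3D12_TEXTURE_LAYOUT_ROW_MAJOR;
    return rd;
}

bool UploadViaStaging(ID3D12Device* dev, ID3D12GraphicsCommandList* cmd, const void* data,
    UINT64 size, ComPtr<ID3D12Resource>& gpu, ComPtr<ID3D12Resource>& staging)
{
    const D3D12_RESOURCE_DESC rd = BufferDesc(size);
    D3D12_HEAP_PROPERTIES heap = {};

    // GPU-local destination; starts in COMMON and is promoted/transitioned as needed.
    heap.Type = D3D12_HEAP_TYPE_DEFAULT;
    if (FAILED(dev->CreateCommittedResource(&heap, D3D12_HEAP_FLAG_NONE, &rd,
            D3D12_RESOURCE_STATE_COMMON, nullptr, IID_PPV_ARGS(gpu.GetAddressOf()))))
        return false;

    // CPU-visible staging buffer that only needs to live long enough for the copy.
    heap.Type = D3D12_HEAP_TYPE_UPLOAD;
    if (FAILED(dev->CreateCommittedResource(&heap, D3D12_HEAP_FLAG_NONE, &rd,
            D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, IID_PPV_ARGS(staging.GetAddressOf()))))
        return false;

    void* mapped = nullptr;
    const D3D12_RANGE no_read = {};
    if (FAILED(staging->Map(0, &no_read, &mapped)))
        return false;
    std::memcpy(mapped, data, size);
    staging->Unmap(0, nullptr);

    // GPU-side copy into the default-heap buffer, then a barrier to the read state.
    cmd->CopyBufferRegion(gpu.Get(), 0, staging.Get(), 0, size);
    D3D12_RESOURCE_BARRIER rb = {};
    rb.Type = D3D12_RESOURCE_BARRIER_TYPE_TRANSITION;
    rb.Transition.pResource = gpu.Get();
    rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
    rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST;
    rb.Transition.StateAfter = D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE;
    cmd->ResourceBarrier(1, &rb);
    return true;
}
```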
@@ -396,8 +396,12 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
}
}

bd = {};

if (m_features.accurate_prims)
{
bd.Usage = D3D11_USAGE_DEFAULT;
bd.CPUAccessFlags = 0;
bd.ByteWidth = ACCURATE_PRIMS_BUFFER_SIZE;
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
bd.StructureByteStride = sizeof(AccuratePrimsEdgeData);
@@ -410,8 +414,11 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)
}

const CD3D11_SHADER_RESOURCE_VIEW_DESC accurate_prims_b_srv_desc(
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0, ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData));
if (FAILED(m_dev->CreateShaderResourceView(m_accurate_prims_b.get(), &accurate_prims_b_srv_desc, m_accurate_prims_b_srv.put())))
D3D11_SRV_DIMENSION_BUFFER, DXGI_FORMAT_UNKNOWN, 0,
ACCURATE_PRIMS_BUFFER_SIZE / sizeof(AccuratePrimsEdgeData));

if (FAILED(m_dev->CreateShaderResourceView(m_accurate_prims_b.get(), &accurate_prims_b_srv_desc,
m_accurate_prims_b_srv.put())))
{
Console.Error("D3D11: Failed to create accurate prims buffer SRV.");
return false;
@@ -419,7 +426,7 @@ bool GSDevice11::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)

// If MAX_TEXTURES changes, please change the register for this buffer in the shader.
static_assert(MAX_TEXTURES == 5);
m_ctx->PSSetShaderResources(MAX_TEXTURES, 1, m_accurate_prims_b_srv.addressof());
m_ctx->PSSetShaderResources(5, 1, m_accurate_prims_b_srv.addressof());
}

// rasterizer
@@ -2326,29 +2333,18 @@ bool GSDevice11::SetupAccuratePrims(GSHWDrawConfig& config)
if (size > ACCURATE_PRIMS_BUFFER_SIZE)
return false;

D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
// Performance note: UpdateSubresource() copies data to a temp staging buffer to avoid stalling the GPU,
// so a manual ring buffer is not needed here like VK/DX12.
D3D11_BOX dst_region{};
dst_region.left = 0;
dst_region.right = size;
dst_region.top = 0;
dst_region.bottom = 1;
dst_region.front = 0;
dst_region.back = 1;
m_ctx->UpdateSubresource(m_accurate_prims_b.get(), 0, &dst_region, config.accurate_prims_edge_data->data(), size, 0);

pxAssert(m_accurate_prims_b_pos % sizeof(AccuratePrimsEdgeData) == 0);

if (m_accurate_prims_b_pos + size > ACCURATE_PRIMS_BUFFER_SIZE)
{
m_accurate_prims_b_pos = 0;
type = D3D11_MAP_WRITE_DISCARD;
}

D3D11_MAPPED_SUBRESOURCE m;
if (FAILED(m_ctx->Map(m_accurate_prims_b.get(), 0, type, 0, &m)))
return false;

void* map = static_cast<u8*>(m.pData) + m_accurate_prims_b_pos;

GSVector4i::storent(map, config.accurate_prims_edge_data->data(), size);

m_ctx->Unmap(m_accurate_prims_b.get(), 0);

config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_b_pos / sizeof(AccuratePrimsEdgeData);

m_accurate_prims_b_pos += size;
config.cb_ps.accurate_prims_base_index.x = 0; // No offsetting needed like DX12/VK since we don't use a ring buffer.
}
return true;
}
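The DX11 hunk above replaces the mapped ring-buffer path with a DEFAULT-usage buffer updated through UpdateSubresource(). A minimal, self-contained sketch of that pattern follows; it is illustrative only (not PCSX2 code), and the struct, size constant, and SRV slot are hypothetical stand-ins.

```cpp
// Hypothetical sketch, not PCSX2 code: DEFAULT-usage structured buffer filled with
// UpdateSubresource() and bound as a pixel-shader SRV.
#include <d3d11.h>
#include <wrl/client.h>
using Microsoft::WRL::ComPtr;

struct EdgeData { float values[4]; };    // stand-in for AccuratePrimsEdgeData
constexpr UINT kBufferSize = 64 * 1024;  // stand-in for ACCURATE_PRIMS_BUFFER_SIZE

bool CreateAndUpload(ID3D11Device* dev, ID3D11DeviceContext* ctx, const EdgeData* data, UINT count)
{
    D3D11_BUFFER_DESC bd = {};
    bd.ByteWidth = kBufferSize;
    bd.Usage = D3D11_USAGE_DEFAULT;      // GPU-local; no CPU mapping required
    bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
    bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
    bd.StructureByteStride = sizeof(EdgeData);

    ComPtr<ID3D11Buffer> buffer;
    if (FAILED(dev->CreateBuffer(&bd, nullptr, buffer.GetAddressOf())))
        return false;

    D3D11_SHADER_RESOURCE_VIEW_DESC sd = {};
    sd.Format = DXGI_FORMAT_UNKNOWN;     // structured buffers use UNKNOWN
    sd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
    sd.Buffer.FirstElement = 0;
    sd.Buffer.NumElements = kBufferSize / sizeof(EdgeData);

    ComPtr<ID3D11ShaderResourceView> srv;
    if (FAILED(dev->CreateShaderResourceView(buffer.Get(), &sd, srv.GetAddressOf())))
        return false;

    // UpdateSubresource() stages the data internally, so the CPU never waits on the GPU
    // and no application-side ring buffer is needed (unlike the VK/DX12 paths).
    const UINT bytes = count * sizeof(EdgeData);
    const D3D11_BOX box = {0, 0, 0, bytes, 1, 1}; // left, top, front, right, bottom, back
    ctx->UpdateSubresource(buffer.Get(), 0, &box, data, bytes, 0);

    ctx->PSSetShaderResources(0, 1, srv.GetAddressOf()); // slot 0 here; PCSX2 binds it at MAX_TEXTURES
    return true;
}
```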
@@ -137,7 +137,6 @@ private:
u32 m_vb_pos = 0; // bytes
u32 m_ib_pos = 0; // indices/sizeof(u32)
u32 m_structured_vb_pos = 0; // bytes
u32 m_accurate_prims_b_pos = 0; // bytes/sizeof(AccuratePrimsEdgeData)

bool m_allow_tearing_supported = false;
bool m_using_flip_model_swap_chain = true;

@@ -20,29 +20,33 @@ D3D12StreamBuffer::~D3D12StreamBuffer()
Destroy();
}

bool D3D12StreamBuffer::Create(u32 size)
bool D3D12StreamBuffer::Create(u32 size, bool default_heap)
{
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN,
{1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};

D3D12MA::ALLOCATION_DESC allocationDesc = {};
allocationDesc.Flags = D3D12MA::ALLOCATION_FLAG_COMMITTED;
allocationDesc.HeapType = D3D12_HEAP_TYPE_UPLOAD;
allocationDesc.HeapType = default_heap ? D3D12_HEAP_TYPE_DEFAULT : D3D12_HEAP_TYPE_UPLOAD;

wil::com_ptr_nothrow<ID3D12Resource> buffer;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocationDesc, &resource_desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
default_heap ? D3D12_RESOURCE_STATE_COMMON : D3D12_RESOURCE_STATE_GENERIC_READ,
nullptr, allocation.put(), IID_PPV_ARGS(buffer.put()));
pxAssertMsg(SUCCEEDED(hr), "Allocate buffer");
if (FAILED(hr))
return false;

static const D3D12_RANGE read_range = {};
u8* host_pointer;
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
if (FAILED(hr))
return false;
u8* host_pointer = nullptr;
if (!default_heap)
{
hr = buffer->Map(0, &read_range, reinterpret_cast<void**>(&host_pointer));
pxAssertMsg(SUCCEEDED(hr), "Map buffer");
if (FAILED(hr))
return false;
}

Destroy(true);

@@ -51,6 +55,7 @@ bool D3D12StreamBuffer::Create(u32 size)
m_host_pointer = host_pointer;
m_size = size;
m_gpu_pointer = m_buffer->GetGPUVirtualAddress();
m_default_heap = default_heap;
return true;
}

@@ -148,6 +153,7 @@ void D3D12StreamBuffer::Destroy(bool defer)
m_current_offset = 0;
m_current_space = 0;
m_current_gpu_position = 0;
m_default_heap = false;
m_tracked_fences.clear();
}


@@ -22,7 +22,7 @@ public:
D3D12StreamBuffer();
~D3D12StreamBuffer();

bool Create(u32 size);
bool Create(u32 size, bool default_heap = false);

__fi bool IsValid() const { return static_cast<bool>(m_buffer); }
__fi ID3D12Resource* GetBuffer() const { return m_buffer.get(); }
@@ -54,7 +54,8 @@ private:
wil::com_ptr_nothrow<ID3D12Resource> m_buffer;
wil::com_ptr_nothrow<D3D12MA::Allocation> m_allocation;
D3D12_GPU_VIRTUAL_ADDRESS m_gpu_pointer = {};
u8* m_host_pointer = nullptr;
u8* m_host_pointer = nullptr; // Only used for upload heaps.
bool m_default_heap = false; // False for upload heap; true for default heap.

// List of fences and the corresponding positions in the buffer
std::deque<std::pair<u64, u32>> m_tracked_fences;

@@ -624,52 +624,91 @@ bool GSDevice12::SetGPUTimingEnabled(bool enabled)
bool GSDevice12::AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer,
D3D12MA::Allocation** gpu_allocation, const std::function<void(void*)>& fill_callback)
{
// Try to place the fixed index buffer in GPU local memory.
// Use the staging buffer to copy into it.
// Allocate and fill staging buffer
ID3D12Resource* cpu_buffer = AllocateUploadStagingBuffer(size, fill_callback);

// Create GPU buffer
const D3D12_RESOURCE_DESC rd = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1, DXGI_FORMAT_UNKNOWN, {1, 0},
D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};

const D3D12MA::ALLOCATION_DESC cpu_ad = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};

ComPtr<ID3D12Resource> cpu_buffer;
ComPtr<D3D12MA::Allocation> cpu_allocation;
HRESULT hr = m_allocator->CreateResource(
&cpu_ad, &rd, D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, cpu_allocation.put(), IID_PPV_ARGS(cpu_buffer.put()));
pxAssertMsg(SUCCEEDED(hr), "Allocate CPU buffer");
if (FAILED(hr))
return false;

static constexpr const D3D12_RANGE read_range = {};
const D3D12_RANGE write_range = {0, size};
void* mapped;
hr = cpu_buffer->Map(0, &read_range, &mapped);
pxAssertMsg(SUCCEEDED(hr), "Map CPU buffer");
if (FAILED(hr))
return false;
fill_callback(mapped);
cpu_buffer->Unmap(0, &write_range);

const D3D12MA::ALLOCATION_DESC gpu_ad = {D3D12MA::ALLOCATION_FLAG_COMMITTED, D3D12_HEAP_TYPE_DEFAULT};

hr = m_allocator->CreateResource(
HRESULT hr = m_allocator->CreateResource(
&gpu_ad, &rd, D3D12_RESOURCE_STATE_COMMON, nullptr, gpu_allocation, IID_PPV_ARGS(gpu_buffer));
pxAssertMsg(SUCCEEDED(hr), "Allocate GPU buffer");
if (FAILED(hr))
return false;

GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer.get(), 0, size);
// Copy the data
GetInitCommandList()->CopyBufferRegion(*gpu_buffer, 0, cpu_buffer, 0, size);

// Transition GPU buffer to COPY_DEST
D3D12_RESOURCE_BARRIER rb = {D3D12_RESOURCE_BARRIER_TYPE_TRANSITION, D3D12_RESOURCE_BARRIER_FLAG_NONE};
rb.Transition.Subresource = D3D12_RESOURCE_BARRIER_ALL_SUBRESOURCES;
rb.Transition.pResource = *gpu_buffer;
rb.Transition.StateBefore = D3D12_RESOURCE_STATE_COPY_DEST; // COMMON -> COPY_DEST at first use.
rb.Transition.StateAfter = D3D12_RESOURCE_STATE_INDEX_BUFFER;
GetInitCommandList()->ResourceBarrier(1, &rb);

DeferResourceDestruction(cpu_allocation.get(), cpu_buffer.get());
return true;
}

ID3D12Resource* GSDevice12::WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out)
{
if (!m_texture_stream_buffer.ReserveMemory(size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
{
GSDevice12::GetInstance()->ExecuteCommandList(
false, "While waiting for %u bytes in texture upload buffer", size);
if (!m_texture_stream_buffer.ReserveMemory(size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
{
Console.Error("Failed to reserve texture upload memory (%u bytes).", size);
return nullptr;
}
}

offset_out = m_texture_stream_buffer.GetCurrentOffset();
write_data(m_texture_stream_buffer.GetCurrentHostPointer());
m_texture_stream_buffer.CommitMemory(size);
return m_texture_stream_buffer.GetBuffer();
}

ID3D12Resource* GSDevice12::AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data)
{
wil::com_ptr_nothrow<ID3D12Resource> resource;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;

// Allocate staging buffer
const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
{
Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr);
return nullptr;
}

// Map
static constexpr const D3D12_RANGE read_range = {};
void* map_ptr;
hr = resource->Map(0, &read_range, &map_ptr);
if (FAILED(hr))
{
Console.WriteLn("(AllocateUploadStagingBuffer) Map() failed with %08X", hr);
return nullptr;
}

// Write data
write_data(map_ptr);

// Unmap
const D3D12_RANGE write_range = {0, size};
resource->Unmap(0, &write_range);

// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
// This adds the reference needed to keep the buffer alive.
DeferResourceDestruction(allocation.get(), resource.get());
return resource.get();
}

RenderAPI GSDevice12::GetRenderAPI() const
{
return RenderAPI::D3D12;
@@ -2180,15 +2219,17 @@ void GSDevice12::IASetIndexBuffer(const void* index, size_t count)
m_index_stream_buffer.CommitMemory(size);
}

void GSDevice12::SetupAccuratePrims(GSHWDrawConfig& config)
void GSDevice12::SetupAccuratePrimsBuffer(GSHWDrawConfig& config)
{
if (config.accurate_prims)
{
// Unbind the buffer.
m_dirty_flags |= DIRTY_FLAG_PS_ACCURATE_PRIMS_BUFFER_BINDING;

const u32 count = config.accurate_prims_edge_data->size();
const u32 size = count * sizeof(AccuratePrimsEdgeData);

// Reserve the GPU region.
if (!m_accurate_prims_stream_buffer.ReserveMemory(size, sizeof(AccuratePrimsEdgeData)))
{
ExecuteCommandListAndRestartRenderPass(false, "Uploading bytes to accurate prims buffer");
@@ -2196,14 +2237,72 @@ void GSDevice12::SetupAccuratePrims(GSHWDrawConfig& config)
pxFailRel("Failed to reserve space for accurate prims");
}

const u32 offset = m_accurate_prims_stream_buffer.GetCurrentOffset();

if (InRenderPass())
EndRenderPass();

// Copy data to an upload buffer.
ID3D12Resource* upload_buffer;
u32 upload_buffer_offset;

const auto upload_data = [&](void* map_ptr) {
std::memcpy(map_ptr, config.accurate_prims_edge_data->data(), size);
};

// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
if (size > m_texture_stream_buffer.GetSize() / 2)
{
upload_buffer_offset = 0;
upload_buffer = AllocateUploadStagingBuffer(size, upload_data);
}
else
{
upload_buffer = WriteTextureUploadBuffer(size, upload_data, upload_buffer_offset);
}
if (!upload_buffer)
{
Console.Error("Failed to get upload buffer for accurate prims data.");
return;
}

// Copy data from upload to GPU buffer.
const D3D12_RESOURCE_BARRIER barrier_sr_to_dst = {
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_accurate_prims_stream_buffer.GetBuffer(), 0,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE,
D3D12_RESOURCE_STATE_COPY_DEST}}};
GetCommandList()->ResourceBarrier(1, &barrier_sr_to_dst);
GetCommandList()->CopyBufferRegion(
m_accurate_prims_stream_buffer.GetBuffer(), offset, upload_buffer, upload_buffer_offset, size);

// Commit the GPU region.
m_accurate_prims_stream_buffer.CommitMemory(size);

// Issue the barrier since this will be used next draw.
const D3D12_RESOURCE_BARRIER barrier_dst_to_sr = {
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_accurate_prims_stream_buffer.GetBuffer(), 0,
D3D12_RESOURCE_STATE_COPY_DEST,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE}}};
GetCommandList()->ResourceBarrier(1, &barrier_dst_to_sr);

m_accurate_prims_stream_buffer_offset = offset; // Save this for the constant buffer.
}
}

void GSDevice12::SetupAccuratePrimsConstants(GSHWDrawConfig& config)
{
if (config.accurate_prims)
{
config.cb_vs.base_vertex = m_vertex.start;
config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_stream_buffer.GetCurrentOffset() / sizeof(AccuratePrimsEdgeData);
config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_stream_buffer_offset / sizeof(AccuratePrimsEdgeData);

SetVSConstantBuffer(config.cb_vs);
SetPSConstantBuffer(config.cb_ps);

std::memcpy(m_accurate_prims_stream_buffer.GetCurrentHostPointer(), config.accurate_prims_edge_data->data(), size);
m_accurate_prims_stream_buffer.CommitMemory(size);
}
}
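Both the DX12 functions above and the matching Vulkan ones further down derive the shader-visible element index from a byte offset handed out by the stream buffer. A hypothetical, simplified sketch of that suballocation bookkeeping follows (not the real ReserveMemory/CommitMemory implementation, which also tracks fences before reusing wrapped ranges):

```cpp
// Hypothetical sketch, not PCSX2 code: ring-style suballocation of a fixed-size GPU
// buffer. The returned byte offset, divided by the element stride, is what ends up in
// cb_ps.accurate_prims_base_index.x for the draw.
#include <cstdint>

struct RingAllocator
{
    uint32_t size = 0;    // total buffer size in bytes
    uint32_t offset = 0;  // next free byte

    // Reserve 'bytes' aligned to 'alignment', wrapping when the tail does not fit.
    // A real implementation must also wait on fences before reusing a wrapped range.
    uint32_t Reserve(uint32_t bytes, uint32_t alignment)
    {
        offset = (offset + alignment - 1) & ~(alignment - 1);
        if (offset + bytes > size)
            offset = 0;
        const uint32_t region = offset;
        offset += bytes;
        return region;
    }
};

// Usage (stride stands in for sizeof(AccuratePrimsEdgeData)):
//   const uint32_t byte_offset = ring.Reserve(count * stride, stride);
//   cb_ps.accurate_prims_base_index.x = byte_offset / stride;
```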
@@ -2394,7 +2493,8 @@ bool GSDevice12::CreateBuffers()
return false;
}

if (!m_accurate_prims_stream_buffer.Create(ACCURATE_PRIMS_BUFFER_SIZE))
if (!m_accurate_prims_stream_buffer.Create(
m_features.accurate_prims ? ACCURATE_PRIMS_BUFFER_SIZE : sizeof(AccuratePrimsEdgeData), true))
{
Host::ReportErrorAsync("GS", "Failed to allocate accurate prims buffer");
return false;
@@ -2406,8 +2506,17 @@ bool GSDevice12::CreateBuffers()
return false;
}

// Create the shader resource view for the accurate prims buffer.
if (m_features.accurate_prims)
{
// Transition the accurate prims buffer to pixel shader resource and create the shader resource view.
const D3D12_RESOURCE_BARRIER barrier = {
D3D12_RESOURCE_BARRIER_TYPE_TRANSITION,
D3D12_RESOURCE_BARRIER_FLAG_NONE,
{{m_accurate_prims_stream_buffer.GetBuffer(), 0,
D3D12_RESOURCE_STATE_COMMON,
D3D12_RESOURCE_STATE_PIXEL_SHADER_RESOURCE}}};
GetInitCommandList()->ResourceBarrier(1, &barrier);

D3D12_SHADER_RESOURCE_VIEW_DESC desc = {
DXGI_FORMAT_UNKNOWN, D3D12_SRV_DIMENSION_BUFFER, D3D12_DEFAULT_SHADER_4_COMPONENT_MAPPING};
desc.Buffer.FirstElement = 0;
@@ -3940,6 +4049,9 @@ void GSDevice12::RenderHW(GSHWDrawConfig& config)

PipelineSelector& pipe = m_pipeline_selector;

// Copying buffers needs to be done outside the render pass, so do this early.
SetupAccuratePrimsBuffer(config);

// figure out the pipeline
UpdateHWPipelineSelector(config);

@@ -4321,5 +4433,6 @@ void GSDevice12::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
IASetIndexBuffer(config.indices, config.nindices);
}

SetupAccuratePrims(config);
// Needs to be done after the vertex offset is set.
SetupAccuratePrimsConstants(config);
}

@@ -129,6 +129,8 @@ public:
// Allocates a temporary CPU staging buffer, fires the callback with it to populate, then copies to a GPU buffer.
bool AllocatePreinitializedGPUBuffer(u32 size, ID3D12Resource** gpu_buffer, D3D12MA::Allocation** gpu_allocation,
const std::function<void(void*)>& fill_callback);
ID3D12Resource* AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data);
ID3D12Resource* WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out);

private:
struct CommandListResources
@@ -307,6 +309,7 @@ private:
D3D12StreamBuffer m_vertex_stream_buffer;
D3D12StreamBuffer m_index_stream_buffer;
D3D12StreamBuffer m_accurate_prims_stream_buffer;
u32 m_accurate_prims_stream_buffer_offset = 0; // Ring buffer offset for the current draw.
D3D12DescriptorHandle m_accurate_prims_srv_descriptor_cpu;
D3D12DescriptorHandle m_accurate_prims_srv_descriptor_gpu;
D3D12StreamBuffer m_vertex_constant_buffer;
@@ -465,7 +468,8 @@ public:

void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
void IASetIndexBuffer(const void* index, size_t count);
void SetupAccuratePrims(GSHWDrawConfig& config);
void SetupAccuratePrimsBuffer(GSHWDrawConfig& config);
void SetupAccuratePrimsConstants(GSHWDrawConfig& config);

void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);

@@ -350,43 +350,6 @@ ID3D12GraphicsCommandList* GSTexture12::GetCommandBufferForUpdate()
return dev->GetInitCommandList();
}

ID3D12Resource* GSTexture12::AllocateUploadStagingBuffer(
const void* data, u32 pitch, u32 upload_pitch, u32 height) const
{
const u32 buffer_size = CalcUploadSize(height, upload_pitch);
wil::com_ptr_nothrow<ID3D12Resource> resource;
wil::com_ptr_nothrow<D3D12MA::Allocation> allocation;

const D3D12MA::ALLOCATION_DESC allocation_desc = {D3D12MA::ALLOCATION_FLAG_NONE, D3D12_HEAP_TYPE_UPLOAD};
const D3D12_RESOURCE_DESC resource_desc = {D3D12_RESOURCE_DIMENSION_BUFFER, 0, buffer_size, 1, 1, 1,
DXGI_FORMAT_UNKNOWN, {1, 0}, D3D12_TEXTURE_LAYOUT_ROW_MAJOR, D3D12_RESOURCE_FLAG_NONE};
HRESULT hr = GSDevice12::GetInstance()->GetAllocator()->CreateResource(&allocation_desc, &resource_desc,
D3D12_RESOURCE_STATE_GENERIC_READ, nullptr, allocation.put(), IID_PPV_ARGS(resource.put()));
if (FAILED(hr))
{
Console.WriteLn("(AllocateUploadStagingBuffer) CreateCommittedResource() failed with %08X", hr);
return nullptr;
}

void* map_ptr;
hr = resource->Map(0, nullptr, &map_ptr);
if (FAILED(hr))
{
Console.WriteLn("(AllocateUploadStagingBuffer) Map() failed with %08X", hr);
return nullptr;
}

CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);

const D3D12_RANGE write_range = {0, buffer_size};
resource->Unmap(0, &write_range);

// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
// This adds the reference needed to keep the buffer alive.
GSDevice12::GetInstance()->DeferResourceDestruction(allocation.get(), resource.get());
return resource.get();
}

void GSTexture12::CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const
{
const u32 block_size = GetCompressedBlockSize();
@@ -406,7 +369,7 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
const u32 width = Common::AlignUpPow2(r.width(), block_size);
const u32 height = Common::AlignUpPow2(r.height(), block_size);
const u32 upload_pitch = Common::AlignUpPow2<u32>(pitch, D3D12_TEXTURE_DATA_PITCH_ALIGNMENT);
const u32 required_size = CalcUploadSize(r.height(), upload_pitch);
const u32 required_size = CalcUploadSize(height, upload_pitch);

D3D12_TEXTURE_COPY_LOCATION srcloc;
srcloc.Type = D3D12_TEXTURE_COPY_TYPE_PLACED_FOOTPRINT;
@@ -416,35 +379,25 @@ bool GSTexture12::Update(const GSVector4i& r, const void* data, int pitch, int l
srcloc.PlacedFootprint.Footprint.Format = m_dxgi_format;
srcloc.PlacedFootprint.Footprint.RowPitch = upload_pitch;

const auto upload_data = [&](void* map_ptr) {
CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);
};

// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
if (required_size > (GSDevice12::GetInstance()->GetTextureStreamBuffer().GetSize() / 2))
{
srcloc.pResource = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
if (!srcloc.pResource)
return false;

srcloc.pResource = GSDevice12::GetInstance()->AllocateUploadStagingBuffer(required_size, upload_data);
srcloc.PlacedFootprint.Offset = 0;
}
else
{
D3D12StreamBuffer& sbuffer = GSDevice12::GetInstance()->GetTextureStreamBuffer();
if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
{
GSDevice12::GetInstance()->ExecuteCommandList(
false, "While waiting for %u bytes in texture upload buffer", required_size);
if (!sbuffer.ReserveMemory(required_size, D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT))
{
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
return false;
}
}

srcloc.pResource = sbuffer.GetBuffer();
srcloc.PlacedFootprint.Offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, pitch, upload_pitch, height);
sbuffer.CommitMemory(required_size);
u32 offset;
srcloc.pResource = GSDevice12::GetInstance()->WriteTextureUploadBuffer(required_size, upload_data, offset);
srcloc.PlacedFootprint.Offset = offset;
}
if (!srcloc.pResource)
return false;

ID3D12GraphicsCommandList* cmdlist = GetCommandBufferForUpdate();
GL_PUSH("GSTexture12::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);

@@ -79,7 +79,6 @@ private:
static bool CreateUAVDescriptor(ID3D12Resource* resource, DXGI_FORMAT format, D3D12DescriptorHandle* dh);

ID3D12GraphicsCommandList* GetCommandBufferForUpdate();
ID3D12Resource* AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const;
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;

wil::com_ptr_nothrow<ID3D12Resource> m_resource;

@@ -310,10 +310,10 @@ namespace
};
} // namespace

std::unique_ptr<GLStreamBuffer> GLStreamBuffer::Create(GLenum target, u32 size)
std::unique_ptr<GLStreamBuffer> GLStreamBuffer::Create(GLenum target, u32 size, bool nonsyncing)
{
std::unique_ptr<GLStreamBuffer> buf;
if (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage)
if (!nonsyncing && (GLAD_GL_VERSION_4_4 || GLAD_GL_ARB_buffer_storage || GLAD_GL_EXT_buffer_storage))
{
buf = BufferStorageStreamBuffer::Create(target, size);
if (buf)

@@ -38,7 +38,7 @@ public:
/// Returns the minimum granularity of blocks which sync objects will be created around.
virtual u32 GetChunkSize() const = 0;

static std::unique_ptr<GLStreamBuffer> Create(GLenum target, u32 size);
static std::unique_ptr<GLStreamBuffer> Create(GLenum target, u32 size, bool nonsyncing = false);

protected:
GLStreamBuffer(GLenum target, GLuint buffer_id, u32 size);

@@ -260,11 +260,17 @@ bool GSDeviceOGL::Create(GSVSyncMode vsync_mode, bool allow_present_throttle)

m_vertex_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, VERTEX_BUFFER_SIZE);
m_index_stream_buffer = GLStreamBuffer::Create(GL_ELEMENT_ARRAY_BUFFER, INDEX_BUFFER_SIZE);
m_accurate_prims_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, ACCURATE_PRIMS_BUFFER_SIZE);
if (m_features.accurate_prims)
{
// Performance note: prefer a non-syncing buffer for accurate prims so that it is more likely to be GPU local.
// Rationale: we expect this buffer to be updated relatively rarely and it's used as a pixel shader resource.
m_accurate_prims_stream_buffer = GLStreamBuffer::Create(GL_ARRAY_BUFFER, ACCURATE_PRIMS_BUFFER_SIZE, true);
}
m_vertex_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, VERTEX_UNIFORM_BUFFER_SIZE);
m_fragment_uniform_stream_buffer = GLStreamBuffer::Create(GL_UNIFORM_BUFFER, FRAGMENT_UNIFORM_BUFFER_SIZE);
glGetIntegerv(GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT, &m_uniform_buffer_alignment);
if (!m_vertex_stream_buffer || !m_index_stream_buffer || !m_accurate_prims_stream_buffer ||
if (!m_vertex_stream_buffer || !m_index_stream_buffer ||
(m_features.accurate_prims && !m_accurate_prims_stream_buffer) ||
!m_vertex_uniform_stream_buffer || !m_fragment_uniform_stream_buffer)
{
Host::ReportErrorAsync("GS", "Failed to create vertex/index/uniform streaming buffers");
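For the OpenGL path above, passing nonsyncing=true makes GLStreamBuffer::Create() skip the persistently mapped ARB_buffer_storage path. A hypothetical sketch (not the PCSX2 GLStreamBuffer implementation) of the kind of driver-managed buffer such a fallback can use, where the GL driver keeps the storage device-local and handles the staging copy itself:

```cpp
// Hypothetical sketch, not PCSX2 code: a non-persistently-mapped GL buffer.
// Without a persistently mapped ARB_buffer_storage allocation, the driver is free to
// keep the data store in device-local memory and schedule the CPU->GPU transfer.
#include <glad/glad.h> // assumed GL loader header

GLuint CreateDriverManagedBuffer(GLenum target, GLsizeiptr size)
{
    GLuint id = 0;
    glGenBuffers(1, &id);
    glBindBuffer(target, id);
    glBufferData(target, size, nullptr, GL_DYNAMIC_DRAW); // allocate storage, no mapping
    return id;
}

void UploadRegion(GLenum target, GLuint id, GLintptr offset, GLsizeiptr size, const void* data)
{
    glBindBuffer(target, id);
    glBufferSubData(target, offset, size, data); // driver performs the staging copy
}
```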
@@ -3406,13 +3406,14 @@ void GSDeviceVK::IASetIndexBuffer(const void* index, size_t count)
SetIndexBuffer(m_index_stream_buffer.GetBuffer());
}

void GSDeviceVK::SetupAccuratePrims(GSHWDrawConfig& config)
void GSDeviceVK::SetupAccuratePrimsBuffer(GSHWDrawConfig& config)
{
if (config.accurate_prims)
{
const u32 count = config.accurate_prims_edge_data->size();
const u32 size = count * sizeof(AccuratePrimsEdgeData);

// Reserve the GPU region.
if (!m_accurate_prims_stream_buffer.ReserveMemory(size, sizeof(AccuratePrimsEdgeData)))
{
ExecuteCommandBufferAndRestartRenderPass(false, "Uploading bytes to accurate prims buffer");
@@ -3420,17 +3421,120 @@ void GSDeviceVK::SetupAccuratePrims(GSHWDrawConfig& config)
pxFailRel("Failed to reserve space for accurate prims");
}

const u32 offset = m_accurate_prims_stream_buffer.GetCurrentOffset();

if (InRenderPass())
EndRenderPass();

// Copy data to an upload buffer.
VkBuffer upload_buffer;
u32 upload_buffer_offset;

const auto upload_data = [&](void* map_ptr) {
std::memcpy(map_ptr, config.accurate_prims_edge_data->data(), size);
};

// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
if (size > m_texture_stream_buffer.GetCurrentSize() / 2)
{
upload_buffer_offset = 0;
upload_buffer = AllocateUploadStagingBuffer(size, upload_data);
}
else
{
upload_buffer = WriteTextureUploadBuffer(size, upload_data, upload_buffer_offset);
}
if (upload_buffer == VK_NULL_HANDLE)
{
Console.Error("Failed to get upload buffer for accurate prims data.");
return;
}

// Copy data from upload to GPU buffer.
VkBufferCopy copyRegion = {upload_buffer_offset, offset, size};
vkCmdCopyBuffer(GetCurrentCommandBuffer(), upload_buffer, m_accurate_prims_stream_buffer.GetBuffer(), 1, &copyRegion);

// Commit the GPU region.
m_accurate_prims_stream_buffer.CommitMemory(size);

// Issue the barrier since this will be used next draw.
VkBufferMemoryBarrier barrier = {
VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, nullptr,
VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED,
m_accurate_prims_stream_buffer.GetBuffer(), offset, size};
vkCmdPipelineBarrier(GetCurrentCommandBuffer(),
VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
0, 0, nullptr, 1, &barrier, 0, nullptr);

m_accurate_prims_stream_buffer_offset = offset; // Save this for the constant buffer.
}
}

void GSDeviceVK::SetupAccuratePrimsConstants(GSHWDrawConfig& config)
{
if (config.accurate_prims)
{
// We separate this from setting up the buffer to mirror Vulkan, which requires it.
config.cb_vs.base_vertex = m_vertex.start;
config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_stream_buffer.GetCurrentOffset() / sizeof(AccuratePrimsEdgeData);
config.cb_ps.accurate_prims_base_index.x = m_accurate_prims_stream_buffer_offset / sizeof(AccuratePrimsEdgeData);

SetVSConstantBuffer(config.cb_vs);
SetPSConstantBuffer(config.cb_ps);

std::memcpy(m_accurate_prims_stream_buffer.GetCurrentHostPointer(), config.accurate_prims_edge_data->data(), size);
m_accurate_prims_stream_buffer.CommitMemory(size);
}
}

VkBuffer GSDeviceVK::WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out)
{
if (!m_texture_stream_buffer.ReserveMemory(size, GetBufferCopyOffsetAlignment()))
{
ExecuteCommandBuffer(
false, "While waiting for %u bytes in texture upload buffer", size);
if (!m_texture_stream_buffer.ReserveMemory(size, GetBufferCopyOffsetAlignment()))
{
Console.Error("Failed to reserve texture upload memory (%u bytes).", size);
return VK_NULL_HANDLE;
}
}

offset_out = m_texture_stream_buffer.GetCurrentOffset();
write_data(m_texture_stream_buffer.GetCurrentHostPointer());
m_texture_stream_buffer.CommitMemory(size);
return m_texture_stream_buffer.GetBuffer();
}

VkBuffer GSDeviceVK::AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data)
{
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};

// Don't worry about setting the coherent bit for this upload, the main reason we had
// that set in StreamBuffer was for MoltenVK, which would upload the whole buffer on
// smaller uploads, but we're writing to the whole thing anyway.
VmaAllocationCreateInfo aci = {};
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;

VmaAllocationInfo ai;
VkBuffer buffer;
VmaAllocation allocation;
VkResult res = vmaCreateBuffer(GSDeviceVK::GetInstance()->GetAllocator(), &bci, &aci, &buffer, &allocation, &ai);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "(AllocateUploadStagingBuffer) vmaCreateBuffer() failed: ");
return VK_NULL_HANDLE;
}

// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
GSDeviceVK::GetInstance()->DeferBufferDestruction(buffer, allocation);

// And write the data.
write_data(ai.pMappedData);
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), allocation, 0, size);
return buffer;
}

void GSDeviceVK::OMSetRenderTargets(
GSTexture* rt, GSTexture* ds, const GSVector4i& scissor, FeedbackLoopFlag feedback_loop)
{
@@ -3762,7 +3866,8 @@ bool GSDeviceVK::CreateBuffers()

if (m_features.accurate_prims)
{
if (!m_accurate_prims_stream_buffer.Create(VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, ACCURATE_PRIMS_BUFFER_SIZE))
if (!m_accurate_prims_stream_buffer.Create(
VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, ACCURATE_PRIMS_BUFFER_SIZE, true))
{
Host::ReportErrorAsync("GS", "Failed to allocate accurate prims buffer");
return false;
@@ -5673,13 +5778,15 @@ GSTextureVK* GSDeviceVK::SetupPrimitiveTrackingDATE(GSHWDrawConfig& config)

void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
{

const GSVector2i rtsize(config.rt ? config.rt->GetSize() : config.ds->GetSize());
GSTextureVK* draw_rt = static_cast<GSTextureVK*>(config.rt);
GSTextureVK* draw_ds = static_cast<GSTextureVK*>(config.ds);
GSTextureVK* draw_rt_clone = nullptr;
GSTextureVK* colclip_rt = static_cast<GSTextureVK*>(g_gs_device->GetColorClipTexture());

// Copying buffers needs to be done outside the render pass, so do this early.
SetupAccuratePrimsBuffer(config);

// stream buffer in first, in case we need to exec
SetVSConstantBuffer(config.cb_vs);
SetPSConstantBuffer(config.cb_ps);
@@ -6157,7 +6264,8 @@ void GSDeviceVK::UploadHWDrawVerticesAndIndices(GSHWDrawConfig& config)
IASetIndexBuffer(config.indices, config.nindices);
}

SetupAccuratePrims(config);
// Needs to be done after the vertex offset is set.
SetupAccuratePrimsConstants(config);
}

VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const

@@ -98,6 +98,8 @@ public:
__fi VkCommandBuffer GetCurrentCommandBuffer() const { return m_current_command_buffer; }
__fi VKStreamBuffer& GetTextureUploadBuffer() { return m_texture_stream_buffer; }
VkCommandBuffer GetCurrentInitCommandBuffer();
VkBuffer AllocateUploadStagingBuffer(u32 size, std::function<void(void*)> write_data);
VkBuffer WriteTextureUploadBuffer(u32 size, std::function<void(void*)> write_data, u32& offset_out);

/// Allocates a descriptor set from the pool reserved for the current frame.
VkDescriptorSet AllocatePersistentDescriptorSet(VkDescriptorSetLayout set_layout);
@@ -381,6 +383,7 @@ private:
VKStreamBuffer m_vertex_stream_buffer;
VKStreamBuffer m_index_stream_buffer;
VKStreamBuffer m_accurate_prims_stream_buffer;
u32 m_accurate_prims_stream_buffer_offset = 0; // Ring buffer offset for the current draw.
VKStreamBuffer m_vertex_uniform_stream_buffer;
VKStreamBuffer m_fragment_uniform_stream_buffer;
VKStreamBuffer m_texture_stream_buffer;
@@ -563,7 +566,8 @@ public:
void PSSetShaderResource(int i, GSTexture* sr, bool check_state);
void PSSetSampler(GSHWDrawConfig::SamplerSelector sel);

void SetupAccuratePrims(GSHWDrawConfig& config);
void SetupAccuratePrimsBuffer(GSHWDrawConfig& config);
void SetupAccuratePrimsConstants(GSHWDrawConfig& config);

void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i& scissor,
FeedbackLoopFlag feedback_loop = FeedbackLoopFlag_None);

@@ -270,38 +270,6 @@ void GSTextureVK::CopyTextureDataForUpload(void* dst, const void* src, u32 pitch
StringUtil::StrideMemCpy(dst, upload_pitch, src, pitch, std::min(upload_pitch, pitch), count);
}

VkBuffer GSTextureVK::AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const
{
const u32 size = upload_pitch * height;
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
VK_BUFFER_USAGE_TRANSFER_SRC_BIT, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};

// Don't worry about setting the coherent bit for this upload, the main reason we had
// that set in StreamBuffer was for MoltenVK, which would upload the whole buffer on
// smaller uploads, but we're writing to the whole thing anyway.
VmaAllocationCreateInfo aci = {};
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;

VmaAllocationInfo ai;
VkBuffer buffer;
VmaAllocation allocation;
VkResult res = vmaCreateBuffer(GSDeviceVK::GetInstance()->GetAllocator(), &bci, &aci, &buffer, &allocation, &ai);
if (res != VK_SUCCESS)
{
LOG_VULKAN_ERROR(res, "(AllocateUploadStagingBuffer) vmaCreateBuffer() failed: ");
return VK_NULL_HANDLE;
}

// Immediately queue it for freeing after the command buffer finishes, since it's only needed for the copy.
GSDeviceVK::GetInstance()->DeferBufferDestruction(buffer, allocation);

// And write the data.
CopyTextureDataForUpload(ai.pMappedData, data, pitch, upload_pitch, height);
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), allocation, 0, size);
return buffer;
}

void GSTextureVK::UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height,
u32 buffer_height, u32 row_length, VkBuffer buffer, u32 buffer_offset)
{
@@ -333,6 +301,10 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
const u32 upload_pitch = Common::AlignUpPow2(pitch, GSDeviceVK::GetInstance()->GetBufferCopyRowPitchAlignment());
const u32 required_size = CalcUploadSize(height, upload_pitch);

const auto upload_data = [&](void* map_ptr) {
CopyTextureDataForUpload(map_ptr, data, pitch, upload_pitch, height);
};

// If the texture is larger than half our streaming buffer size, use a separate buffer.
// Otherwise allocation will either fail, or require lots of cmdbuffer submissions.
VkBuffer buffer;
@@ -340,29 +312,14 @@ bool GSTextureVK::Update(const GSVector4i& r, const void* data, int pitch, int l
if (required_size > (GSDeviceVK::GetInstance()->GetTextureUploadBuffer().GetCurrentSize() / 2))
{
buffer_offset = 0;
buffer = AllocateUploadStagingBuffer(data, pitch, upload_pitch, height);
if (buffer == VK_NULL_HANDLE)
return false;
buffer = GSDeviceVK::GetInstance()->AllocateUploadStagingBuffer(required_size, upload_data);
}
else
{
VKStreamBuffer& sbuffer = GSDeviceVK::GetInstance()->GetTextureUploadBuffer();
if (!sbuffer.ReserveMemory(required_size, GSDeviceVK::GetInstance()->GetBufferCopyOffsetAlignment()))
{
GSDeviceVK::GetInstance()->ExecuteCommandBuffer(
false, "While waiting for %u bytes in texture upload buffer", required_size);
if (!sbuffer.ReserveMemory(required_size, GSDeviceVK::GetInstance()->GetBufferCopyOffsetAlignment()))
{
Console.Error("Failed to reserve texture upload memory (%u bytes).", required_size);
return false;
}
}

buffer = sbuffer.GetBuffer();
buffer_offset = sbuffer.GetCurrentOffset();
CopyTextureDataForUpload(sbuffer.GetCurrentHostPointer(), data, pitch, upload_pitch, height);
sbuffer.CommitMemory(required_size);
buffer = GSDeviceVK::GetInstance()->WriteTextureUploadBuffer(required_size, upload_data, buffer_offset);
}
if (buffer == VK_NULL_HANDLE)
return false;

const VkCommandBuffer cmdbuf = GetCommandBufferForUpdate();
GL_PUSH("GSTextureVK::Update({%d,%d} %dx%d Lvl:%u", r.x, r.y, r.width(), r.height(), layer);

@@ -84,7 +84,6 @@ private:

VkCommandBuffer GetCommandBufferForUpdate();
void CopyTextureDataForUpload(void* dst, const void* src, u32 pitch, u32 upload_pitch, u32 height) const;
VkBuffer AllocateUploadStagingBuffer(const void* data, u32 pitch, u32 upload_pitch, u32 height) const;
void UpdateFromBuffer(VkCommandBuffer cmdbuf, int level, u32 x, u32 y, u32 width, u32 height, u32 buffer_height,
u32 row_length, VkBuffer buffer, u32 buffer_offset);


@@ -19,6 +19,7 @@ VKStreamBuffer::VKStreamBuffer(VKStreamBuffer&& move)
, m_allocation(move.m_allocation)
, m_buffer(move.m_buffer)
, m_host_pointer(move.m_host_pointer)
, m_device_local(move.m_device_local)
, m_tracked_fences(std::move(move.m_tracked_fences))
{
move.m_size = 0;
@@ -28,6 +29,7 @@ VKStreamBuffer::VKStreamBuffer(VKStreamBuffer&& move)
move.m_allocation = VK_NULL_HANDLE;
move.m_buffer = VK_NULL_HANDLE;
move.m_host_pointer = nullptr;
move.m_device_local = false;
}

VKStreamBuffer::~VKStreamBuffer()
@@ -48,19 +50,29 @@ VKStreamBuffer& VKStreamBuffer::operator=(VKStreamBuffer&& move)
std::swap(m_buffer, move.m_buffer);
std::swap(m_host_pointer, move.m_host_pointer);
std::swap(m_tracked_fences, move.m_tracked_fences);
std::swap(m_device_local, move.m_device_local);

return *this;
}

bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size, bool device_local)
{
const VkBufferCreateInfo bci = {VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, nullptr, 0, static_cast<VkDeviceSize>(size),
usage, VK_SHARING_MODE_EXCLUSIVE, 0, nullptr};

VmaAllocationCreateInfo aci = {};
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
if (device_local)
{
// GPU default buffer
aci.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
}
else
{
// CPU upload buffer
aci.flags = VMA_ALLOCATION_CREATE_MAPPED_BIT;
aci.usage = VMA_MEMORY_USAGE_CPU_TO_GPU;
aci.preferredFlags = VK_MEMORY_PROPERTY_HOST_COHERENT_BIT;
}

VmaAllocationInfo ai = {};
VkBuffer new_buffer = VK_NULL_HANDLE;
@@ -83,7 +95,8 @@ bool VKStreamBuffer::Create(VkBufferUsageFlags usage, u32 size)
m_tracked_fences.clear();
m_allocation = new_allocation;
m_buffer = new_buffer;
m_host_pointer = static_cast<u8*>(ai.pMappedData);
m_host_pointer = device_local ? nullptr : static_cast<u8*>(ai.pMappedData);
m_device_local = device_local;
return true;
}
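A device-local VKStreamBuffer has no host pointer, so callers have to fill it differently from an upload buffer. A hypothetical side-by-side sketch of the two write paths (not PCSX2 code; the helpers and parameters are illustrative):

```cpp
// Hypothetical sketch, not PCSX2 code: the two ways a stream-buffer region gets filled
// depending on whether it was created device_local.
#include <cstdint>
#include <cstring>
#include <vulkan/vulkan.h>
#include "vk_mem_alloc.h" // VulkanMemoryAllocator, assumed available as in PCSX2

// Host-visible upload buffer: write through the persistent mapping, then flush.
void FillUploadBuffer(VmaAllocator allocator, VmaAllocation allocation, uint8_t* host_pointer,
    uint32_t offset, const void* data, uint32_t size)
{
    std::memcpy(host_pointer + offset, data, size);
    vmaFlushAllocation(allocator, allocation, offset, size); // no-op for coherent memory
}

// Device-local buffer: no mapping exists, so record a copy from a staging buffer and
// make the write visible to fragment-shader reads.
void FillDeviceLocalBuffer(VkCommandBuffer cmdbuf, VkBuffer staging, uint32_t staging_offset,
    VkBuffer gpu_buffer, uint32_t offset, uint32_t size)
{
    const VkBufferCopy region = {staging_offset, offset, size};
    vkCmdCopyBuffer(cmdbuf, staging, gpu_buffer, 1, &region);

    const VkBufferMemoryBarrier barrier = {VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, nullptr,
        VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_SHADER_READ_BIT,
        VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, gpu_buffer, offset, size};
    vkCmdPipelineBarrier(cmdbuf, VK_PIPELINE_STAGE_TRANSFER_BIT,
        VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, 0, 0, nullptr, 1, &barrier, 0, nullptr);
}
```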
@@ -104,6 +117,7 @@ void VKStreamBuffer::Destroy(bool defer)
m_buffer = VK_NULL_HANDLE;
m_allocation = VK_NULL_HANDLE;
m_host_pointer = nullptr;
m_device_local = false;
}

bool VKStreamBuffer::ReserveMemory(u32 num_bytes, u32 alignment)
@@ -180,8 +194,11 @@ void VKStreamBuffer::CommitMemory(u32 final_num_bytes)
pxAssert((m_current_offset + final_num_bytes) <= m_size);
pxAssert(final_num_bytes <= m_current_space);

// For non-coherent mappings, flush the memory range
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), m_allocation, m_current_offset, final_num_bytes);
if (!m_device_local)
{
// For non-coherent mappings, flush the memory range
vmaFlushAllocation(GSDeviceVK::GetInstance()->GetAllocator(), m_allocation, m_current_offset, final_num_bytes);
}

m_current_offset += final_num_bytes;
m_current_space -= final_num_bytes;

@@ -30,14 +30,13 @@ public:
__fi u32 GetCurrentSpace() const { return m_current_space; }
__fi u32 GetCurrentOffset() const { return m_current_offset; }

bool Create(VkBufferUsageFlags usage, u32 size);
bool Create(VkBufferUsageFlags usage, u32 size, bool device_local = false);
void Destroy(bool defer);

bool ReserveMemory(u32 num_bytes, u32 alignment);
void CommitMemory(u32 final_num_bytes);

private:
bool AllocateBuffer(VkBufferUsageFlags usage, u32 size);
void UpdateCurrentFencePosition();
void UpdateGPUPosition();

@@ -51,7 +50,8 @@ private:

VmaAllocation m_allocation = VK_NULL_HANDLE;
VkBuffer m_buffer = VK_NULL_HANDLE;
u8* m_host_pointer = nullptr;
u8* m_host_pointer = nullptr; // Only used for upload buffers.
bool m_device_local = false; // False for upload buffer; true for default buffer.

// List of fences and the corresponding positions in the buffer
std::deque<std::pair<u64, u32>> m_tracked_fences;