fix render pass barrier logic

goeiecool9999 2025-12-26 16:15:59 +01:00
parent df52fd69b7
commit ea35c9f216
2 changed files with 32 additions and 72 deletions


@ -8,6 +8,7 @@
#include "Cafe/HW/Latte/Renderer/Vulkan/CachedFBOVk.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/VKRMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Vulkan/SwapchainInfoVk.h"
#include "Cafe/HW/Latte/Core/LattePerformanceMonitor.h"
#include "util/math/vector2.h"
#include "util/helpers/Semaphore.h"
#include "util/containers/flat_hash_map.hpp"
@ -546,7 +547,7 @@ private:
void draw_handleSpecialState5();
// draw synchronization helper
void sync_inputTexturesChanged();
bool sync_isInputTexturesSyncRequired();
void sync_RenderPassLoadTextures(CachedFBOVk* fboVk);
void sync_RenderPassStoreTextures(CachedFBOVk* fboVk);
@ -824,6 +825,7 @@ private:
bufMemBarrier.offset = offset;
bufMemBarrier.size = size;
vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 0, nullptr, 1, &bufMemBarrier, 0, nullptr);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
}
template<uint32 TSrcSyncOpA, uint32 TDstSyncOpA, uint32 TSrcSyncOpB, uint32 TDstSyncOpB>
@ -862,6 +864,7 @@ private:
bufMemBarrier[1].size = sizeB;
vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStagesA|srcStagesB, dstStagesA|dstStagesB, 0, 0, nullptr, 2, bufMemBarrier, 0, nullptr);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
}
void barrier_sequentializeTransfer()
@ -880,6 +883,7 @@ private:
memBarrier.dstAccessMask |= (VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT);
vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
}
void barrier_sequentializeCommand()
@ -888,6 +892,7 @@ private:
VkPipelineStageFlags dstStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStages, dstStages, 0, 0, nullptr, 0, nullptr, 0, nullptr);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
}
template<uint32 TSrcSyncOp, uint32 TDstSyncOp>
@ -915,6 +920,7 @@ private:
0, NULL,
0, NULL,
1, &imageMemBarrier);
performanceMonitor.vk.numDrawBarriersPerFrame.increment();
}
template<uint32 TSrcSyncOp, uint32 TDstSyncOp>

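Note on the header changes above: each barrier helper now increments performanceMonitor.vk.numDrawBarriersPerFrame immediately after its vkCmdPipelineBarrier call, which is what the added LattePerformanceMonitor.h include is for. A minimal sketch of the pattern, with a plain std::atomic counter standing in for Cemu's performance monitor (the counter and function names below are illustrative, not from the codebase):

#include <vulkan/vulkan.h>
#include <atomic>
#include <cstdint>

// Illustrative stand-in for performanceMonitor.vk.numDrawBarriersPerFrame
static std::atomic<uint32_t> s_numDrawBarriersPerFrame{0};

// Record a coarse memory barrier and count it, mirroring the
// "vkCmdPipelineBarrier(...); increment();" pairing added to every helper.
void barrier_fullMemorySketch(VkCommandBuffer cmd, VkPipelineStageFlags srcStages, VkPipelineStageFlags dstStages)
{
	VkMemoryBarrier memBarrier{};
	memBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
	memBarrier.srcAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
	memBarrier.dstAccessMask = VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT;
	vkCmdPipelineBarrier(cmd, srcStages, dstStages, 0, 1, &memBarrier, 0, nullptr, 0, nullptr);
	s_numDrawBarriersPerFrame.fetch_add(1, std::memory_order_relaxed);
}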

@ -1035,68 +1035,21 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo*
return dsInfo;
}
void VulkanRenderer::sync_inputTexturesChanged()
{
	bool writeFlushRequired = false;
	if (m_state.activeVertexDS)
	{
		for (auto& tex : m_state.activeVertexDS->list_fboCandidates)
		{
			tex->m_vkFlushIndex_read = m_state.currentFlushIndex;
			if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex)
				writeFlushRequired = true;
		}
	}
	if (m_state.activeGeometryDS)
	{
		for (auto& tex : m_state.activeGeometryDS->list_fboCandidates)
		{
			tex->m_vkFlushIndex_read = m_state.currentFlushIndex;
			if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex)
				writeFlushRequired = true;
		}
	}
	if (m_state.activePixelDS)
	{
		for (auto& tex : m_state.activePixelDS->list_fboCandidates)
		{
			tex->m_vkFlushIndex_read = m_state.currentFlushIndex;
			if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex)
				writeFlushRequired = true;
		}
	}
	// barrier here
	if (writeFlushRequired)
	{
		VkMemoryBarrier memoryBarrier{};
		memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		memoryBarrier.srcAccessMask = 0;
		memoryBarrier.dstAccessMask = 0;
		VkPipelineStageFlags srcStage = 0;
		VkPipelineStageFlags dstStage = 0;
		// src
		srcStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
		memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		srcStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT;
		memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
		// dst
		dstStage |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT;
		dstStage |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT;
		memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_SHADER_READ_BIT;
		vkCmdPipelineBarrier(m_state.currentCommandBuffer, srcStage, dstStage, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr);
		performanceMonitor.vk.numDrawBarriersPerFrame.increment();
		m_state.currentFlushIndex++;
	}
}

bool VulkanRenderer::sync_isInputTexturesSyncRequired()
{
	auto checkSync = [&](const VkDescriptorSetInfo* info) {
		if (info)
		{
			for (auto& tex : info->list_fboCandidates)
			{
				tex->m_vkFlushIndex_read = m_state.currentFlushIndex;
				if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex)
					return true;
			}
		}
		return false;
	};
	return checkSync(m_state.activeVertexDS) || checkSync(m_state.activeGeometryDS) || checkSync(m_state.activePixelDS);
}
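The replacement helper keeps the flush-index bookkeeping (each input texture's m_vkFlushIndex_read is stamped with the current flush index, and a m_vkFlushIndex_write stamp equal to the current flush index means the texture was rendered to inside the current flush window) but only reports whether synchronization is needed; issuing the barrier is left to the caller. A self-contained sketch of that hazard-tracking idea, with hypothetical Resource/FlushTracker names that do not exist in the codebase:

#include <cstdint>
#include <vector>

// Hypothetical per-resource sync state, mirroring m_vkFlushIndex_read/_write.
struct Resource
{
	uint32_t lastReadFlush = 0xFFFFFFFF;
	uint32_t lastWriteFlush = 0xFFFFFFFF;
};

struct FlushTracker
{
	uint32_t currentFlushIndex = 0;

	// Returns true if any input was written within the current flush window,
	// i.e. a barrier (or render pass split) is needed before sampling it.
	bool isSyncRequired(const std::vector<Resource*>& inputs)
	{
		bool required = false;
		for (Resource* res : inputs)
		{
			res->lastReadFlush = currentFlushIndex;        // record the read
			if (res->lastWriteFlush == currentFlushIndex)  // read-after-write hazard
				required = true;
		}
		return required;
	}

	// Called once a barrier has actually been issued: opens a new flush window.
	void advance() { currentFlushIndex++; }
};

Returning a flag instead of emitting the barrier lets draw_setRenderPass fold the check into its render-pass-reuse decision further down.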
void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk)
@ -1111,7 +1064,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk)
readFlushRequired = true;
texVk->m_vkFlushIndex_write = m_state.currentFlushIndex;
texVk->m_vkFlushIndex_read = m_state.currentFlushIndex;
// todo - also check for write-before-write ?
if (texVk->m_vkFlushIndex_read == m_state.currentFlushIndex)
readFlushRequired = true;
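The write-before-write case raised in the todo above could be detected with the same stamps; a hypothetical extension of the FlushTracker sketch shown earlier:

	// Hypothetical addition to FlushTracker: flag outputs that were already
	// read or written within the current flush window (WAR / WAW hazards).
	bool isWriteSyncRequired(const std::vector<Resource*>& outputs) const
	{
		for (const Resource* res : outputs)
			if (res->lastWriteFlush == currentFlushIndex || res->lastReadFlush == currentFlushIndex)
				return true;
		return false;
	}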
@ -1151,11 +1104,10 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk)
void VulkanRenderer::sync_RenderPassStoreTextures(CachedFBOVk* fboVk)
{
uint32 flushIndex = m_state.currentFlushIndex;
for (auto& tex : fboVk->GetTextures())
{
LatteTextureVk* texVk = (LatteTextureVk*)tex;
texVk->m_vkFlushIndex_write = flushIndex;
texVk->m_vkFlushIndex_write = m_state.currentFlushIndex;
}
}
@ -1224,20 +1176,22 @@ void VulkanRenderer::draw_setRenderPass()
auto vkObjRenderPass = fboVk->GetRenderPassObj();
auto vkObjFramebuffer = fboVk->GetFramebufferObj();
bool overridePassReuse = m_state.hasRenderSelfDependency && (GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier);
if (!overridePassReuse && m_state.activeRenderpassFBO == fboVk)
const bool syncSkipAllowed = !(GetConfig().vk_accurate_barriers || m_state.activePipelineInfo->neverSkipAccurateBarrier);
const bool FBOChanged = m_state.activeRenderpassFBO != fboVk;
bool inputSyncNecessary = false;
if (m_state.descriptorSetsChanged)
inputSyncNecessary = sync_isInputTexturesSyncRequired();
const bool passReusable = !FBOChanged && !inputSyncNecessary;
if (passReusable)
{
if (m_state.descriptorSetsChanged)
sync_inputTexturesChanged();
// reuse previous render pass
return;
}
draw_endRenderPass();
if (m_state.descriptorSetsChanged)
sync_inputTexturesChanged();
// assume that FBO changed, update self-dependency state
m_state.hasRenderSelfDependency = fboVk->CheckForCollision(m_state.activeVertexDS, m_state.activeGeometryDS, m_state.activePixelDS);
sync_RenderPassLoadTextures(fboVk);
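With this restructuring, draw_setRenderPass reuses the open render pass only when the FBO is unchanged and no input texture requires synchronization; otherwise it ends the pass and lets sync_RenderPassLoadTextures emit the barriers that sync_inputTexturesChanged used to issue inline. A condensed, self-contained sketch of that control flow (all type and helper names below are placeholders, not Cemu's):

struct Fbo {};
struct RenderPassState
{
	Fbo* activeFbo = nullptr;
	bool descriptorSetsChanged = false;
};

// Stub helpers standing in for the renderer's own methods.
bool inputTexturesNeedSync() { return false; }   // ~ sync_isInputTexturesSyncRequired()
void endRenderPass() {}                          // ~ draw_endRenderPass()
void loadRenderPassTextures(Fbo*) {}             // ~ sync_RenderPassLoadTextures()
void beginRenderPass(Fbo*) {}

// Reuse the open render pass only if the FBO is unchanged and no input
// texture was written inside it; otherwise split the pass.
void setRenderPassSketch(RenderPassState& state, Fbo* fbo)
{
	const bool fboChanged = (state.activeFbo != fbo);
	bool inputSyncNecessary = false;
	if (state.descriptorSetsChanged)
		inputSyncNecessary = inputTexturesNeedSync();

	if (!fboChanged && !inputSyncNecessary)
		return; // previous render pass keeps going

	endRenderPass();
	loadRenderPassTextures(fbo); // issues the barriers that were previously emitted inline
	beginRenderPass(fbo);
	state.activeFbo = fbo;
}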