diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h index 475ed00e..612e2e70 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/LatteTextureVk.h @@ -78,6 +78,8 @@ protected: LatteTextureView* CreateView(Latte::E_DIM dim, Latte::E_GX2SURFFMT format, sint32 firstMip, sint32 mipCount, sint32 firstSlice, sint32 sliceCount) override; public: + uint64 m_vkFlushIndex{}; // used to track read-write dependencies within the same renderpass + uint64 m_vkFlushIndex_read{}; uint64 m_vkFlushIndex_write{}; diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h index 31c06449..edf50feb 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRenderer.h @@ -412,7 +412,7 @@ private: } // invalidation / flushing - uint64 currentFlushIndex{0}; + uint64 currentFlushIndex{1}; bool requestFlush{ false }; // flush after every draw operation. The renderpass dependencies dont handle dependencies across multiple drawcalls inside a single renderpass // draw sequence @@ -549,7 +549,7 @@ private: void draw_handleSpecialState5(); // draw synchronization helper - void sync_performFlushBarrier(); + void sync_performFlushBarrier(CachedFBOVk* fboVk); bool sync_isInputTexturesSyncRequired(); void sync_RenderPassLoadTextures(CachedFBOVk* fboVk); void sync_RenderPassStoreTextures(CachedFBOVk* fboVk); diff --git a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp index 3c982510..46619f0e 100644 --- a/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp +++ b/src/Cafe/HW/Latte/Renderer/Vulkan/VulkanRendererCore.cpp @@ -1035,30 +1035,62 @@ VkDescriptorSetInfo* VulkanRenderer::draw_getOrCreateDescriptorSet(PipelineInfo* return dsInfo; } -void VulkanRenderer::sync_performFlushBarrier() +void VulkanRenderer::sync_performFlushBarrier(CachedFBOVk* fboVk) { - VkMemoryBarrier memoryBarrier{}; - memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER; - memoryBarrier.srcAccessMask = 0; - memoryBarrier.dstAccessMask = 0; + size_t barrierCount = 0; + VkImageMemoryBarrier imageMemBarriers[8 + 2 + LATTE_NUM_MAX_TEX_UNITS]{}; + + auto addImgMemBarrierForTexView = [&](LatteTextureViewVk* view) { + VkImageSubresourceRange range = { + view->GetBaseImage()->GetImageAspect(), + (uint32_t)view->firstMip, + (uint32_t)view->numMip, + (uint32_t)view->firstSlice, + (uint32_t)view->numSlice}; + auto baseTex = (LatteTextureVk*)view->baseTexture; + const auto idx = barrierCount++; + imageMemBarriers[idx].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER; + imageMemBarriers[idx].image = baseTex->GetImageObj()->m_image; + imageMemBarriers[idx].subresourceRange = range; + imageMemBarriers[idx].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemBarriers[idx].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED; + imageMemBarriers[idx].oldLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemBarriers[idx].newLayout = VK_IMAGE_LAYOUT_GENERAL; + imageMemBarriers[idx].srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + imageMemBarriers[idx].dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + + view->GetBaseImage()->m_vkFlushIndex = m_state.currentFlushIndex; + }; + + for (auto& i : fboVk->colorBuffer) + { + if (!i.texture) + continue; + addImgMemBarrierForTexView(static_cast(i.texture)); + } + + if (auto i = fboVk->depthBuffer.texture) + { + addImgMemBarrierForTexView(static_cast(i)); + } + + if (m_state.activeVertexDS) + m_state.activeVertexDS->ForEachView(addImgMemBarrierForTexView); + if (m_state.activeGeometryDS) + m_state.activeGeometryDS->ForEachView(addImgMemBarrierForTexView); + if (m_state.activePixelDS) + m_state.activePixelDS->ForEachView(addImgMemBarrierForTexView); VkPipelineStageFlags stages = 0; - // src & dst stages |= VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; stages |= VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; stages |= VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; - // src - memoryBarrier.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - memoryBarrier.srcAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - // dst - memoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; - memoryBarrier.dstAccessMask |= VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; - - vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 1, &memoryBarrier, 0, nullptr, 0, nullptr); + vkCmdPipelineBarrier(m_state.currentCommandBuffer, stages, stages, 0, 0, nullptr, 0, nullptr, barrierCount, imageMemBarriers); performanceMonitor.vk.numDrawBarriersPerFrame.increment(); @@ -1074,7 +1106,7 @@ bool VulkanRenderer::sync_isInputTexturesSyncRequired() for (auto& tex : info->list_fboCandidates) { tex->m_vkFlushIndex_read = m_state.currentFlushIndex; - if (tex->m_vkFlushIndex_write == m_state.currentFlushIndex) + if (tex->m_vkFlushIndex < tex->m_vkFlushIndex_write) required = true; } }; @@ -1090,10 +1122,10 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) auto checkImageSyncHazard = [&](LatteTextureVk* texVk, bool isWrite = false) { //RAW / WAW - if (texVk->m_vkFlushIndex_write == m_state.currentFlushIndex) + if (texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_write) flushRequired = true; //WAR - if (isWrite && texVk->m_vkFlushIndex_read == m_state.currentFlushIndex) + if (isWrite && texVk->m_vkFlushIndex < texVk->m_vkFlushIndex_read) flushRequired = true; }; @@ -1112,7 +1144,7 @@ void VulkanRenderer::sync_RenderPassLoadTextures(CachedFBOVk* fboVk) m_state.activePixelDS->ForEachView(checkViewSync); if (flushRequired) - sync_performFlushBarrier(); + sync_performFlushBarrier(fboVk); for (auto& tex : fboVk->GetTextures()) { @@ -1225,7 +1257,7 @@ void VulkanRenderer::draw_setRenderPass() sync_RenderPassLoadTextures(fboVk); if (sync_isInputTexturesSyncRequired()) - sync_performFlushBarrier(); + sync_performFlushBarrier(fboVk); if (m_featureControl.deviceExtensions.dynamic_rendering) {