From 9b4e94c4aeec9ef8f3006d4288efbcfd877e2ebf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 28 Aug 2022 23:14:30 +0200 Subject: [PATCH 1/2] Depal: Set scissor tightly around processed area. --- GPU/Common/TextureCacheCommon.cpp | 37 +++++++++++++------------------ 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index e6ce09ae62cf..0963618c0675 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1956,12 +1956,26 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer gstate_c.Dirty(DIRTY_UVSCALEOFFSET); } + // If min is not < max, then we don't have values (wasn't set during decode.) + const KnownVertexBounds &bounds = gstate_c.vertBounds; + float u1 = 0.0f; + float v1 = 0.0f; + float u2 = depalWidth; + float v2 = framebuffer->renderHeight; + if (bounds.minV < bounds.maxV) { + u1 = (bounds.minU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor; + v1 = (bounds.minV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor; + u2 = (bounds.maxU + gstate_c.curTextureXOffset) * framebuffer->renderScaleFactor; + v2 = (bounds.maxV + gstate_c.curTextureYOffset) * framebuffer->renderScaleFactor; + // We need to reapply the texture next time since we cropped UV. 
+ gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + } + Draw::Framebuffer *depalFBO = framebufferManager_->GetTempFBO(TempFBO::DEPAL, depalWidth, framebuffer->renderHeight); draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal"); - - draw_->SetScissorRect(0, 0, (int)depalWidth, (int)framebuffer->renderHeight); + draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1); Draw::Viewport vp{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f }; draw_->SetViewports(1, &vp); @@ -1972,25 +1986,6 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindSamplerStates(0, 1, &nearest); draw_->BindSamplerStates(1, 1, &clutSampler); - // If min is not < max, then we don't have values (wasn't set during decode.) - const KnownVertexBounds &bounds = gstate_c.vertBounds; - float u1 = 0.0f; - float v1 = 0.0f; - float u2 = depalWidth; - float v2 = framebuffer->renderHeight; - if (bounds.minV < bounds.maxV) { - u1 = bounds.minU + gstate_c.curTextureXOffset; - v1 = bounds.minV + gstate_c.curTextureYOffset; - u2 = bounds.maxU + gstate_c.curTextureXOffset; - v2 = bounds.maxV + gstate_c.curTextureYOffset; - // We need to reapply the texture next time since we cropped UV. 
- gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); - } - u1 *= framebuffer->renderScaleFactor; - v1 *= framebuffer->renderScaleFactor; - u2 *= framebuffer->renderScaleFactor; - v2 *= framebuffer->renderScaleFactor; - draw2D_->Blit(textureShader, u1, v1, u2, v2, u1, v1, u2, v2, framebuffer->renderWidth, framebuffer->renderHeight, depalWidth, framebuffer->renderHeight, false, framebuffer->renderScaleFactor); gstate_c.curTextureWidth = texWidth; From b447092742ee672a073116d945024150fe96db17 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Henrik=20Rydg=C3=A5rd?= Date: Sun, 28 Aug 2022 23:16:48 +0200 Subject: [PATCH 2/2] Some Vulkan renderpass load/store optimizations. --- Common/GPU/Vulkan/VulkanQueueRunner.cpp | 68 ++++++++++------------- Common/GPU/Vulkan/VulkanQueueRunner.h | 23 +++++--- Common/GPU/Vulkan/VulkanRenderManager.cpp | 3 + Common/GPU/Vulkan/VulkanRenderManager.h | 25 +++++++++ Common/GPU/Vulkan/thin3d_vulkan.cpp | 17 ++++++ Common/GPU/thin3d.h | 18 ++++-- GPU/Common/FramebufferManagerCommon.cpp | 7 ++- GPU/Common/TextureCacheCommon.cpp | 1 + GPU/Vulkan/ShaderManagerVulkan.cpp | 2 +- 9 files changed, 108 insertions(+), 56 deletions(-) diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.cpp b/Common/GPU/Vulkan/VulkanQueueRunner.cpp index 7c9f3eb072be..ca0454830c45 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.cpp +++ b/Common/GPU/Vulkan/VulkanQueueRunner.cpp @@ -31,7 +31,9 @@ void VulkanQueueRunner::CreateDeviceObjects() { INFO_LOG(G3D, "VulkanQueueRunner::CreateDeviceObjects"); InitBackbufferRenderPass(); - framebufferRenderPass_ = GetRenderPass(VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR); + RPKey key{ VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, + VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE }; + framebufferRenderPass_ = GetRenderPass(key); #if 0 // Just to check whether it makes sense to split some of these. 
drawidx is way bigger than the others... @@ -189,6 +191,21 @@ void VulkanQueueRunner::InitBackbufferRenderPass() { _assert_(res == VK_SUCCESS); } +static VkAttachmentLoadOp ConvertLoadAction(VKRRenderPassLoadAction action) { + switch (action) { + case VKRRenderPassLoadAction::CLEAR: return VK_ATTACHMENT_LOAD_OP_CLEAR; + case VKRRenderPassLoadAction::KEEP: return VK_ATTACHMENT_LOAD_OP_LOAD; + default: return VK_ATTACHMENT_LOAD_OP_DONT_CARE; + } +} + +static VkAttachmentStoreOp ConvertStoreAction(VKRRenderPassStoreAction action) { + switch (action) { + case VKRRenderPassStoreAction::STORE: return VK_ATTACHMENT_STORE_OP_STORE; + default: return VK_ATTACHMENT_STORE_OP_DONT_CARE; + } +} + VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) { auto pass = renderPasses_.Get(key); if (pass) { @@ -198,19 +215,8 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) { VkAttachmentDescription attachments[2] = {}; attachments[0].format = VK_FORMAT_R8G8B8A8_UNORM; attachments[0].samples = VK_SAMPLE_COUNT_1_BIT; - switch (key.colorLoadAction) { - case VKRRenderPassLoadAction::CLEAR: - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - break; - case VKRRenderPassLoadAction::KEEP: - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - break; - case VKRRenderPassLoadAction::DONT_CARE: - default: - attachments[0].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - break; - } - attachments[0].storeOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[0].loadOp = ConvertLoadAction(key.colorLoadAction); + attachments[0].storeOp = ConvertStoreAction(key.colorStoreAction); attachments[0].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; attachments[0].stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE; attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL; @@ -219,30 +225,10 @@ VkRenderPass VulkanQueueRunner::GetRenderPass(const RPKey &key) { attachments[1].format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat; attachments[1].samples = 
VK_SAMPLE_COUNT_1_BIT; - switch (key.depthLoadAction) { - case VKRRenderPassLoadAction::CLEAR: - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - break; - case VKRRenderPassLoadAction::KEEP: - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - break; - case VKRRenderPassLoadAction::DONT_CARE: - attachments[1].loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - break; - } - switch (key.stencilLoadAction) { - case VKRRenderPassLoadAction::CLEAR: - attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_CLEAR; - break; - case VKRRenderPassLoadAction::KEEP: - attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD; - break; - case VKRRenderPassLoadAction::DONT_CARE: - attachments[1].stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE; - break; - } - attachments[1].storeOp = VK_ATTACHMENT_STORE_OP_STORE; - attachments[1].stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE; + attachments[1].loadOp = ConvertLoadAction(key.depthLoadAction); + attachments[1].storeOp = ConvertStoreAction(key.depthStoreAction); + attachments[1].stencilLoadOp = ConvertLoadAction(key.stencilLoadAction); + attachments[1].stencilStoreOp = ConvertStoreAction(key.stencilStoreAction); attachments[1].initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[1].finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL; attachments[1].flags = 0; @@ -1385,7 +1371,11 @@ void VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step TransitionToOptimal(cmd, fb->color.image, fb->color.layout, fb->depth.image, fb->depth.layout, &recordBarrier_); - renderPass = GetRenderPass(step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad); + RPKey key{ + step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad, + step.render.colorStore, step.render.depthStore, step.render.stencilStore, + }; + renderPass = GetRenderPass(key); // The transition from the optimal format happens after EndRenderPass, now that we don't // do it as part of the renderpass itself 
anymore. diff --git a/Common/GPU/Vulkan/VulkanQueueRunner.h b/Common/GPU/Vulkan/VulkanQueueRunner.h index c22316297a94..f80a1ba34027 100644 --- a/Common/GPU/Vulkan/VulkanQueueRunner.h +++ b/Common/GPU/Vulkan/VulkanQueueRunner.h @@ -121,10 +121,16 @@ enum class VKRStepType : uint8_t { READBACK_IMAGE, }; +// Must be the same order as Draw::RPAction enum class VKRRenderPassLoadAction : uint8_t { - DONT_CARE, + KEEP, // default. avoid when possible. CLEAR, - KEEP, + DONT_CARE, +}; + +enum class VKRRenderPassStoreAction : uint8_t { + STORE, // default. avoid when possible. + DONT_CARE, }; struct TransitionRequest { @@ -156,6 +162,9 @@ struct VKRStep { VKRRenderPassLoadAction colorLoad; VKRRenderPassLoadAction depthLoad; VKRRenderPassLoadAction stencilLoad; + VKRRenderPassStoreAction colorStore; + VKRRenderPassStoreAction depthStore; + VKRRenderPassStoreAction stencilStore; u8 clearStencil; uint32_t clearColor; float clearDepth; @@ -232,15 +241,11 @@ class VulkanQueueRunner { VKRRenderPassLoadAction colorLoadAction; VKRRenderPassLoadAction depthLoadAction; VKRRenderPassLoadAction stencilLoadAction; + VKRRenderPassStoreAction colorStoreAction; + VKRRenderPassStoreAction depthStoreAction; + VKRRenderPassStoreAction stencilStoreAction; }; - // Only call this from the render thread! Also ok during initialization (LoadCache). 
- VkRenderPass GetRenderPass( - VKRRenderPassLoadAction colorLoadAction, VKRRenderPassLoadAction depthLoadAction, VKRRenderPassLoadAction stencilLoadAction) { - RPKey key{ colorLoadAction, depthLoadAction, stencilLoadAction }; - return GetRenderPass(key); - } - VkRenderPass GetRenderPass(const RPKey &key); bool GetRenderPassKey(VkRenderPass passToFind, RPKey *outKey) const { diff --git a/Common/GPU/Vulkan/VulkanRenderManager.cpp b/Common/GPU/Vulkan/VulkanRenderManager.cpp index b2df509a81fc..8e17483ae96d 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.cpp +++ b/Common/GPU/Vulkan/VulkanRenderManager.cpp @@ -724,6 +724,9 @@ void VulkanRenderManager::BindFramebufferAsRenderTarget(VKRFramebuffer *fb, VKRR step->render.colorLoad = color; step->render.depthLoad = depth; step->render.stencilLoad = stencil; + step->render.colorStore = VKRRenderPassStoreAction::STORE; + step->render.depthStore = VKRRenderPassStoreAction::STORE; + step->render.stencilStore = VKRRenderPassStoreAction::STORE; step->render.clearColor = clearColor; step->render.clearDepth = clearDepth; step->render.clearStencil = clearStencil; diff --git a/Common/GPU/Vulkan/VulkanRenderManager.h b/Common/GPU/Vulkan/VulkanRenderManager.h index 7e805b26c785..1df2fc236166 100644 --- a/Common/GPU/Vulkan/VulkanRenderManager.h +++ b/Common/GPU/Vulkan/VulkanRenderManager.h @@ -361,6 +361,31 @@ class VulkanRenderManager { void Clear(uint32_t clearColor, float clearZ, int clearStencil, int clearMask); + // Cheaply set that we don't care about the contents of a surface at the start of the current render pass. + // This sets the corresponding load-op of the current render pass to DONT_CARE. + // Useful when we don't know at bind-time whether we will overwrite the surface or not. 
+ void SetLoadDontCare(VkImageAspectFlags aspects) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) + curRenderStep_->render.colorLoad = VKRRenderPassLoadAction::DONT_CARE; + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + curRenderStep_->render.depthLoad = VKRRenderPassLoadAction::DONT_CARE; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + curRenderStep_->render.stencilLoad = VKRRenderPassLoadAction::DONT_CARE; + } + + // Cheaply set that we don't care about the contents of a surface at the end of the current render pass. + // This sets the corresponding store-op of the current render pass to DONT_CARE. + void SetStoreDontCare(VkImageAspectFlags aspects) { + _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER); + if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) + curRenderStep_->render.colorStore = VKRRenderPassStoreAction::DONT_CARE; + if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) + curRenderStep_->render.depthStore = VKRRenderPassStoreAction::DONT_CARE; + if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) + curRenderStep_->render.stencilStore = VKRRenderPassStoreAction::DONT_CARE; + } + void Draw(VkPipelineLayout layout, VkDescriptorSet descSet, int numUboOffsets, const uint32_t *uboOffsets, VkBuffer vbuffer, int voffset, int count, int offset = 0) { _dbg_assert_(curRenderStep_ && curRenderStep_->stepType == VKRStepType::RENDER && curStepHasViewport_ && curStepHasScissor_); VkRenderData data{ VKRRenderCommand::DRAW }; diff --git a/Common/GPU/Vulkan/thin3d_vulkan.cpp b/Common/GPU/Vulkan/thin3d_vulkan.cpp index b7412d4221d5..171decd3eb3e 100644 --- a/Common/GPU/Vulkan/thin3d_vulkan.cpp +++ b/Common/GPU/Vulkan/thin3d_vulkan.cpp @@ -499,6 +499,8 @@ class VKContext : public DrawContext { void InvalidateCachedState() override; + void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) override; + private: VulkanTexture *GetNullTexture(); VulkanContext *vulkan_ = nullptr; @@ 
-1604,4 +1606,19 @@ void VKContext::HandleEvent(Event ev, int width, int height, void *param1, void } } +void VKContext::InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) { + VkImageAspectFlags flags = 0; + if (channels & FBChannel::FB_COLOR_BIT) + flags |= VK_IMAGE_ASPECT_COLOR_BIT; + if (channels & FBChannel::FB_DEPTH_BIT) + flags |= VK_IMAGE_ASPECT_DEPTH_BIT; + if (channels & FBChannel::FB_STENCIL_BIT) + flags |= VK_IMAGE_ASPECT_STENCIL_BIT; + if (stage == FB_INVALIDATION_LOAD) { + renderManager_.SetLoadDontCare(flags); + } else if (stage == FB_INVALIDATION_STORE) { + renderManager_.SetStoreDontCare(flags); + } +} + } // namespace Draw diff --git a/Common/GPU/thin3d.h b/Common/GPU/thin3d.h index ff0fe1a0833a..21d605b6cda1 100644 --- a/Common/GPU/thin3d.h +++ b/Common/GPU/thin3d.h @@ -261,6 +261,11 @@ enum FBChannel { FB_FORMAT_BIT = 128, // Actually retrieves the native format instead. D3D11 only. }; +enum FBInvalidationStage { + FB_INVALIDATION_LOAD = 1, + FB_INVALIDATION_STORE = 2, +}; + enum FBBlitFilter { FB_BLIT_NEAREST = 0, FB_BLIT_LINEAR = 1, @@ -568,9 +573,9 @@ struct TextureDesc { }; enum class RPAction { - DONT_CARE, - CLEAR, - KEEP, + KEEP = 0, + CLEAR = 1, + DONT_CARE = 2, }; struct RenderPassInfo { @@ -655,8 +660,11 @@ class DrawContext { virtual void GetFramebufferDimensions(Framebuffer *fbo, int *w, int *h) = 0; - // Useful in OpenGL ES to give hints about framebuffers on tiler GPUs. - virtual void InvalidateFramebuffer(Framebuffer *fbo) {} + // Could be useful in OpenGL ES to give hints about framebuffers on tiler GPUs + // using glInvalidateFramebuffer, although drivers are known to botch that so we currently don't use it. + // In Vulkan, this sets the LOAD_OP or the STORE_OP (depending on stage) of the current render pass instance to DONT_CARE. + // channels is a bitwise combination of FBChannel::FB_COLOR_BIT, FB_DEPTH_BIT and FB_STENCIL_BIT. 
+ virtual void InvalidateFramebuffer(FBInvalidationStage stage, uint32_t channels) {} // Dynamic state virtual void SetScissorRect(int left, int top, int width, int height) = 0; diff --git a/GPU/Common/FramebufferManagerCommon.cpp b/GPU/Common/FramebufferManagerCommon.cpp index 1b25a4608538..5328ac2fed09 100644 --- a/GPU/Common/FramebufferManagerCommon.cpp +++ b/GPU/Common/FramebufferManagerCommon.cpp @@ -1307,8 +1307,6 @@ void FramebufferManagerCommon::CopyDisplayToOutput(bool reallyDirty) { else DEBUG_LOG(FRAMEBUF, "Displaying FBO %08x", vfb->fb_address); - // TODO ES3: Use glInvalidateFramebuffer to discard depth/stencil data at the end of frame. - float u0 = offsetX / (float)vfb->bufferWidth; float v0 = offsetY / (float)vfb->bufferHeight; float u1 = (480.0f + offsetX) / (float)vfb->bufferWidth; @@ -2770,6 +2768,11 @@ void FramebufferManagerCommon::BlitUsingRaster( draw_->BindFramebufferAsRenderTarget(dest, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::KEEP }, tag ? tag : "BlitUsingRaster"); draw_->BindFramebufferAsTexture(src, 0, pipeline->info.readChannel == RASTER_COLOR ? Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT, 0); + if (destX1 == 0.0f && destY1 == 0.0f && destX2 >= destW && destY2 >= destH) { + // We overwrite the whole channel of the framebuffer, so we can invalidate the current contents. + draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_LOAD, pipeline->info.writeChannel == RASTER_COLOR ? 
Draw::FB_COLOR_BIT : Draw::FB_DEPTH_BIT); + } + Draw::Viewport vp{ 0.0f, 0.0f, (float)dest->Width(), (float)dest->Height(), 0.0f, 1.0f }; draw_->SetViewports(1, &vp); draw_->SetScissorRect(0, 0, (int)dest->Width(), (int)dest->Height()); diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index 0963618c0675..8bd5721a6654 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -1975,6 +1975,7 @@ void TextureCacheCommon::ApplyTextureFramebuffer(VirtualFramebuffer *framebuffer draw_->BindTexture(0, nullptr); draw_->BindTexture(1, nullptr); draw_->BindFramebufferAsRenderTarget(depalFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE }, "Depal"); + draw_->InvalidateFramebuffer(Draw::FB_INVALIDATION_STORE, Draw::FB_DEPTH_BIT | Draw::FB_STENCIL_BIT); draw_->SetScissorRect(u1, v1, u2 - u1, v2 - v1); Draw::Viewport vp{ 0.0f, 0.0f, (float)depalWidth, (float)framebuffer->renderHeight, 0.0f, 1.0f }; draw_->SetViewports(1, &vp); diff --git a/GPU/Vulkan/ShaderManagerVulkan.cpp b/GPU/Vulkan/ShaderManagerVulkan.cpp index 0d4b31b96d52..a7fe4b252c08 100644 --- a/GPU/Vulkan/ShaderManagerVulkan.cpp +++ b/GPU/Vulkan/ShaderManagerVulkan.cpp @@ -358,7 +358,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM // instantaneous. #define CACHE_HEADER_MAGIC 0xff51f420 -#define CACHE_VERSION 20 +#define CACHE_VERSION 21 struct VulkanCacheHeader { uint32_t magic; uint32_t version;