From b4dff42dcd3864ebb6d4ce7a2a35d0140f5b2bd2 Mon Sep 17 00:00:00 2001 From: Evan Hemsley <2342303+thatcosmonaut@users.noreply.github.com> Date: Tue, 29 Oct 2024 14:43:22 -0700 Subject: [PATCH] GPU: Add SDL_CancelGPUCommandBuffer (#11316) --------- Co-authored-by: Caleb Cornett --- include/SDL3/SDL_gpu.h | 27 +++++ src/dynapi/SDL_dynapi.sym | 1 + src/dynapi/SDL_dynapi_overrides.h | 1 + src/dynapi/SDL_dynapi_procs.h | 1 + src/gpu/SDL_gpu.c | 32 +++++- src/gpu/SDL_sysgpu.h | 5 + src/gpu/d3d11/SDL_gpu_d3d11.c | 77 ++++++++----- src/gpu/d3d12/SDL_gpu_d3d12.c | 54 +++++++--- src/gpu/metal/SDL_gpu_metal.m | 166 ++++++++++++++++++++++------ src/gpu/vulkan/SDL_gpu_vulkan.c | 172 +++++++++++++++++------------- 10 files changed, 389 insertions(+), 147 deletions(-) diff --git a/include/SDL3/SDL_gpu.h b/include/SDL3/SDL_gpu.h index 91174e24d..899297ef0 100644 --- a/include/SDL3/SDL_gpu.h +++ b/include/SDL3/SDL_gpu.h @@ -3529,6 +3529,11 @@ extern SDL_DECLSPEC SDL_GPUTextureFormat SDLCALL SDL_GetGPUSwapchainTextureForma * freed by the user. You MUST NOT call this function from any thread other * than the one that created the window. * + * When using SDL_GPU_PRESENTMODE_VSYNC, this function will block if too many frames are in flight. + * Otherwise, this function will fill the swapchain texture handle with NULL if too many frames are in flight. + * The best practice is to call SDL_CancelGPUCommandBuffer if the swapchain texture handle is NULL + * to avoid enqueuing needless work on the GPU. + * * \param command_buffer a command buffer. * \param window a window that has been claimed. * \param swapchain_texture a pointer filled in with a swapchain texture @@ -3542,9 +3547,11 @@ extern SDL_DECLSPEC SDL_GPUTextureFormat SDLCALL SDL_GetGPUSwapchainTextureForma * * \since This function is available since SDL 3.1.3. * + * \sa SDL_GPUPresentMode * \sa SDL_ClaimWindowForGPUDevice * \sa SDL_SubmitGPUCommandBuffer * \sa SDL_SubmitGPUCommandBufferAndAcquireFence + * \sa SDL_CancelGPUCommandBuffer * \sa SDL_GetWindowSizeInPixels */ extern SDL_DECLSPEC bool SDLCALL SDL_AcquireGPUSwapchainTexture( @@ -3603,6 +3610,26 @@ extern SDL_DECLSPEC bool SDLCALL SDL_SubmitGPUCommandBuffer( extern SDL_DECLSPEC SDL_GPUFence *SDLCALL SDL_SubmitGPUCommandBufferAndAcquireFence( SDL_GPUCommandBuffer *command_buffer); +/** + * Cancels a command buffer. None of the enqueued commands are executed. + * + * This must be called from the thread the command buffer was acquired on. + * + * You must not reference the command buffer after calling this function. + * It is an error to call this function after a swapchain texture has been acquired. + * + * \param command_buffer a command buffer. + * \returns true on success, false on error; call SDL_GetError() for more + * information. + * + * \since This function is available since SDL 3.2.0. + * + * \sa SDL_AcquireGPUCommandBuffer + * \sa SDL_AcquireGPUSwapchainTexture + */ +extern SDL_DECLSPEC bool SDLCALL SDL_CancelGPUCommandBuffer( + SDL_GPUCommandBuffer *command_buffer); + /** * Blocks the thread until the GPU is completely idle. * diff --git a/src/dynapi/SDL_dynapi.sym b/src/dynapi/SDL_dynapi.sym index e0efccc2b..fcdb93b05 100644 --- a/src/dynapi/SDL_dynapi.sym +++ b/src/dynapi/SDL_dynapi.sym @@ -1183,6 +1183,7 @@ SDL3_0.0.0 { SDL_GetDefaultLogOutputFunction; SDL_RenderDebugText; SDL_GetSandbox; + SDL_CancelGPUCommandBuffer; # extra symbols go here (don't modify this line) local: *; }; diff --git a/src/dynapi/SDL_dynapi_overrides.h b/src/dynapi/SDL_dynapi_overrides.h index 4f5f16d88..c33efb30b 100644 --- a/src/dynapi/SDL_dynapi_overrides.h +++ b/src/dynapi/SDL_dynapi_overrides.h @@ -1208,3 +1208,4 @@ #define SDL_GetDefaultLogOutputFunction SDL_GetDefaultLogOutputFunction_REAL #define SDL_RenderDebugText SDL_RenderDebugText_REAL #define SDL_GetSandbox SDL_GetSandbox_REAL +#define SDL_CancelGPUCommandBuffer SDL_CancelGPUCommandBuffer_REAL diff --git a/src/dynapi/SDL_dynapi_procs.h b/src/dynapi/SDL_dynapi_procs.h index dcfcef6d6..cebdf0206 100644 --- a/src/dynapi/SDL_dynapi_procs.h +++ b/src/dynapi/SDL_dynapi_procs.h @@ -1214,3 +1214,4 @@ SDL_DYNAPI_PROC(bool,SDL_SetErrorV,(SDL_PRINTF_FORMAT_STRING const char *a,va_li SDL_DYNAPI_PROC(SDL_LogOutputFunction,SDL_GetDefaultLogOutputFunction,(void),(),return) SDL_DYNAPI_PROC(bool,SDL_RenderDebugText,(SDL_Renderer *a,float b,float c,const char *d),(a,b,c,d),return) SDL_DYNAPI_PROC(SDL_Sandbox,SDL_GetSandbox,(void),(),return) +SDL_DYNAPI_PROC(bool,SDL_CancelGPUCommandBuffer,(SDL_GPUCommandBuffer *a),(a),return) diff --git a/src/gpu/SDL_gpu.c b/src/gpu/SDL_gpu.c index 1957ac5de..6f1e8975d 100644 --- a/src/gpu/SDL_gpu.c +++ b/src/gpu/SDL_gpu.c @@ -1301,6 +1301,7 @@ SDL_GPUCommandBuffer *SDL_AcquireGPUCommandBuffer( commandBufferHeader->compute_pipeline_bound = false; commandBufferHeader->copy_pass.command_buffer = command_buffer; commandBufferHeader->copy_pass.in_progress = false; + commandBufferHeader->swapchain_texture_acquired = false; commandBufferHeader->submitted = false; return command_buffer; @@ -2666,6 +2667,8 @@ bool SDL_AcquireGPUSwapchainTexture( Uint32 *swapchain_texture_width, Uint32 *swapchain_texture_height) { + CommandBufferCommonHeader *commandBufferHeader = (CommandBufferCommonHeader *)command_buffer; + if (command_buffer == NULL) { SDL_InvalidParamError("command_buffer"); return false; @@ -2684,12 +2687,18 @@ bool SDL_AcquireGPUSwapchainTexture( CHECK_ANY_PASS_IN_PROGRESS("Cannot acquire a swapchain texture during a pass!", false) } - return COMMAND_BUFFER_DEVICE->AcquireSwapchainTexture( + bool result = COMMAND_BUFFER_DEVICE->AcquireSwapchainTexture( command_buffer, window, swapchain_texture, swapchain_texture_width, swapchain_texture_height); + + if (*swapchain_texture != NULL){ + commandBufferHeader->swapchain_texture_acquired = true; + } + + return result; } bool SDL_SubmitGPUCommandBuffer( @@ -2746,6 +2755,27 @@ SDL_GPUFence *SDL_SubmitGPUCommandBufferAndAcquireFence( command_buffer); } +bool SDL_CancelGPUCommandBuffer( + SDL_GPUCommandBuffer *command_buffer) +{ + CommandBufferCommonHeader *commandBufferHeader = (CommandBufferCommonHeader *)command_buffer; + + if (command_buffer == NULL) { + SDL_InvalidParamError("command_buffer"); + return false; + } + + if (COMMAND_BUFFER_DEVICE->debug_mode) { + if (commandBufferHeader->swapchain_texture_acquired) { + SDL_assert_release(!"Cannot cancel command buffer after a swapchain texture has been acquired!"); + return false; + } + } + + return COMMAND_BUFFER_DEVICE->Cancel( + command_buffer); +} + bool SDL_WaitForGPUIdle( SDL_GPUDevice *device) { diff --git a/src/gpu/SDL_sysgpu.h b/src/gpu/SDL_sysgpu.h index 7f9ef6ca4..c20531f99 100644 --- a/src/gpu/SDL_sysgpu.h +++ b/src/gpu/SDL_sysgpu.h @@ -40,6 +40,7 @@ typedef struct CommandBufferCommonHeader Pass compute_pass; bool compute_pipeline_bound; Pass copy_pass; + bool swapchain_texture_acquired; bool submitted; } CommandBufferCommonHeader; @@ -810,6 +811,9 @@ struct SDL_GPUDevice SDL_GPUFence *(*SubmitAndAcquireFence)( SDL_GPUCommandBuffer *commandBuffer); + bool (*Cancel)( + SDL_GPUCommandBuffer *commandBuffer); + bool (*Wait)( SDL_GPURenderer *driverData); @@ -928,6 +932,7 @@ struct SDL_GPUDevice ASSIGN_DRIVER_FUNC(AcquireSwapchainTexture, name) \ ASSIGN_DRIVER_FUNC(Submit, name) \ ASSIGN_DRIVER_FUNC(SubmitAndAcquireFence, name) \ + ASSIGN_DRIVER_FUNC(Cancel, name) \ ASSIGN_DRIVER_FUNC(Wait, name) \ ASSIGN_DRIVER_FUNC(WaitForFences, name) \ ASSIGN_DRIVER_FUNC(QueryFence, name) \ diff --git a/src/gpu/d3d11/SDL_gpu_d3d11.c b/src/gpu/d3d11/SDL_gpu_d3d11.c index d667efddf..f2a864bac 100644 --- a/src/gpu/d3d11/SDL_gpu_d3d11.c +++ b/src/gpu/d3d11/SDL_gpu_d3d11.c @@ -748,7 +748,7 @@ typedef struct D3D11CommandBuffer // Fences D3D11Fence *fence; - Uint8 autoReleaseFence; + bool autoReleaseFence; // Reference Counting D3D11Buffer **usedBuffers; @@ -3280,15 +3280,10 @@ static SDL_GPUCommandBuffer *D3D11_AcquireCommandBuffer( SDL_zeroa(commandBuffer->computeReadWriteStorageTextureSubresources); SDL_zeroa(commandBuffer->computeReadWriteStorageBuffers); - bool acquireFenceResult = D3D11_INTERNAL_AcquireFence(commandBuffer); - commandBuffer->autoReleaseFence = 1; + commandBuffer->autoReleaseFence = true; SDL_UnlockMutex(renderer->acquireCommandBufferLock); - if (!acquireFenceResult) { - return NULL; - } - return (SDL_GPUCommandBuffer *)commandBuffer; } @@ -4806,7 +4801,8 @@ static bool D3D11_INTERNAL_MapAndCopyTextureDownload( static bool D3D11_INTERNAL_CleanCommandBuffer( D3D11Renderer *renderer, - D3D11CommandBuffer *commandBuffer) + D3D11CommandBuffer *commandBuffer, + bool cancel) { Uint32 i, j; bool result = true; @@ -4817,17 +4813,21 @@ static bool D3D11_INTERNAL_CleanCommandBuffer( D3D11TransferBuffer *transferBuffer = commandBuffer->usedTransferBuffers[i]; for (j = 0; j < transferBuffer->bufferDownloadCount; j += 1) { - result &= D3D11_INTERNAL_MapAndCopyBufferDownload( - renderer, - transferBuffer, - &transferBuffer->bufferDownloads[j]); + if (!cancel) { + result &= D3D11_INTERNAL_MapAndCopyBufferDownload( + renderer, + transferBuffer, + &transferBuffer->bufferDownloads[j]); + } } for (j = 0; j < transferBuffer->textureDownloadCount; j += 1) { - result &= D3D11_INTERNAL_MapAndCopyTextureDownload( - renderer, - transferBuffer, - &transferBuffer->textureDownloads[j]); + if (!cancel) { + result &= D3D11_INTERNAL_MapAndCopyTextureDownload( + renderer, + transferBuffer, + &transferBuffer->textureDownloads[j]); + } } transferBuffer->bufferDownloadCount = 0; @@ -4887,10 +4887,12 @@ static bool D3D11_INTERNAL_CleanCommandBuffer( SDL_UnlockMutex(renderer->acquireCommandBufferLock); // Remove this command buffer from the submitted list - for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { - if (renderer->submittedCommandBuffers[i] == commandBuffer) { - renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; - renderer->submittedCommandBufferCount -= 1; + if (!cancel) { + for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + if (renderer->submittedCommandBuffers[i] == commandBuffer) { + renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; + renderer->submittedCommandBufferCount -= 1; + } } } @@ -5024,7 +5026,8 @@ static bool D3D11_WaitForFences( if (res == S_OK) { result &= D3D11_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } @@ -5696,6 +5699,11 @@ static bool D3D11_Submit( SDL_LockMutex(renderer->contextLock); + if (!D3D11_INTERNAL_AcquireFence(d3d11CommandBuffer)) { + SDL_UnlockMutex(renderer->contextLock); + return false; + } + // Notify the command buffer completion query that we have completed recording ID3D11DeviceContext_End( renderer->immediateContext, @@ -5778,7 +5786,8 @@ static bool D3D11_Submit( if (res == S_OK) { result &= D3D11_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } @@ -5793,12 +5802,26 @@ static SDL_GPUFence *D3D11_SubmitAndAcquireFence( SDL_GPUCommandBuffer *commandBuffer) { D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer; - D3D11Fence *fence = d3d11CommandBuffer->fence; + d3d11CommandBuffer->autoReleaseFence = false; + if (!D3D11_Submit(commandBuffer)) { + return NULL; + } + return (SDL_GPUFence *)d3d11CommandBuffer->fence; +} - d3d11CommandBuffer->autoReleaseFence = 0; - D3D11_Submit(commandBuffer); +static bool D3D11_Cancel( + SDL_GPUCommandBuffer *commandBuffer) +{ + D3D11CommandBuffer *d3d11CommandBuffer = (D3D11CommandBuffer *)commandBuffer; + D3D11Renderer *renderer = d3d11CommandBuffer->renderer; + bool result; - return (SDL_GPUFence *)fence; + d3d11CommandBuffer->autoReleaseFence = false; + SDL_LockMutex(renderer->contextLock); + result = D3D11_INTERNAL_CleanCommandBuffer(renderer, d3d11CommandBuffer, true); + SDL_UnlockMutex(renderer->contextLock); + + return result; } static bool D3D11_Wait( @@ -5822,7 +5845,7 @@ static bool D3D11_Wait( for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { commandBuffer = renderer->submittedCommandBuffers[i]; - result &= D3D11_INTERNAL_CleanCommandBuffer(renderer, commandBuffer); + result &= D3D11_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false); } D3D11_INTERNAL_PerformPendingDestroys(renderer); diff --git a/src/gpu/d3d12/SDL_gpu_d3d12.c b/src/gpu/d3d12/SDL_gpu_d3d12.c index a489f204b..00c329312 100644 --- a/src/gpu/d3d12/SDL_gpu_d3d12.c +++ b/src/gpu/d3d12/SDL_gpu_d3d12.c @@ -7297,18 +7297,20 @@ static bool D3D12_INTERNAL_CopyTextureDownload( static bool D3D12_INTERNAL_CleanCommandBuffer( D3D12Renderer *renderer, - D3D12CommandBuffer *commandBuffer) + D3D12CommandBuffer *commandBuffer, + bool cancel) { Uint32 i; HRESULT res; bool result = true; // Perform deferred texture data copies - for (i = 0; i < commandBuffer->textureDownloadCount; i += 1) { - result &= D3D12_INTERNAL_CopyTextureDownload( - commandBuffer, - commandBuffer->textureDownloads[i]); + if (!cancel) { + result &= D3D12_INTERNAL_CopyTextureDownload( + commandBuffer, + commandBuffer->textureDownloads[i]); + } SDL_free(commandBuffer->textureDownloads[i]); } commandBuffer->textureDownloadCount = 0; @@ -7401,10 +7403,12 @@ static bool D3D12_INTERNAL_CleanCommandBuffer( SDL_UnlockMutex(renderer->acquireCommandBufferLock); // Remove this command buffer from the submitted list - for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { - if (renderer->submittedCommandBuffers[i] == commandBuffer) { - renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; - renderer->submittedCommandBufferCount -= 1; + if (!cancel) { + for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + if (renderer->submittedCommandBuffers[i] == commandBuffer) { + renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; + renderer->submittedCommandBufferCount -= 1; + } } } @@ -7573,7 +7577,8 @@ static bool D3D12_Submit( if (fenceValue == D3D12_FENCE_SIGNAL_VALUE) { result &= D3D12_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } @@ -7589,10 +7594,32 @@ static SDL_GPUFence *D3D12_SubmitAndAcquireFence( { D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; d3d12CommandBuffer->autoReleaseFence = false; - D3D12_Submit(commandBuffer); + if (!D3D12_Submit(commandBuffer)) { + return NULL; + } return (SDL_GPUFence *)d3d12CommandBuffer->inFlightFence; } +static bool D3D12_Cancel( + SDL_GPUCommandBuffer *commandBuffer) +{ + D3D12CommandBuffer *d3d12CommandBuffer = (D3D12CommandBuffer *)commandBuffer; + D3D12Renderer *renderer = d3d12CommandBuffer->renderer; + bool result; + HRESULT res; + + // Notify the command buffer that we have completed recording + res = ID3D12GraphicsCommandList_Close(d3d12CommandBuffer->graphicsCommandList); + CHECK_D3D12_ERROR_AND_RETURN("Failed to close command list!", false); + + d3d12CommandBuffer->autoReleaseFence = false; + SDL_LockMutex(renderer->submitLock); + result = D3D12_INTERNAL_CleanCommandBuffer(renderer, d3d12CommandBuffer, true); + SDL_UnlockMutex(renderer->submitLock); + + return result; +} + static bool D3D12_Wait( SDL_GPURenderer *driverData) { @@ -7636,7 +7663,7 @@ static bool D3D12_Wait( // Clean up for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { - result &= D3D12_INTERNAL_CleanCommandBuffer(renderer, renderer->submittedCommandBuffers[i]); + result &= D3D12_INTERNAL_CleanCommandBuffer(renderer, renderer->submittedCommandBuffers[i], false); } D3D12_INTERNAL_PerformPendingDestroys(renderer); @@ -7692,7 +7719,8 @@ static bool D3D12_WaitForFences( if (fenceValue == D3D12_FENCE_SIGNAL_VALUE) { result &= D3D12_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } diff --git a/src/gpu/metal/SDL_gpu_metal.m b/src/gpu/metal/SDL_gpu_metal.m index 04a5bff59..6c496a5c1 100644 --- a/src/gpu/metal/SDL_gpu_metal.m +++ b/src/gpu/metal/SDL_gpu_metal.m @@ -446,6 +446,7 @@ typedef struct MetalTextureContainer typedef struct MetalFence { SDL_AtomicInt complete; + SDL_AtomicInt referenceCount; } MetalFence; typedef struct MetalWindowData @@ -453,9 +454,12 @@ typedef struct MetalWindowData SDL_Window *window; SDL_MetalView view; CAMetalLayer *layer; + SDL_GPUPresentMode presentMode; id drawable; MetalTexture texture; MetalTextureContainer textureContainer; + SDL_GPUFence *inFlightFences[MAX_FRAMES_IN_FLIGHT]; + Uint32 frameCounter; } MetalWindowData; typedef struct MetalShader @@ -605,7 +609,7 @@ typedef struct MetalCommandBuffer // Fences MetalFence *fence; - Uint8 autoReleaseFence; + bool autoReleaseFence; // Reference Counting MetalBuffer **usedBuffers; @@ -2019,6 +2023,7 @@ static Uint8 METAL_INTERNAL_CreateFence( fence = SDL_calloc(1, sizeof(MetalFence)); SDL_SetAtomicInt(&fence->complete, 0); + SDL_SetAtomicInt(&fence->referenceCount, 0); // Add it to the available pool // FIXME: Should this be EXPAND_IF_NEEDED? @@ -2036,7 +2041,7 @@ static Uint8 METAL_INTERNAL_CreateFence( return 1; } -static Uint8 METAL_INTERNAL_AcquireFence( +static bool METAL_INTERNAL_AcquireFence( MetalRenderer *renderer, MetalCommandBuffer *commandBuffer) { @@ -2049,7 +2054,7 @@ static Uint8 METAL_INTERNAL_AcquireFence( if (!METAL_INTERNAL_CreateFence(renderer)) { SDL_UnlockMutex(renderer->fenceLock); SDL_LogError(SDL_LOG_CATEGORY_GPU, "Failed to create fence!"); - return 0; + return false; } } @@ -2061,8 +2066,9 @@ static Uint8 METAL_INTERNAL_AcquireFence( // Associate the fence with the command buffer commandBuffer->fence = fence; SDL_SetAtomicInt(&fence->complete, 0); // FIXME: Is this right? + (void)SDL_AtomicIncRef(&commandBuffer->fence->referenceCount); - return 1; + return true; } static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer( @@ -2099,8 +2105,7 @@ static SDL_GPUCommandBuffer *METAL_AcquireCommandBuffer( commandBuffer->needComputeTextureBind = true; commandBuffer->needComputeUniformBind = true; - METAL_INTERNAL_AcquireFence(renderer, commandBuffer); - commandBuffer->autoReleaseFence = 1; + commandBuffer->autoReleaseFence = true; SDL_UnlockMutex(renderer->acquireCommandBufferLock); @@ -3266,29 +3271,36 @@ static void METAL_ReleaseFence( SDL_GPURenderer *driverData, SDL_GPUFence *fence) { - METAL_INTERNAL_ReleaseFenceToPool( - (MetalRenderer *)driverData, - (MetalFence *)fence); + MetalFence *metalFence = (MetalFence *)fence; + if (SDL_AtomicDecRef(&metalFence->referenceCount)) { + METAL_INTERNAL_ReleaseFenceToPool( + (MetalRenderer *)driverData, + (MetalFence *)fence); + } } // Cleanup static void METAL_INTERNAL_CleanCommandBuffer( MetalRenderer *renderer, - MetalCommandBuffer *commandBuffer) + MetalCommandBuffer *commandBuffer, + bool cancel) { Uint32 i; - // Reference Counting - for (i = 0; i < commandBuffer->usedBufferCount; i += 1) { - (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[i]->referenceCount); + // End any active passes + if (commandBuffer->renderEncoder) { + [commandBuffer->renderEncoder endEncoding]; + commandBuffer->renderEncoder = nil; } - commandBuffer->usedBufferCount = 0; - - for (i = 0; i < commandBuffer->usedTextureCount; i += 1) { - (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[i]->referenceCount); + if (commandBuffer->computeEncoder) { + [commandBuffer->computeEncoder endEncoding]; + commandBuffer->computeEncoder = nil; + } + if (commandBuffer->blitEncoder) { + [commandBuffer->blitEncoder endEncoding]; + commandBuffer->blitEncoder = nil; } - commandBuffer->usedTextureCount = 0; // Uniform buffers are now available @@ -3303,6 +3315,18 @@ static void METAL_INTERNAL_CleanCommandBuffer( SDL_UnlockMutex(renderer->acquireUniformBufferLock); + // Reference Counting + + for (i = 0; i < commandBuffer->usedBufferCount; i += 1) { + (void)SDL_AtomicDecRef(&commandBuffer->usedBuffers[i]->referenceCount); + } + commandBuffer->usedBufferCount = 0; + + for (i = 0; i < commandBuffer->usedTextureCount; i += 1) { + (void)SDL_AtomicDecRef(&commandBuffer->usedTextures[i]->referenceCount); + } + commandBuffer->usedTextureCount = 0; + // Reset presentation commandBuffer->windowDataCount = 0; @@ -3354,10 +3378,12 @@ static void METAL_INTERNAL_CleanCommandBuffer( SDL_UnlockMutex(renderer->acquireCommandBufferLock); // Remove this command buffer from the submitted list - for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { - if (renderer->submittedCommandBuffers[i] == commandBuffer) { - renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; - renderer->submittedCommandBufferCount -= 1; + if (!cancel) { + for (i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + if (renderer->submittedCommandBuffers[i] == commandBuffer) { + renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; + renderer->submittedCommandBufferCount -= 1; + } } } } @@ -3483,12 +3509,19 @@ static Uint8 METAL_INTERNAL_CreateSwapchain( windowData->view = SDL_Metal_CreateView(windowData->window); windowData->drawable = nil; + windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC; + windowData->frameCounter = 0; + + for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) { + windowData->inFlightFences[i] = NULL; + } windowData->layer = (__bridge CAMetalLayer *)(SDL_Metal_GetLayer(windowData->view)); windowData->layer.device = renderer->device; #ifdef SDL_PLATFORM_MACOS if (@available(macOS 10.13, *)) { windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE); + windowData->presentMode = presentMode; } #endif windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]); @@ -3610,6 +3643,13 @@ static void METAL_ReleaseWindow( METAL_Wait(driverData); SDL_Metal_DestroyView(windowData->view); + for (int i = 0; i < MAX_FRAMES_IN_FLIGHT; i += 1) { + if (windowData->inFlightFences[i] != NULL) { + METAL_ReleaseFence( + (SDL_GPURenderer *)renderer, + windowData->inFlightFences[i]); + } + } SDL_LockMutex(renderer->windowLock); for (Uint32 i = 0; i < renderer->claimedWindowCount; i += 1) { @@ -3653,10 +3693,6 @@ static bool METAL_AcquireSwapchainTexture( SET_STRING_ERROR_AND_RETURN("Window is not claimed by this SDL_GpuDevice", false); } - // Get the drawable and its underlying texture - windowData->drawable = [windowData->layer nextDrawable]; - windowData->texture.handle = [windowData->drawable texture]; - // Update the window size drawableSize = windowData->layer.drawableSize; windowData->textureContainer.header.info.width = (Uint32)drawableSize.width; @@ -3668,6 +3704,39 @@ static bool METAL_AcquireSwapchainTexture( *swapchainTextureHeight = (Uint32)drawableSize.height; } + if (windowData->inFlightFences[windowData->frameCounter] != NULL) { + if (windowData->presentMode == SDL_GPU_PRESENTMODE_VSYNC) { + // In VSYNC mode, block until the least recent presented frame is done + if (!METAL_WaitForFences( + (SDL_GPURenderer *)renderer, + true, + &windowData->inFlightFences[windowData->frameCounter], + 1)) { + return false; + } + } else { + if (!METAL_QueryFence( + (SDL_GPURenderer *)metalCommandBuffer->renderer, + windowData->inFlightFences[windowData->frameCounter])) { + /* + * In IMMEDIATE mode, if the least recent fence is not signaled, + * return true to indicate that there is no error but rendering should be skipped + */ + return true; + } + } + + METAL_ReleaseFence( + (SDL_GPURenderer *)metalCommandBuffer->renderer, + windowData->inFlightFences[windowData->frameCounter]); + + windowData->inFlightFences[windowData->frameCounter] = NULL; + } + + // Get the drawable and its underlying texture + windowData->drawable = [windowData->layer nextDrawable]; + windowData->texture.handle = [windowData->drawable texture]; + // Set up presentation if (metalCommandBuffer->windowDataCount == metalCommandBuffer->windowDataCapacity) { metalCommandBuffer->windowDataCapacity += 1; @@ -3723,9 +3792,12 @@ static bool METAL_SetSwapchainParameters( METAL_Wait(driverData); + windowData->presentMode = SDL_GPU_PRESENTMODE_VSYNC; + #ifdef SDL_PLATFORM_MACOS if (@available(macOS 10.13, *)) { windowData->layer.displaySyncEnabled = (presentMode != SDL_GPU_PRESENTMODE_IMMEDIATE); + windowData->presentMode = presentMode; } #endif windowData->layer.pixelFormat = SDLToMetal_TextureFormat(SwapchainCompositionToFormat[swapchainComposition]); @@ -3756,10 +3828,22 @@ static bool METAL_Submit( SDL_LockMutex(renderer->submitLock); + if (!METAL_INTERNAL_AcquireFence(renderer, metalCommandBuffer)) { + SDL_UnlockMutex(renderer->submitLock); + return false; + } + // Enqueue present requests, if applicable for (Uint32 i = 0; i < metalCommandBuffer->windowDataCount; i += 1) { - [metalCommandBuffer->handle presentDrawable:metalCommandBuffer->windowDatas[i]->drawable]; - metalCommandBuffer->windowDatas[i]->drawable = nil; + MetalWindowData *windowData = metalCommandBuffer->windowDatas[i]; + [metalCommandBuffer->handle presentDrawable:windowData->drawable]; + windowData->drawable = nil; + + windowData->inFlightFences[windowData->frameCounter] = (SDL_GPUFence *)metalCommandBuffer->fence; + + (void)SDL_AtomicIncRef(&metalCommandBuffer->fence->referenceCount); + + windowData->frameCounter = (windowData->frameCounter + 1) % MAX_FRAMES_IN_FLIGHT; } // Notify the fence when the command buffer has completed @@ -3787,7 +3871,8 @@ static bool METAL_Submit( if (SDL_GetAtomicInt(&renderer->submittedCommandBuffers[i]->fence->complete)) { METAL_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } @@ -3803,12 +3888,25 @@ static SDL_GPUFence *METAL_SubmitAndAcquireFence( SDL_GPUCommandBuffer *commandBuffer) { MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; - MetalFence *fence = metalCommandBuffer->fence; + metalCommandBuffer->autoReleaseFence = false; + if (!METAL_Submit(commandBuffer)) { + return NULL; + } + return (SDL_GPUFence *)metalCommandBuffer->fence; +} - metalCommandBuffer->autoReleaseFence = 0; - METAL_Submit(commandBuffer); +static bool METAL_Cancel( + SDL_GPUCommandBuffer *commandBuffer) +{ + MetalCommandBuffer *metalCommandBuffer = (MetalCommandBuffer *)commandBuffer; + MetalRenderer *renderer = metalCommandBuffer->renderer; - return (SDL_GPUFence *)fence; + metalCommandBuffer->autoReleaseFence = false; + SDL_LockMutex(renderer->submitLock); + METAL_INTERNAL_CleanCommandBuffer(renderer, metalCommandBuffer, true); + SDL_UnlockMutex(renderer->submitLock); + + return true; } static bool METAL_Wait( @@ -3832,7 +3930,7 @@ static bool METAL_Wait( for (Sint32 i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { commandBuffer = renderer->submittedCommandBuffers[i]; - METAL_INTERNAL_CleanCommandBuffer(renderer, commandBuffer); + METAL_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false); } METAL_INTERNAL_PerformPendingDestroys(renderer); diff --git a/src/gpu/vulkan/SDL_gpu_vulkan.c b/src/gpu/vulkan/SDL_gpu_vulkan.c index ce3abd60d..22983a520 100644 --- a/src/gpu/vulkan/SDL_gpu_vulkan.c +++ b/src/gpu/vulkan/SDL_gpu_vulkan.c @@ -630,8 +630,6 @@ typedef struct VulkanTextureSubresource VkImageView *renderTargetViews; // One render target view per depth slice VkImageView computeWriteView; VkImageView depthStencilView; - - bool transitioned; // used for layout tracking } VulkanTextureSubresource; struct VulkanTexture @@ -685,6 +683,7 @@ typedef enum VulkanBufferUsageMode typedef enum VulkanTextureUsageMode { + VULKAN_TEXTURE_USAGE_MODE_UNINITIALIZED, VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE, VULKAN_TEXTURE_USAGE_MODE_COPY_DESTINATION, VULKAN_TEXTURE_USAGE_MODE_SAMPLER, @@ -1093,9 +1092,9 @@ typedef struct VulkanCommandBuffer Sint32 usedUniformBufferCapacity; VulkanFenceHandle *inFlightFence; - Uint8 autoReleaseFence; + bool autoReleaseFence; - Uint8 isDefrag; // Whether this CB was created for defragging + bool isDefrag; // Whether this CB was created for defragging } VulkanCommandBuffer; struct VulkanCommandPool @@ -1225,6 +1224,7 @@ static void VULKAN_ReleaseWindow(SDL_GPURenderer *driverData, SDL_Window *window static bool VULKAN_Wait(SDL_GPURenderer *driverData); static bool VULKAN_WaitForFences(SDL_GPURenderer *driverData, bool waitAll, SDL_GPUFence *const *fences, Uint32 numFences); static bool VULKAN_Submit(SDL_GPUCommandBuffer *commandBuffer); +static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer(SDL_GPURenderer *driverData); // Error Handling @@ -2649,7 +2649,11 @@ static void VULKAN_INTERNAL_TextureSubresourceMemoryBarrier( memoryBarrier.subresourceRange.baseMipLevel = textureSubresource->level; memoryBarrier.subresourceRange.levelCount = 1; - if (sourceUsageMode == VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE) { + if (sourceUsageMode == VULKAN_TEXTURE_USAGE_MODE_UNINITIALIZED) { + srcStages = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + memoryBarrier.srcAccessMask = 0; + memoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; + } else if (sourceUsageMode == VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE) { srcStages = VK_PIPELINE_STAGE_TRANSFER_BIT; memoryBarrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; memoryBarrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; @@ -2686,10 +2690,6 @@ static void VULKAN_INTERNAL_TextureSubresourceMemoryBarrier( return; } - if (!textureSubresource->transitioned) { - memoryBarrier.oldLayout = VK_IMAGE_LAYOUT_UNDEFINED; - } - if (destinationUsageMode == VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE) { dstStages = VK_PIPELINE_STAGE_TRANSFER_BIT; memoryBarrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; @@ -2742,8 +2742,6 @@ static void VULKAN_INTERNAL_TextureSubresourceMemoryBarrier( NULL, 1, &memoryBarrier); - - textureSubresource->transitioned = true; } static VulkanBufferUsageMode VULKAN_INTERNAL_DefaultBufferUsageMode( @@ -4701,7 +4699,6 @@ static Uint32 VULKAN_INTERNAL_CreateSwapchain( windowData->textureContainers[i].activeTexture->subresources[0].parent = windowData->textureContainers[i].activeTexture; windowData->textureContainers[i].activeTexture->subresources[0].layer = 0; windowData->textureContainers[i].activeTexture->subresources[0].level = 0; - windowData->textureContainers[i].activeTexture->subresources[0].transitioned = true; windowData->textureContainers[i].activeTexture->subresources[0].renderTargetViews = SDL_malloc(sizeof(VkImageView)); if (!VULKAN_INTERNAL_CreateRenderTargetView( renderer, @@ -5762,10 +5759,19 @@ static VulkanTexture *VULKAN_INTERNAL_CreateTexture( texture->subresources[subresourceIndex].parent = texture; texture->subresources[subresourceIndex].layer = i; texture->subresources[subresourceIndex].level = j; - texture->subresources[subresourceIndex].transitioned = false; } } + // Let's transition to the default barrier state, because for some reason Vulkan doesn't let us do that with initialLayout. + VulkanCommandBuffer *barrierCommandBuffer = (VulkanCommandBuffer *)VULKAN_AcquireCommandBuffer((SDL_GPURenderer *)renderer); + VULKAN_INTERNAL_TextureTransitionToDefaultUsage( + renderer, + barrierCommandBuffer, + VULKAN_TEXTURE_USAGE_MODE_UNINITIALIZED, + texture); + VULKAN_INTERNAL_TrackTexture(barrierCommandBuffer, texture); + VULKAN_Submit((SDL_GPUCommandBuffer *)barrierCommandBuffer); + return texture; } @@ -9359,7 +9365,7 @@ static SDL_GPUCommandBuffer *VULKAN_AcquireCommandBuffer( SDL_zeroa(commandBuffer->readOnlyComputeStorageTextures); SDL_zeroa(commandBuffer->readOnlyComputeStorageBuffers); - commandBuffer->autoReleaseFence = 1; + commandBuffer->autoReleaseFence = true; commandBuffer->isDefrag = 0; @@ -10025,7 +10031,8 @@ static void VULKAN_INTERNAL_PerformPendingDestroys( static void VULKAN_INTERNAL_CleanCommandBuffer( VulkanRenderer *renderer, - VulkanCommandBuffer *commandBuffer) + VulkanCommandBuffer *commandBuffer, + bool cancel) { if (commandBuffer->autoReleaseFence) { VULKAN_ReleaseFence( @@ -10117,10 +10124,12 @@ static void VULKAN_INTERNAL_CleanCommandBuffer( SDL_UnlockMutex(renderer->acquireCommandBufferLock); // Remove this command buffer from the submitted list - for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) { - if (renderer->submittedCommandBuffers[i] == commandBuffer) { - renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; - renderer->submittedCommandBufferCount -= 1; + if (!cancel) { + for (Uint32 i = 0; i < renderer->submittedCommandBufferCount; i += 1) { + if (renderer->submittedCommandBuffers[i] == commandBuffer) { + renderer->submittedCommandBuffers[i] = renderer->submittedCommandBuffers[renderer->submittedCommandBufferCount - 1]; + renderer->submittedCommandBufferCount -= 1; + } } } } @@ -10160,7 +10169,8 @@ static bool VULKAN_WaitForFences( if (result == VK_SUCCESS) { VULKAN_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); } } @@ -10187,7 +10197,7 @@ static bool VULKAN_Wait( for (i = renderer->submittedCommandBufferCount - 1; i >= 0; i -= 1) { commandBuffer = renderer->submittedCommandBuffers[i]; - VULKAN_INTERNAL_CleanCommandBuffer(renderer, commandBuffer); + VULKAN_INTERNAL_CleanCommandBuffer(renderer, commandBuffer, false); } VULKAN_INTERNAL_PerformPendingDestroys(renderer); @@ -10200,15 +10210,11 @@ static bool VULKAN_Wait( static SDL_GPUFence *VULKAN_SubmitAndAcquireFence( SDL_GPUCommandBuffer *commandBuffer) { - VulkanCommandBuffer *vulkanCommandBuffer; - - vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; - vulkanCommandBuffer->autoReleaseFence = 0; - + VulkanCommandBuffer *vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + vulkanCommandBuffer->autoReleaseFence = false; if (!VULKAN_Submit(commandBuffer)) { return NULL; } - return (SDL_GPUFence *)vulkanCommandBuffer->inFlightFence; } @@ -10347,7 +10353,8 @@ static bool VULKAN_Submit( if (vulkanResult == VK_SUCCESS) { VULKAN_INTERNAL_CleanCommandBuffer( renderer, - renderer->submittedCommandBuffers[i]); + renderer->submittedCommandBuffers[i], + false); commandBufferCleaned = 1; } @@ -10388,6 +10395,29 @@ static bool VULKAN_Submit( return result; } +static bool VULKAN_Cancel( + SDL_GPUCommandBuffer *commandBuffer) +{ + VulkanRenderer *renderer; + VulkanCommandBuffer *vulkanCommandBuffer; + VkResult result; + + vulkanCommandBuffer = (VulkanCommandBuffer *)commandBuffer; + renderer = vulkanCommandBuffer->renderer; + + result = renderer->vkResetCommandBuffer( + vulkanCommandBuffer->commandBuffer, + VK_COMMAND_BUFFER_RESET_RELEASE_RESOURCES_BIT); + CHECK_VULKAN_ERROR_AND_RETURN(result, vkResetCommandBuffer, NULL) + + vulkanCommandBuffer->autoReleaseFence = false; + SDL_LockMutex(renderer->submitLock); + VULKAN_INTERNAL_CleanCommandBuffer(renderer, vulkanCommandBuffer, true); + SDL_UnlockMutex(renderer->submitLock); + + return true; +} + static bool VULKAN_INTERNAL_DefragmentMemory( VulkanRenderer *renderer) { @@ -10525,55 +10555,53 @@ static bool VULKAN_INTERNAL_DefragmentMemory( srcSubresource->parent->container->debugName); } - if (srcSubresource->transitioned) { - VULKAN_INTERNAL_TextureSubresourceTransitionFromDefaultUsage( - renderer, - commandBuffer, - VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE, - srcSubresource); + VULKAN_INTERNAL_TextureSubresourceTransitionFromDefaultUsage( + renderer, + commandBuffer, + VULKAN_TEXTURE_USAGE_MODE_COPY_SOURCE, + srcSubresource); - VULKAN_INTERNAL_TextureSubresourceTransitionFromDefaultUsage( - renderer, - commandBuffer, - VULKAN_TEXTURE_USAGE_MODE_COPY_DESTINATION, - dstSubresource); + VULKAN_INTERNAL_TextureSubresourceTransitionFromDefaultUsage( + renderer, + commandBuffer, + VULKAN_TEXTURE_USAGE_MODE_COPY_DESTINATION, + dstSubresource); - imageCopy.srcOffset.x = 0; - imageCopy.srcOffset.y = 0; - imageCopy.srcOffset.z = 0; - imageCopy.srcSubresource.aspectMask = srcSubresource->parent->aspectFlags; - imageCopy.srcSubresource.baseArrayLayer = srcSubresource->layer; - imageCopy.srcSubresource.layerCount = 1; - imageCopy.srcSubresource.mipLevel = srcSubresource->level; - imageCopy.extent.width = SDL_max(1, info.width >> srcSubresource->level); - imageCopy.extent.height = SDL_max(1, info.height >> srcSubresource->level); - imageCopy.extent.depth = info.type == SDL_GPU_TEXTURETYPE_3D ? info.layer_count_or_depth : 1; - imageCopy.dstOffset.x = 0; - imageCopy.dstOffset.y = 0; - imageCopy.dstOffset.z = 0; - imageCopy.dstSubresource.aspectMask = dstSubresource->parent->aspectFlags; - imageCopy.dstSubresource.baseArrayLayer = dstSubresource->layer; - imageCopy.dstSubresource.layerCount = 1; - imageCopy.dstSubresource.mipLevel = dstSubresource->level; + imageCopy.srcOffset.x = 0; + imageCopy.srcOffset.y = 0; + imageCopy.srcOffset.z = 0; + imageCopy.srcSubresource.aspectMask = srcSubresource->parent->aspectFlags; + imageCopy.srcSubresource.baseArrayLayer = srcSubresource->layer; + imageCopy.srcSubresource.layerCount = 1; + imageCopy.srcSubresource.mipLevel = srcSubresource->level; + imageCopy.extent.width = SDL_max(1, info.width >> srcSubresource->level); + imageCopy.extent.height = SDL_max(1, info.height >> srcSubresource->level); + imageCopy.extent.depth = info.type == SDL_GPU_TEXTURETYPE_3D ? info.layer_count_or_depth : 1; + imageCopy.dstOffset.x = 0; + imageCopy.dstOffset.y = 0; + imageCopy.dstOffset.z = 0; + imageCopy.dstSubresource.aspectMask = dstSubresource->parent->aspectFlags; + imageCopy.dstSubresource.baseArrayLayer = dstSubresource->layer; + imageCopy.dstSubresource.layerCount = 1; + imageCopy.dstSubresource.mipLevel = dstSubresource->level; - renderer->vkCmdCopyImage( - commandBuffer->commandBuffer, - currentRegion->vulkanTexture->image, - VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, - newTexture->image, - VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, - 1, - &imageCopy); + renderer->vkCmdCopyImage( + commandBuffer->commandBuffer, + currentRegion->vulkanTexture->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + newTexture->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, + &imageCopy); - VULKAN_INTERNAL_TextureSubresourceTransitionToDefaultUsage( - renderer, - commandBuffer, - VULKAN_TEXTURE_USAGE_MODE_COPY_DESTINATION, - dstSubresource); + VULKAN_INTERNAL_TextureSubresourceTransitionToDefaultUsage( + renderer, + commandBuffer, + VULKAN_TEXTURE_USAGE_MODE_COPY_DESTINATION, + dstSubresource); - VULKAN_INTERNAL_TrackTexture(commandBuffer, srcSubresource->parent); - VULKAN_INTERNAL_TrackTexture(commandBuffer, dstSubresource->parent); - } + VULKAN_INTERNAL_TrackTexture(commandBuffer, srcSubresource->parent); + VULKAN_INTERNAL_TrackTexture(commandBuffer, dstSubresource->parent); } // re-point original container to new texture