shadps4-emu · TECHNICANGEL · Feb 23, 2026 · Feb 23, 2026 · Feb 23, 2026 · Feb 28, 2026
diff --git a/.gitignore b/.gitignore
@@ -418,3 +418,4 @@ FodyWeavers.xsd
 # JetBrains
 .idea
 cmake-build-*
+nul
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -1034,6 +1034,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
                src/video_core/renderer_vulkan/vk_resource_pool.h
                src/video_core/renderer_vulkan/vk_scheduler.cpp
                src/video_core/renderer_vulkan/vk_scheduler.h
+               src/video_core/renderer_vulkan/vk_compute_scheduler.cpp
+               src/video_core/renderer_vulkan/vk_compute_scheduler.h
                src/video_core/renderer_vulkan/vk_shader_hle.cpp
                src/video_core/renderer_vulkan/vk_shader_hle.h
                src/video_core/renderer_vulkan/vk_shader_util.cpp

diff --git a/externals/CMakeLists.txt b/externals/CMakeLists.txt
@@ -151,7 +151,7 @@ endif()
 
 # sirit
 add_subdirectory(sirit)
-if (WIN32)
+if (WIN32 AND NOT MSVC)
     target_compile_options(sirit PRIVATE "-Wno-error=unused-command-line-argument")
 endif()
 

diff --git a/src/common/config.cpp b/src/common/config.cpp
@@ -175,6 +175,7 @@ static ConfigEntry<bool> shouldCopyGPUBuffers(false);
 static ConfigEntry<bool> readbacksEnabled(false);
 static ConfigEntry<bool> readbackLinearImagesEnabled(false);
 static ConfigEntry<bool> directMemoryAccessEnabled(false);
+static ConfigEntry<bool> asyncComputeEnabled(true);
 static ConfigEntry<bool> shouldDumpShaders(false);
 static ConfigEntry<bool> shouldPatchShaders(false);
 static ConfigEntry<u32> vblankFrequency(60);
@@ -452,6 +453,10 @@ bool directMemoryAccess() {
     return directMemoryAccessEnabled.get();
 }
 
+// Returns the current value of the asyncComputeEnabled config entry.
+bool asyncCompute() {
+    return asyncComputeEnabled.get();
+}
+
 bool dumpShaders() {
     return shouldDumpShaders.get();
 }
@@ -603,6 +608,10 @@ void setDirectMemoryAccess(bool enable, bool is_game_specific) {
     directMemoryAccessEnabled.set(enable, is_game_specific);
 }
 
+// Stores `enable` in the asyncComputeEnabled config entry; `is_game_specific` is forwarded
+// unchanged to ConfigEntry::set, like the neighbouring setters do.
+void setAsyncCompute(bool enable, bool is_game_specific) {
+    asyncComputeEnabled.set(enable, is_game_specific);
+}
+
 void setDumpShaders(bool enable, bool is_game_specific) {
     shouldDumpShaders.set(enable, is_game_specific);
 }

diff --git a/src/common/config.h b/src/common/config.h
@@ -69,6 +69,8 @@ bool readbackLinearImages();
 void setReadbackLinearImages(bool enable, bool is_game_specific = false);
 bool directMemoryAccess();
 void setDirectMemoryAccess(bool enable, bool is_game_specific = false);
+bool asyncCompute();
+void setAsyncCompute(bool enable, bool is_game_specific = false);
 bool dumpShaders();
 void setDumpShaders(bool enable, bool is_game_specific = false);
 u32 vblankFreq();

diff --git a/src/video_core/buffer_cache/buffer.cpp b/src/video_core/buffer_cache/buffer.cpp
@@ -1,4 +1,5 @@
 // SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
 // SPDX-License-Identifier: GPL-2.0-or-later
 
+#include <array>
 #include "common/alignment.h"
@@ -104,10 +105,20 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
                VAddr cpu_addr_, vk::BufferUsageFlags flags, u64 size_bytes_)
     : cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_},
       usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} {
-    // Create buffer object.
+    // Check if we need concurrent sharing for async compute
+    const bool has_async = instance->HasDedicatedComputeQueue();
+    std::array<u32, 2> queue_families = {
+        instance->GetGraphicsQueueFamilyIndex(),
+        instance->GetComputeQueueFamilyIndex(),
+    };
+
+    // Create buffer object with concurrent sharing if async compute is available
     const vk::BufferCreateInfo buffer_ci = {
         .size = size_bytes,
         .usage = flags,
+        .sharingMode = has_async ? vk::SharingMode::eConcurrent : vk::SharingMode::eExclusive,
+        .queueFamilyIndexCount = has_async ? static_cast<u32>(queue_families.size()) : 0,
+        .pQueueFamilyIndices = has_async ? queue_families.data() : nullptr,
     };
     VmaAllocationInfo alloc_info{};
     buffer.Create(buffer_ci, usage, &alloc_info);

diff --git a/src/video_core/renderer_vulkan/vk_compute_scheduler.cpp b/src/video_core/renderer_vulkan/vk_compute_scheduler.cpp
@@ -0,0 +1,201 @@
+// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "common/assert.h"
+#include "video_core/renderer_vulkan/vk_compute_scheduler.h"
+#include "video_core/renderer_vulkan/vk_instance.h"
+#include "video_core/renderer_vulkan/vk_scheduler.h"
+
+namespace Vulkan {
+
+/// Builds the compute scheduler on top of `instance_`.
+/// The command pool is created for the instance's compute queue family and tied to this
+/// scheduler's own master semaphore; the queue handle and the "dedicated queue" flag are
+/// both read from the instance. The first command buffer is allocated and begun immediately.
+/// NOTE(review): `master_semaphore{instance}` and `command_pool{instance, ...}` read the
+/// `instance` member, so they rely on it being declared before them in the class — confirm
+/// against the header.
+ComputeScheduler::ComputeScheduler(const Instance& instance_)
+    : instance{instance_}, master_semaphore{instance},
+      command_pool{instance, &master_semaphore, instance.GetComputeQueueFamilyIndex()},
+      compute_queue{instance.GetComputeQueue()}, is_dedicated{instance.HasDedicatedComputeQueue()} {
+    AllocateWorkerCommandBuffers();
+    LOG_INFO(Render_Vulkan, "ComputeScheduler initialized (dedicated queue: {})", is_dedicated);
+}
+
+/// Defaulted: the destructor itself performs no explicit work; members clean up after themselves.
+ComputeScheduler::~ComputeScheduler() = default;
+
+/// Submits the currently recorded command buffer without waiting for it to complete.
+/// SubmitExecution() skips the submission when there is neither pending work nor a queued
+/// semaphore wait, so calling this may be a no-op.
+void ComputeScheduler::Flush() {
+    SubmitExecution();
+}
+
+/// Submits the recorded compute work and blocks until the GPU has finished executing it.
+/// Returns immediately when there was nothing to submit.
+void ComputeScheduler::Finish() {
+    const u64 presubmit_tick = CurrentTick();
+    SubmitExecution();
+    // SubmitExecution() returns without submitting when there is no pending work and no
+    // queued semaphore wait. The tick then does not advance and nothing will ever signal
+    // presubmit_tick, so waiting on it would block forever.
+    if (CurrentTick() == presubmit_tick) {
+        return;
+    }
+    Wait(presubmit_tick);
+}
+
+/// Blocks until the compute timeline semaphore has reached `tick`.
+/// If `tick` has not been submitted yet, the current command buffer is flushed first.
+/// When that flush submits nothing, no GPU work is associated with `tick` and the call
+/// returns without waiting.
+void ComputeScheduler::Wait(u64 tick) {
+    if (tick >= master_semaphore.CurrentTick()) {
+        // The requested tick is still being built; try to submit it now.
+        Flush();
+        if (tick >= master_semaphore.CurrentTick()) {
+            // Flush() skipped the submission (no pending work, no queued waits), so the
+            // tick did not advance. Nothing will signal `tick`; waiting on it would hang.
+            return;
+        }
+    }
+    master_semaphore.Wait(tick);
+}
+
+/// Refreshes the semaphore state, then runs and removes queued operations from the front of
+/// the queue for as long as the tick they were registered with is reported free.
+void ComputeScheduler::PopPendingOperations() {
+    master_semaphore.Refresh();
+    for (;;) {
+        if (pending_ops.empty()) {
+            break;
+        }
+        // Operations are consumed strictly in queue order; stop at the first one whose
+        // tick is not free yet.
+        if (!master_semaphore.IsFree(pending_ops.front().gpu_tick)) {
+            break;
+        }
+        pending_ops.front().callback();
+        pending_ops.pop();
+    }
+}
+
+/// Queues a wait on the graphics timeline semaphore for the next compute submission.
+/// Any active rendering on the graphics scheduler is ended first. When the compute queue is
+/// not dedicated nothing else is done (the original author relies on regular pipeline
+/// barriers for that case).
+/// NOTE(review): only the last *submitted* graphics tick is waited on; commands recorded in
+/// the graphics scheduler's current, unsubmitted command buffer are not covered — confirm
+/// this is intended.
+void ComputeScheduler::WaitForGraphics(Scheduler& graphics_scheduler) {
+    // Both the shared-queue and the dedicated-queue paths end active rendering.
+    graphics_scheduler.EndRendering();
+    if (!is_dedicated) {
+        return;
+    }
+
+    // CurrentTick() is the tick currently being built, so the last submitted one is the
+    // tick before it.
+    const auto graphics_tick = graphics_scheduler.CurrentTick() - 1;
+    const bool nothing_submitted = graphics_tick == 0;
+    const bool already_synced = graphics_tick <= last_graphics_sync_tick;
+    if (already_synced || nothing_submitted) {
+        return;
+    }
+
+    const auto graphics_sem = graphics_scheduler.GetTimelineSemaphore();
+
+    std::lock_guard<std::mutex> lk{submit_mutex};
+
+    // Look for an existing wait on the graphics semaphore in the current batch.
+    size_t slot = 0;
+    while (slot < wait_semaphores.size() && !(wait_semaphores[slot] == graphics_sem)) {
+        ++slot;
+    }
+
+    if (slot < wait_semaphores.size()) {
+        // Already waiting on this semaphore: keep the larger of the two values.
+        wait_values[slot] = std::max(wait_values[slot], graphics_tick);
+    } else {
+        wait_semaphores.push_back(graphics_sem);
+        wait_values.push_back(graphics_tick);
+    }
+
+    last_graphics_sync_tick = graphics_tick;
+}
+
+/// Submits pending compute work and registers a wait for it on the graphics scheduler.
+/// Does nothing unless the compute queue is dedicated and work is pending.
+void ComputeScheduler::SignalGraphics(Scheduler& graphics_scheduler) {
+    const bool needs_signal = is_dedicated && has_pending_work;
+    if (!needs_signal) {
+        return;
+    }
+
+    // Push the recorded compute work to the GPU before graphics is told to wait for it.
+    Flush();
+
+    const auto compute_sem = master_semaphore.Handle();
+    // After the flush the tick being built is one past the tick that was just submitted.
+    const auto signal_value = master_semaphore.CurrentTick() - 1;
+    if (signal_value == 0) {
+        return;
+    }
+
+    // Hand the semaphore/value pair to the graphics scheduler, which is expected to apply
+    // the wait on its side.
+    graphics_scheduler.Wait(compute_sem, signal_value);
+}
+
+/// Bookkeeping hook for a compute dispatch: flags the current command buffer as containing
+/// work and makes sure a wait on the graphics timeline is queued via WaitForGraphics().
+void ComputeScheduler::OnComputeDispatch(Scheduler& graphics_scheduler) {
+    // Mark that we have work that needs to be synced later
+    has_pending_work = true;
+
+    // Baseline sync: ensure compute is waiting for current graphics state if not already doing so
+    WaitForGraphics(graphics_scheduler);
+}
+
+/// Takes a fresh command buffer from the command pool, begins recording on it with the
+/// one-time-submit usage flag and clears the pending-work flag.
+void ComputeScheduler::AllocateWorkerCommandBuffers() {
+    const vk::CommandBufferBeginInfo begin_info = {
+        .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
+    };
+
+    current_cmdbuf = command_pool.Commit();
+    Check(current_cmdbuf.begin(begin_info));
+    // The new command buffer is empty, so there is nothing to submit yet.
+    has_pending_work = false;
+}
+
+/// Ends the current command buffer, submits it to the compute queue and begins a new one.
+/// The submission waits on every semaphore/value pair queued in wait_semaphores/wait_values
+/// and signals the next tick of this scheduler's timeline semaphore. Nothing is submitted
+/// when there is neither pending work nor a queued wait. submit_mutex is held for the whole
+/// call, including the pending-operation callbacks run at the end.
+void ComputeScheduler::SubmitExecution() {
+    std::lock_guard<std::mutex> lk{submit_mutex};
+
+    if (!has_pending_work && wait_semaphores.empty()) {
+        // No work to submit and no waits to process
+        return;
+    }
+
+    // Apply global memory barrier to ensure compute results are visible to later consumers.
+    if (has_pending_work) {
+        // dstStageMask must only name stages supported by the queue family of this command
+        // buffer. eAllGraphics is not valid on a compute-only queue family, while
+        // eAllCommands is valid on every queue and already covers the compute stage, so it
+        // is used on its own.
+        const vk::MemoryBarrier2 memory_barrier = {
+            .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
+            .srcAccessMask =
+                vk::AccessFlagBits2::eShaderStorageWrite | vk::AccessFlagBits2::eShaderWrite,
+            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
+            .dstAccessMask =
+                vk::AccessFlagBits2::eShaderStorageRead | vk::AccessFlagBits2::eShaderRead |
+                vk::AccessFlagBits2::eUniformRead | vk::AccessFlagBits2::eIndexRead |
+                vk::AccessFlagBits2::eVertexAttributeRead |
+                vk::AccessFlagBits2::eIndirectCommandRead,
+        };
+
+        const vk::DependencyInfo dependency_info = {
+            .memoryBarrierCount = 1,
+            .pMemoryBarriers = &memory_barrier,
+        };
+        current_cmdbuf.pipelineBarrier2(dependency_info);
+    }
+
+    Check(current_cmdbuf.end());
+
+    const u64 signal_value = master_semaphore.NextTick();
+    const vk::Semaphore timeline = master_semaphore.Handle();
+
+    // Build wait semaphore infos using synchronization2
+    // NOTE(review): the waits only block the compute shader stage; if transfer or other
+    // non-dispatch commands are ever recorded here they would not be held back — confirm.
+    std::vector<vk::SemaphoreSubmitInfo> wait_infos;
+    wait_infos.reserve(wait_semaphores.size());
+    for (size_t i = 0; i < wait_semaphores.size(); ++i) {
+        wait_infos.push_back({
+            .semaphore = wait_semaphores[i],
+            .value = wait_values[i],
+            .stageMask = vk::PipelineStageFlagBits2::eComputeShader,
+        });
+    }
+
+    // Signal semaphore info. The timeline value is also what CPU-side waits, the command
+    // pool and the pending-operation callbacks key off, so the signal has to cover every
+    // command in the batch rather than only the compute shader stage.
+    const vk::SemaphoreSubmitInfo signal_info = {
+        .semaphore = timeline,
+        .value = signal_value,
+        .stageMask = vk::PipelineStageFlagBits2::eAllCommands,
+    };
+
+    // Command buffer info
+    const vk::CommandBufferSubmitInfo cmdbuf_info = {
+        .commandBuffer = current_cmdbuf,
+    };
+
+    // Use vkQueueSubmit2 (synchronization2)
+    const vk::SubmitInfo2 submit_info = {
+        .waitSemaphoreInfoCount = static_cast<u32>(wait_infos.size()),
+        .pWaitSemaphoreInfos = wait_infos.data(),
+        .commandBufferInfoCount = 1U,
+        .pCommandBufferInfos = &cmdbuf_info,
+        .signalSemaphoreInfoCount = 1U,
+        .pSignalSemaphoreInfos = &signal_info,
+    };
+
+    auto submit_result = compute_queue.submit2(submit_info, nullptr);
+    ASSERT_MSG(submit_result != vk::Result::eErrorDeviceLost,
+               "Device lost during compute submit! signal_value={}", signal_value);
+
+    // Clear waits after submission
+    wait_semaphores.clear();
+    wait_values.clear();
+
+    master_semaphore.Refresh();
+    // Start recording the next batch; this also resets has_pending_work.
+    AllocateWorkerCommandBuffers();
+
+    // Run callbacks of queued operations whose tick has completed.
+    PopPendingOperations();
+}
+
+} // namespace Vulkan
-Original file line number
+Diff line change
@@ Expand Up / @@ -418,3 +418,4 @@ FodyWeavers.xsd @@
     # JetBrains
     .idea
     cmake-build-*
+    nul