Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -418,3 +418,4 @@ FodyWeavers.xsd
# JetBrains
.idea
cmake-build-*
nul
2 changes: 2 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1034,6 +1034,8 @@ set(VIDEO_CORE src/video_core/amdgpu/cb_db_extent.h
src/video_core/renderer_vulkan/vk_resource_pool.h
src/video_core/renderer_vulkan/vk_scheduler.cpp
src/video_core/renderer_vulkan/vk_scheduler.h
src/video_core/renderer_vulkan/vk_compute_scheduler.cpp
src/video_core/renderer_vulkan/vk_compute_scheduler.h
src/video_core/renderer_vulkan/vk_shader_hle.cpp
src/video_core/renderer_vulkan/vk_shader_hle.h
src/video_core/renderer_vulkan/vk_shader_util.cpp
Expand Down
2 changes: 1 addition & 1 deletion externals/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ endif()

# sirit
add_subdirectory(sirit)
if (WIN32)
if (WIN32 AND NOT MSVC)
target_compile_options(sirit PRIVATE "-Wno-error=unused-command-line-argument")
endif()

Expand Down
9 changes: 9 additions & 0 deletions src/common/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,7 @@ static ConfigEntry<bool> shouldCopyGPUBuffers(false);
static ConfigEntry<bool> readbacksEnabled(false);
static ConfigEntry<bool> readbackLinearImagesEnabled(false);
static ConfigEntry<bool> directMemoryAccessEnabled(false);
static ConfigEntry<bool> asyncComputeEnabled(true);
static ConfigEntry<bool> shouldDumpShaders(false);
static ConfigEntry<bool> shouldPatchShaders(false);
static ConfigEntry<u32> vblankFrequency(60);
Expand Down Expand Up @@ -452,6 +453,10 @@ bool directMemoryAccess() {
return directMemoryAccessEnabled.get();
}

/// Reports the current value of the async-compute configuration entry.
bool asyncCompute() {
    const bool enabled = asyncComputeEnabled.get();
    return enabled;
}

bool dumpShaders() {
return shouldDumpShaders.get();
}
Expand Down Expand Up @@ -603,6 +608,10 @@ void setDirectMemoryAccess(bool enable, bool is_game_specific) {
directMemoryAccessEnabled.set(enable, is_game_specific);
}

/// Stores a new value for the async-compute configuration entry.
/// @param enable            Value to store.
/// @param is_game_specific  Forwarded unchanged to the config entry's set().
void setAsyncCompute(bool enable, bool is_game_specific) {
    asyncComputeEnabled.set(enable, is_game_specific);
}

void setDumpShaders(bool enable, bool is_game_specific) {
shouldDumpShaders.set(enable, is_game_specific);
}
Expand Down
2 changes: 2 additions & 0 deletions src/common/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ bool readbackLinearImages();
void setReadbackLinearImages(bool enable, bool is_game_specific = false);
bool directMemoryAccess();
void setDirectMemoryAccess(bool enable, bool is_game_specific = false);
bool asyncCompute();
void setAsyncCompute(bool enable, bool is_game_specific = false);
bool dumpShaders();
void setDumpShaders(bool enable, bool is_game_specific = false);
u32 vblankFreq();
Expand Down
13 changes: 12 additions & 1 deletion src/video_core/buffer_cache/buffer.cpp
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
#include <array>
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/alignment.h"
Expand Down Expand Up @@ -104,10 +105,20 @@ Buffer::Buffer(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
VAddr cpu_addr_, vk::BufferUsageFlags flags, u64 size_bytes_)
: cpu_addr{cpu_addr_}, size_bytes{size_bytes_}, instance{&instance_}, scheduler{&scheduler_},
usage{usage_}, buffer{instance->GetDevice(), instance->GetAllocator()} {
// Create buffer object.
// Check if we need concurrent sharing for async compute
const bool has_async = instance->HasDedicatedComputeQueue();
std::array<u32, 2> queue_families = {
instance->GetGraphicsQueueFamilyIndex(),
instance->GetComputeQueueFamilyIndex(),
};

// Create buffer object with concurrent sharing if async compute is available
const vk::BufferCreateInfo buffer_ci = {
.size = size_bytes,
.usage = flags,
.sharingMode = has_async ? vk::SharingMode::eConcurrent : vk::SharingMode::eExclusive,
.queueFamilyIndexCount = has_async ? static_cast<u32>(queue_families.size()) : 0,
.pQueueFamilyIndices = has_async ? queue_families.data() : nullptr,
};
VmaAllocationInfo alloc_info{};
buffer.Create(buffer_ci, usage, &alloc_info);
Expand Down
201 changes: 201 additions & 0 deletions src/video_core/renderer_vulkan/vk_compute_scheduler.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// SPDX-FileCopyrightText: Copyright 2025 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#include "common/assert.h"
#include "video_core/renderer_vulkan/vk_compute_scheduler.h"
#include "video_core/renderer_vulkan/vk_instance.h"
#include "video_core/renderer_vulkan/vk_scheduler.h"

namespace Vulkan {

/// Sets up the scheduler on the instance's compute queue: creates the timeline semaphore wrapper
/// and a command pool for the compute queue family, then opens the first command buffer.
ComputeScheduler::ComputeScheduler(const Instance& instance_)
    : instance{instance_},
      master_semaphore{instance},
      command_pool{instance, &master_semaphore, instance.GetComputeQueueFamilyIndex()},
      compute_queue{instance.GetComputeQueue()},
      is_dedicated{instance.HasDedicatedComputeQueue()} {
    // Start recording immediately so a command buffer is always available.
    AllocateWorkerCommandBuffers();

    LOG_INFO(Render_Vulkan, "ComputeScheduler initialized (dedicated queue: {})", is_dedicated);
}

/// Nothing to release by hand here; members clean up through their own destructors.
ComputeScheduler::~ComputeScheduler() = default;

/// Submits the currently recorded compute work (if any) without waiting for it to complete.
void ComputeScheduler::Flush() {
    SubmitExecution();
}

void ComputeScheduler::Finish() {
const u64 presubmit_tick = CurrentTick();
SubmitExecution();
Wait(presubmit_tick);
}

/// Blocks until the compute timeline reaches `tick`.
///
/// If `tick` has not been submitted yet, the current batch is flushed first. Flush() may
/// legitimately submit nothing (no pending work and no pending waits), in which case `tick`
/// can never be signalled; the wait is then clamped to the last tick that really was
/// submitted instead of blocking forever.
///
/// @param tick Timeline value to wait for.
void ComputeScheduler::Wait(u64 tick) {
    if (tick >= master_semaphore.CurrentTick()) {
        Flush();

        // CurrentTick() is the tick being built; CurrentTick() - 1 is the last one submitted.
        const u64 building_tick = master_semaphore.CurrentTick();
        if (tick >= building_tick) {
            if (building_tick == 0) {
                return;
            }
            tick = building_tick - 1;
        }
    }
    master_semaphore.Wait(tick);
}

/// Runs, in FIFO order, every queued callback whose GPU tick the master semaphore reports as
/// free, stopping at the first one that is not.
void ComputeScheduler::PopPendingOperations() {
    // Pull the latest completed tick before testing the queue.
    master_semaphore.Refresh();

    for (;;) {
        if (pending_ops.empty()) {
            break;
        }
        if (!master_semaphore.IsFree(pending_ops.front().gpu_tick)) {
            break;
        }
        pending_ops.front().callback();
        pending_ops.pop();
    }
}

/// Makes the next compute submission wait for the most recently submitted graphics tick.
///
/// Always ends any active rendering on the graphics scheduler. When the compute queue is not
/// dedicated nothing else is done. Otherwise the graphics timeline semaphore and tick are
/// recorded in the pending wait list (at most one entry per semaphore, keeping the highest
/// tick), and consumed by the next SubmitExecution().
void ComputeScheduler::WaitForGraphics(Scheduler& graphics_scheduler) {
    // Both the shared-queue and dedicated-queue paths begin by closing the render pass.
    graphics_scheduler.EndRendering();

    if (!is_dedicated) {
        // Shared queue: no cross-queue semaphore wait is registered.
        return;
    }

    // CurrentTick() is the tick still being built; the last submitted one is one behind it.
    const auto graphics_tick = graphics_scheduler.CurrentTick() - 1;

    const bool nothing_submitted = graphics_tick == 0;
    const bool already_synced = graphics_tick <= last_graphics_sync_tick;
    if (already_synced || nothing_submitted) {
        return;
    }

    const auto graphics_sem = graphics_scheduler.GetTimelineSemaphore();

    std::lock_guard<std::mutex> lk{submit_mutex};

    // Find an existing wait entry for this semaphore in the current batch, if there is one.
    size_t slot = 0;
    while (slot < wait_semaphores.size() && !(wait_semaphores[slot] == graphics_sem)) {
        ++slot;
    }

    if (slot < wait_semaphores.size()) {
        // Already waiting on this semaphore: only ever raise the value.
        if (wait_values[slot] < graphics_tick) {
            wait_values[slot] = graphics_tick;
        }
    } else {
        wait_semaphores.push_back(graphics_sem);
        wait_values.push_back(graphics_tick);
    }

    last_graphics_sync_tick = graphics_tick;
}

/// Submits pending compute work and registers a wait for it on the graphics scheduler.
///
/// Does nothing unless the compute queue is dedicated and compute work has been recorded
/// since the last submission.
void ComputeScheduler::SignalGraphics(Scheduler& graphics_scheduler) {
    if (!is_dedicated) {
        return;
    }
    if (!has_pending_work) {
        return;
    }

    // Push the recorded compute work to the GPU first. Graphics work for this draw has not
    // been submitted yet, so the wait registered below still applies to it.
    Flush();

    const auto compute_sem = master_semaphore.Handle();

    // Flush() advanced the timeline, so the tick it submitted is one behind CurrentTick().
    const auto submitted_tick = master_semaphore.CurrentTick() - 1;

    if (submitted_tick > 0) {
        // The graphics scheduler applies this wait on its next submission.
        graphics_scheduler.Wait(compute_sem, submitted_tick);
    }
}

/// Bookkeeping hook for a compute dispatch: flags the current batch as containing work and
/// makes sure it is ordered after the graphics work submitted so far.
void ComputeScheduler::OnComputeDispatch(Scheduler& graphics_scheduler) {
    // Remember that this batch must be submitted and synced later.
    has_pending_work = true;

    // Baseline sync against the current graphics state (a no-op if already synced).
    WaitForGraphics(graphics_scheduler);
}

void ComputeScheduler::AllocateWorkerCommandBuffers() {
const vk::CommandBufferBeginInfo begin_info = {
.flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit,
};

current_cmdbuf = command_pool.Commit();
Check(current_cmdbuf.begin(begin_info));
has_pending_work = false;
}

/// Ends the current command buffer and submits it to the compute queue.
///
/// The submission waits on every semaphore/value pair accumulated in the pending wait list and
/// signals the scheduler's own timeline semaphore with the next tick. Afterwards the wait list
/// is cleared, a new command buffer is opened and completed pending operations are run.
///
/// Returns without submitting (and without advancing the timeline) when there is neither
/// recorded work nor any pending wait.
void ComputeScheduler::SubmitExecution() {
    std::lock_guard<std::mutex> lk{submit_mutex};

    if (!has_pending_work && wait_semaphores.empty()) {
        // No work to submit and no waits to process
        return;
    }

    // Apply global memory barrier to ensure compute results are visible to later consumers.
    if (has_pending_work) {
        // dstStageMask deliberately uses eAllCommands only: it is valid on every queue family
        // and already covers every stage that queue supports. Naming eAllGraphics explicitly is
        // invalid when this command buffer belongs to a compute-only queue family.
        const vk::MemoryBarrier2 memory_barrier = {
            .srcStageMask = vk::PipelineStageFlagBits2::eComputeShader,
            .srcAccessMask =
                vk::AccessFlagBits2::eShaderStorageWrite | vk::AccessFlagBits2::eShaderWrite,
            .dstStageMask = vk::PipelineStageFlagBits2::eAllCommands,
            .dstAccessMask =
                vk::AccessFlagBits2::eShaderStorageRead | vk::AccessFlagBits2::eShaderRead |
                vk::AccessFlagBits2::eUniformRead | vk::AccessFlagBits2::eIndexRead |
                vk::AccessFlagBits2::eVertexAttributeRead |
                vk::AccessFlagBits2::eIndirectCommandRead,
        };

        const vk::DependencyInfo dependency_info = {
            .memoryBarrierCount = 1,
            .pMemoryBarriers = &memory_barrier,
        };
        current_cmdbuf.pipelineBarrier2(dependency_info);
    }

    Check(current_cmdbuf.end());

    // Reserve the tick this submission will signal on our timeline.
    const u64 signal_value = master_semaphore.NextTick();
    const vk::Semaphore timeline = master_semaphore.Handle();

    // Build wait semaphore infos using synchronization2
    std::vector<vk::SemaphoreSubmitInfo> wait_infos;
    wait_infos.reserve(wait_semaphores.size());
    for (size_t i = 0; i < wait_semaphores.size(); ++i) {
        wait_infos.push_back({
            .semaphore = wait_semaphores[i],
            .value = wait_values[i],
            .stageMask = vk::PipelineStageFlagBits2::eComputeShader,
        });
    }

    // Signal semaphore info
    const vk::SemaphoreSubmitInfo signal_info = {
        .semaphore = timeline,
        .value = signal_value,
        .stageMask = vk::PipelineStageFlagBits2::eComputeShader,
    };

    // Command buffer info
    const vk::CommandBufferSubmitInfo cmdbuf_info = {
        .commandBuffer = current_cmdbuf,
    };

    // Use vkQueueSubmit2 (synchronization2)
    const vk::SubmitInfo2 submit_info = {
        .waitSemaphoreInfoCount = static_cast<u32>(wait_infos.size()),
        .pWaitSemaphoreInfos = wait_infos.data(),
        .commandBufferInfoCount = 1U,
        .pCommandBufferInfos = &cmdbuf_info,
        .signalSemaphoreInfoCount = 1U,
        .pSignalSemaphoreInfos = &signal_info,
    };

    auto submit_result = compute_queue.submit2(submit_info, nullptr);
    ASSERT_MSG(submit_result != vk::Result::eErrorDeviceLost,
               "Device lost during compute submit! signal_value={}", signal_value);

    // Clear waits after submission
    wait_semaphores.clear();
    wait_values.clear();

    master_semaphore.Refresh();
    AllocateWorkerCommandBuffers();

    // NOTE(review): callbacks run while submit_mutex is still held; confirm none of them
    // re-enter Flush()/WaitForGraphics(), which take the same mutex.
    PopPendingOperations();
}

} // namespace Vulkan
Loading
Loading