Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 31 additions & 8 deletions src/core/libraries/gnmdriver/gnmdriver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2081,6 +2081,11 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
// check for `prepareFlip` packet
cmdbuf += size - 64;
ASSERT_MSG(cmdbuf[0] == 0xc03e1000, "Can't find `prepareFlip` packet");
// PS4 returns 0x80d11080 instead of crashing
// if (cmdbuf[0] != 0xc03e1000) {
// LOG_ERROR(Lib_GnmDriver, "Can't find `prepareFlip` packet");
// return 0x80d11080; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_INVALID_COMMAND_BUFFER
// }

std::array<u32, 7> backup{};
std::memcpy(backup.data(), cmdbuf, backup.size() * sizeof(decltype(backup)::value_type));
Expand All @@ -2089,15 +2094,14 @@ static inline s32 PatchFlipRequest(u32* cmdbuf, u32 size, u32 vo_handle, u32 buf
"Invalid flip packet");
ASSERT_MSG(buf_idx != 0xffff'ffffu, "Invalid VO buffer index");

const s32 flip_result = VideoOut::sceVideoOutSubmitEopFlip(vo_handle, buf_idx, flip_mode,
flip_arg, nullptr /*unk*/);
const s32 flip_result = liverpool->ReserveFlip();
if (flip_result != 0) {
if (flip_result == 0x80290012) {
LOG_ERROR(Lib_GnmDriver, "Flip queue is full");
return 0x80d11081;
} else {
LOG_ERROR(Lib_GnmDriver, "Flip request failed");
return flip_result;
LOG_ERROR(Lib_GnmDriver, "Flip request failed with {:#x}", flip_result);
return 0x80d11082; // SCE_GNM_ERROR_SUBMISSION_AND_FLIP_FAILED_REQUEST_FAILED
}
}

Expand Down Expand Up @@ -2169,6 +2173,14 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffers(u32 count, u32* dcb_gpu_addrs
vo_handle, buf_idx, flip_mode, flip_arg);
}

// Shared submission loop used by both the plain and the submit-and-flip entry
// points. `flip` is an optional flip request: when it holds a value it is
// attached only to the last command buffer pair of the batch (index
// count - 1), so the flip triggers after the final command buffer completes;
// every other pair is submitted with std::nullopt.
// Forward-declared here because the definition appears after its first caller.
static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes,
std::optional<AmdGpu::Liverpool::FlipRequest> flip);

s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
u32 workload, u32 count, u32* dcb_gpu_addrs[], u32* dcb_sizes_in_bytes, u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes, u32 vo_handle, u32 buf_idx, u32 flip_mode, s64 flip_arg) {
Expand All @@ -2183,16 +2195,25 @@ s32 PS4_SYSV_ABI sceGnmSubmitAndFlipCommandBuffersForWorkload(
return patch_result;
}

return sceGnmSubmitCommandBuffers(count, const_cast<const u32**>(dcb_gpu_addrs),
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
ccb_sizes_in_bytes);
return SubmitCommandBuffersInternal(count, const_cast<const u32**>(dcb_gpu_addrs),
dcb_sizes_in_bytes, const_cast<const u32**>(ccb_gpu_addrs),
ccb_sizes_in_bytes,
AmdGpu::Liverpool::FlipRequest{buf_idx, flip_arg});
}

// Submits `count` DCB/CCB command buffer pairs without requesting a flip.
// All arguments except `workload` are forwarded unchanged to
// SubmitCommandBuffersInternal, with std::nullopt passed as the flip request;
// the return value is whatever that helper returns.
// `workload` is not used by this function.
int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes,
const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes) {
return SubmitCommandBuffersInternal(count, dcb_gpu_addrs, dcb_sizes_in_bytes, ccb_gpu_addrs,
ccb_sizes_in_bytes, std::nullopt);
}

static s32 SubmitCommandBuffersInternal(u32 count, const u32* dcb_gpu_addrs[],
u32* dcb_sizes_in_bytes, const u32* ccb_gpu_addrs[],
u32* ccb_sizes_in_bytes,
std::optional<AmdGpu::Liverpool::FlipRequest> flip) {
HLE_TRACE;
LOG_DEBUG(Lib_GnmDriver, "called");

Expand Down Expand Up @@ -2288,7 +2309,9 @@ int PS4_SYSV_ABI sceGnmSubmitCommandBuffersForWorkload(u32 workload, u32 count,
.base_addr = reinterpret_cast<uintptr_t>(ccb),
});
}
liverpool->SubmitGfx(dcb_span, ccb_span);
// Associate the flip with the last command buffer in the batch.
const bool is_last = (cbpair == count - 1);
liverpool->SubmitGfx(dcb_span, ccb_span, is_last ? flip : std::nullopt);
}

return ORBIS_OK;
Expand Down
32 changes: 19 additions & 13 deletions src/core/libraries/videoout/driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,27 @@ int VideoOutDriver::Open(const ServiceThreadParams* params) {
return ORBIS_VIDEO_OUT_ERROR_RESOURCE_BUSY;
}
main_port.is_open = true;
liverpool->SetVoPort(&main_port);
main_port.flip_status.gc_queue_num = 0;
main_port.flip_status.flip_pending_num = 0;
liverpool->SetVideoOut(&main_port, this);
return 1;
}

// Closes the main video-out port: waits for the GPU to go idle, resets the
// port's state under the driver mutex, and clears the port/driver pair that
// was handed to Liverpool.
// `handle` is not used; only `main_port` is touched.
void VideoOutDriver::Close(s32 handle) {
// Drain all pending GPU submissions before closing.
// Must be done before taking mutex since the GPU thread needs it to
// enqueue flip requests.
liverpool->WaitGpuIdle();

std::scoped_lock lock{mutex};

// Return the port to its closed state.
main_port.is_open = false;
main_port.flip_rate = 0;
main_port.prev_index = -1;
// Clear the flip counters that ReserveFlip and SubmitFlip increment.
// NOTE(review): those functions update the counters under port_mutex, while
// only `mutex` is held here — confirm this reset cannot race with them.
main_port.flip_status.gc_queue_num = 0;
main_port.flip_status.flip_pending_num = 0;
// No flip events are expected to remain registered at this point.
ASSERT(main_port.flip_events.empty());
// Pass null port and driver so Liverpool no longer has a VideoOut target
// (presumably this is what makes later EOP flips get dropped — confirm).
liverpool->SetVideoOut(nullptr, nullptr);
}

VideoOutPort* VideoOutDriver::GetPort(int handle) {
Expand Down Expand Up @@ -220,32 +230,28 @@ void VideoOutDriver::DrawLastFrame() {
}
}

bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg,
bool is_eop /*= false*/) {
bool VideoOutDriver::SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg) {
{
std::unique_lock lock{port->port_mutex};
if (index != -1 && port->flip_status.flip_pending_num > 16) {
LOG_ERROR(Lib_VideoOut, "Flip queue is full");
return false;
}

if (is_eop) {
++port->flip_status.gc_queue_num;
}
++port->flip_status.flip_pending_num; // integral GPU and CPU pending flips counter
++port->flip_status.flip_pending_num;
port->flip_status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
}

if (!is_eop) {
// Non EOP flips can arrive from any thread so ask GPU thread to perform them
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, is_eop); });
} else {
SubmitFlipInternal(port, index, flip_arg, is_eop);
}
// CPU flips can arrive from any thread so ask GPU thread to perform them
liverpool->SendCommand([=, this]() { SubmitFlipInternal(port, index, flip_arg, false); });

return true;
}

// Forwards a flip straight to SubmitFlipInternal on the calling thread.
// This wrapper itself takes no lock and does not update the pending-flip
// counters. Liverpool::Process calls it with is_eop = true once a submission
// that carries a flip request has completed.
void VideoOutDriver::EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
SubmitFlipInternal(port, index, flip_arg, is_eop);
}

void VideoOutDriver::SubmitFlipInternal(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop) {
Vulkan::Frame* frame;
if (index == -1) {
Expand Down
8 changes: 6 additions & 2 deletions src/core/libraries/videoout/driver.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-FileCopyrightText: Copyright 2024 shadPS4 Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later

#pragma once
Expand Down Expand Up @@ -88,7 +88,11 @@ class VideoOutDriver {
const BufferAttribute* attribute);
int UnregisterBuffers(VideoOutPort* port, s32 attributeIndex);

bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop = false);
bool SubmitFlip(VideoOutPort* port, s32 index, s64 flip_arg);

// Push a frame to the presenter. Called by Liverpool (GPU thread) when
// a submission with an associated flip completes.
void EnqueueFlip(VideoOutPort* port, s32 index, s64 flip_arg, bool is_eop);

private:
struct Request {
Expand Down
18 changes: 0 additions & 18 deletions src/core/libraries/videoout/video_out.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
#include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/video_out.h"
#include "core/libraries/videoout/videoout_error.h"
#include "core/platform.h"
#include "video_core/renderer_vulkan/vk_presenter.h"

extern std::unique_ptr<Vulkan::Presenter> presenter;
Expand Down Expand Up @@ -342,23 +341,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetBufferLabelAddress(s32 handle, uintptr_t* label_a
return 16;
}

// Arms a deferred flip of buffer `buf_id` on the port behind `handle`.
// Returns ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE if the driver has no port for
// `handle`, otherwise ORBIS_OK. The flip is not performed here: it is
// submitted from the callback registered for the GfxFlip interrupt.
// `mode` and `unk` are not used by this function.
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk) {
auto* port = driver->GetPort(handle);
if (!port) {
return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
}

// [=] copies port, buf_id and flip_arg into the callback, which runs later
// when the interrupt is signalled.
// NOTE(review): RegisterOnce presumably fires the callback for a single
// GfxFlip signal only — confirm against Platform::IrqC.
Platform::IrqC::Instance()->RegisterOnce(
Platform::InterruptId::GfxFlip, [=](Platform::InterruptId irq) {
ASSERT_MSG(irq == Platform::InterruptId::GfxFlip, "An unexpected IRQ occured");
// The buffer's label must read 1 when the interrupt arrives.
// NOTE(review): buf_id is not range-checked before indexing buffer_labels.
ASSERT_MSG(port->buffer_labels[buf_id] == 1, "Out of order flip IRQ");
// Hand the flip to the driver, flagged as an EOP flip (is_eop = true).
const auto result = driver->SubmitFlip(port, buf_id, flip_arg, true);
ASSERT_MSG(result, "EOP flip submission failed");
});

return ORBIS_OK;
}

s32 PS4_SYSV_ABI sceVideoOutGetDeviceCapabilityInfo(
s32 handle, SceVideoOutDeviceCapabilityInfo* pDeviceCapabilityInfo) {
pDeviceCapabilityInfo->capability = 0;
Expand Down
3 changes: 0 additions & 3 deletions src/core/libraries/videoout/video_out.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,9 +138,6 @@ s32 PS4_SYSV_ABI sceVideoOutGetEventData(const Kernel::OrbisKernelEvent* ev, s64
s32 PS4_SYSV_ABI sceVideoOutColorSettingsSetGamma(SceVideoOutColorSettings* settings, float gamma);
s32 PS4_SYSV_ABI sceVideoOutAdjustColor(s32 handle, const SceVideoOutColorSettings* settings);

// Internal system functions
s32 sceVideoOutSubmitEopFlip(s32 handle, u32 buf_id, u32 mode, s64 flip_arg, void** unk);

void RegisterLib(Core::Loader::SymbolsResolver* sym);

} // namespace Libraries::VideoOut
63 changes: 49 additions & 14 deletions src/video_core/amdgpu/liverpool.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,9 @@
#include "core/debug_state.h"
#include "core/emulator_settings.h"
#include "core/libraries/kernel/process.h"
#include "core/libraries/kernel/time.h"
#include "core/libraries/videoout/driver.h"
#include "core/libraries/videoout/videoout_error.h"
#include "core/memory.h"
#include "core/platform.h"
#include "video_core/amdgpu/liverpool.h"
Expand Down Expand Up @@ -120,19 +122,37 @@ void Liverpool::Process(std::stop_token stoken) {
if (queue.submits.empty()) {
continue;
}
task = queue.submits.front();
task = queue.submits.front().task;
}
task.resume();

if (task.done()) {
task.destroy();
std::optional<FlipRequest> flip;
{
std::scoped_lock lock{queue.m_access};
flip = std::move(queue.submits.front().flip);
queue.submits.pop();
}

std::scoped_lock lock{queue.m_access};
queue.submits.pop();
task.destroy();

--num_submits;
std::scoped_lock lock2{submit_mutex};
submit_cv.notify_all();
{
std::scoped_lock lock2{submit_mutex};
submit_cv.notify_all();
}

// Perform flip after the submission completes.
auto* port = vo_port.load(std::memory_order_acquire);
auto* drv = vo_driver.load(std::memory_order_acquire);
if (flip && port && drv) {
ASSERT_MSG(flip->buf_id < Libraries::VideoOut::MaxDisplayBuffers,
"Invalid flip buffer index {}", flip->buf_id);
ASSERT_MSG(port->buffer_labels[flip->buf_id] == 1, "Out of order flip IRQ");
drv->EnqueueFlip(port, flip->buf_id, flip->flip_arg, true);
} else if (flip) {
LOG_WARNING(Lib_GnmDriver, "EOP flip dropped — VideoOut port is not available");
}
}
}

Expand Down Expand Up @@ -263,9 +283,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c

switch (nop->data_block[0]) {
case PM4CmdNop::PayloadType::PatchedFlip: {
// There is no evidence that GPU CP drives flip events by parsing
// special NOP packets. For convenience lets assume that it does.
Platform::IrqC::Instance()->Signal(Platform::InterruptId::GfxFlip);
// Flip is performed when the submission completes, not here.
break;
}
case PM4CmdNop::PayloadType::DebugMarkerPush: {
Expand Down Expand Up @@ -811,9 +829,10 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
// there are no other submits to yield to we can sleep the thread
// instead and allow other tasks to run.
const u64* wait_addr = wait_reg_mem->Address<u64*>();
if (vo_port->IsVoLabel(wait_addr) &&
auto* port = vo_port.load(std::memory_order_acquire);
if (port && port->IsVoLabel(wait_addr) &&
num_submits == mapped_queues[GfxQueueId].submits.size()) {
vo_port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
port->WaitVoLabel([&] { return wait_reg_mem->Test(regs.reg_array); });
break;
}
while (!wait_reg_mem->Test(regs.reg_array)) {
Expand Down Expand Up @@ -1199,7 +1218,23 @@ Liverpool::CmdBuffer Liverpool::CopyCmdBuffers(std::span<const u32> dcb, std::sp
return std::make_pair(dcb, ccb);
}

void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
// Reserves a slot in the VideoOut flip queue for an upcoming EOP flip.
// Returns ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE when no port is currently
// attached, ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL when more than 16 flips are
// already pending, and ORBIS_OK once the reservation has been recorded.
s32 Liverpool::ReserveFlip() {
    auto* const target = vo_port.load(std::memory_order_acquire);
    if (target == nullptr) {
        return ORBIS_VIDEO_OUT_ERROR_INVALID_HANDLE;
    }

    // The flip counters are also updated from other threads under this mutex,
    // so hold it for the whole check-and-update sequence.
    std::scoped_lock guard{target->port_mutex};
    auto& status = target->flip_status;
    if (status.flip_pending_num > 16) {
        return ORBIS_VIDEO_OUT_ERROR_FLIP_QUEUE_FULL;
    }

    // Count the flip in both the GC queue and the overall pending total, then
    // stamp the submission time.
    ++status.gc_queue_num;
    ++status.flip_pending_num;
    status.submit_tsc = Libraries::Kernel::sceKernelReadTsc();
    return ORBIS_OK;
}

void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb,
std::optional<FlipRequest> flip) {
auto& queue = mapped_queues[GfxQueueId];

if (EmulatorSettings.IsCopyGpuBuffers()) {
Expand All @@ -1209,7 +1244,7 @@ void Liverpool::SubmitGfx(std::span<const u32> dcb, std::span<const u32> ccb) {
auto task = ProcessGraphics(dcb, ccb);
{
std::scoped_lock lock{queue.m_access};
queue.submits.emplace(task.handle);
queue.submits.push({task.handle, std::move(flip)});
}

std::scoped_lock lk{submit_mutex};
Expand All @@ -1225,7 +1260,7 @@ void Liverpool::SubmitAsc(u32 gnm_vqid, std::span<const u32> acb) {
const auto& task = ProcessCompute(acb, vqid);
{
std::scoped_lock lock{queue.m_access};
queue.submits.emplace(task.handle);
queue.submits.push({task.handle, std::nullopt});
}

std::scoped_lock lk{submit_mutex};
Expand Down
Loading
Loading