Skip to content
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions antora/modules/ROOT/nav.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
** xref:samples/extensions/buffer_device_address/README.adoc[Buffer device address]
** xref:samples/extensions/calibrated_timestamps/README.adoc[Calibrated timestamps]
** xref:samples/extensions/conditional_rendering/README.adoc[Conditional rendering]
** xref:samples/extensions/compute_shader_derivatives/README.adoc[Compute shader derivatives]
** xref:samples/extensions/conservative_rasterization/README.adoc[Conservative rasterization]
** xref:samples/extensions/debug_utils/README.adoc[Debug utils]
** xref:samples/extensions/descriptor_buffer_basic/README.adoc[Descriptor buffer basic]
Expand Down
6 changes: 6 additions & 0 deletions framework/vulkan_type_mapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -266,6 +266,12 @@ struct HPPType<VkPhysicalDeviceTimelineSemaphoreFeaturesKHR>
using Type = vk::PhysicalDeviceTimelineSemaphoreFeaturesKHR;
};

// Type mapping for the compute shader derivatives feature struct: associates the C API type
// VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR with its vk:: (Vulkan-Hpp) counterpart via the nested `Type` alias.
template <>
struct HPPType<VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR>
{
using Type = vk::PhysicalDeviceComputeShaderDerivativesFeaturesKHR;
};

template <>
struct HPPType<VkPhysicalDeviceVertexInputDynamicStateFeaturesEXT>
{
Expand Down
8 changes: 7 additions & 1 deletion samples/extensions/README.adoc
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
////
- Copyright (c) 2021-2024, The Khronos Group
- Copyright (c) 2021-2025, The Khronos Group
-
- SPDX-License-Identifier: Apache-2.0
-
Expand Down Expand Up @@ -302,3 +302,9 @@ Demonstrate the use of the host image extension to directly copy from a host buf
*Extensions:* https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_line_rasterization.html[`VK_EXT_line_rasterization`], https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VK_EXT_extended_dynamic_state3.html[`VK_EXT_extended_dynamic_state3`]

Demonstrate how to use dynamic multisample rasterization (MSAA)

=== xref:./{extension_samplespath}compute_shader_derivatives/README.adoc[Compute shader derivatives]

*Extension*: https://docs.vulkan.org/features/latest/features/proposals/VK_KHR_compute_shader_derivatives.html[`VK_KHR_compute_shader_derivatives`]

Demonstrate how to use derivatives (dFdx/dFdy) in compute shaders via derivative groups and how to request/enable the corresponding device feature.
30 changes: 30 additions & 0 deletions samples/extensions/compute_shader_derivatives/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2025, Holochip Inc.

# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 the "License";
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Derive the sample ID and its category from the directory layout instead of hard-coding them:
#   FOLDER_NAME   - last path component of the directory containing this CMakeLists.txt
#   PARENT_DIR    - the directory one level above it
#   CATEGORY_NAME - last path component of that parent directory
get_filename_component(FOLDER_NAME ${CMAKE_CURRENT_LIST_DIR} NAME)
get_filename_component(PARENT_DIR ${CMAKE_CURRENT_LIST_DIR} PATH)
get_filename_component(CATEGORY_NAME ${PARENT_DIR} NAME)

# Register the sample with the build through the project's add_sample() helper.
# Only a Slang shader is listed for this sample (SHADER_FILES_SLANG).
# NOTE(review): add_sample() is defined elsewhere in the project; confirm how it resolves the shader path.
add_sample(
ID ${FOLDER_NAME}
CATEGORY ${CATEGORY_NAME}
AUTHOR "Holochip"
NAME "Compute shader derivatives"
DESCRIPTION "Demonstrates VK_KHR_compute_shader_derivatives with a minimal compute dispatch using dFdx/dFdy in compute"
SHADER_FILES_SLANG
"compute_shader_derivatives/slang/derivatives.comp.slang"
)
46 changes: 46 additions & 0 deletions samples/extensions/compute_shader_derivatives/README.adoc
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
////
- Copyright (c) 2025, Holochip Inc.
-
- SPDX-License-Identifier: Apache-2.0
-
- Licensed under the Apache License, Version 2.0 the "License";
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-
////
= VK_KHR_compute_shader_derivatives — Derivatives in compute shaders

This sample demonstrates VK_KHR_compute_shader_derivatives, which enables the use of derivative instructions (like dFdx/dFdy) inside compute shaders. Traditionally, derivatives are only available in fragment shaders, but this extension defines derivative groups in compute and how invocations are paired for derivative computations.

== What is it?
- SPIR-V: The companion SPIR-V extension allows derivative instructions in the Compute execution model.
- Vulkan: The device feature is exposed via `VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR` with two booleans:
* `computeDerivativeGroupQuads` — enables quad-based derivative groups.
* `computeDerivativeGroupLinear` — enables linearly mapped derivative groups.
- GLSL: Use `#extension GL_KHR_compute_shader_derivatives : enable` and a layout qualifier to choose the grouping:
* `layout(derivative_group_quadsNV) in;`
* `layout(derivative_group_linearNV) in;`
(The `NV` suffix is retained in the GLSL tokens for compatibility.)

== Why/when to use it
- Port algorithms that rely on derivatives (e.g., LOD selection, filtering, gradients) to compute for flexibility or performance.
- Keep consistent behavior with fragment-stage derivatives by choosing an appropriate grouping mode (quads vs. linear).

== What this sample does
- Requests and requires the feature `computeDerivativeGroupQuads`.
- Builds a minimal compute pipeline with a shader that calls `dFdx`/`dFdy` in compute.
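- Runs a small per-frame command buffer that dispatches once and then transitions the swapchain image to `PRESENT` so presentation is validation-clean. The compute shader writes one `float4` per invocation (value, `ddx`, `ddy`, padding) to a host-visible storage buffer bound at set 0, binding 0; after the first submission the sample maps that buffer once and logs the 4x4 results, demonstrating that derivative instructions are accepted and execute in compute.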

== Required Vulkan extensions and features
- Instance extension: `VK_KHR_get_physical_device_properties2` (for feature chaining).
- Device extension: `VK_KHR_compute_shader_derivatives` (required).
- Device feature: `VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR::computeDerivativeGroupQuads = VK_TRUE`.

Original file line number Diff line number Diff line change
@@ -0,0 +1,269 @@
/* Copyright (c) 2025, Holochip Inc.
*
* SPDX-License-Identifier: Apache-2.0
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "compute_shader_derivatives.h"

#include "common/vk_common.h"
#include "common/vk_initializers.h"
#include "core/util/logging.hpp"

/**
 * @brief Sets the window title, the requested Vulkan API version and the
 *        instance/device extensions this sample asks for.
 */
ComputeShaderDerivatives::ComputeShaderDerivatives()
{
    // The NV flavour of the extension is only requested opportunistically
    constexpr bool nv_extension_is_optional = true;

    title = "Compute shader derivatives (VK_KHR_compute_shader_derivatives)";

    // A Vulkan 1.2 instance is requested so that the SPIR-V 1.4 modules produced by Slang pass validation
    set_api_version(VK_API_VERSION_1_2);

    // Instance side: required for chaining feature structures
    add_instance_extension(VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME);

    // Device side: the KHR extension provides the feature used by the sample ...
    add_device_extension(VK_KHR_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME);
    // ... while the NV one is enabled when present, because toolchains may still emit
    // SPV_NV_compute_shader_derivatives and validation then expects it
    add_device_extension(VK_NV_COMPUTE_SHADER_DERIVATIVES_EXTENSION_NAME, nv_extension_is_optional);
}

/**
 * @brief Releases every Vulkan object created by this sample.
 *
 * Nothing is done when no device exists. Each handle is reset to VK_NULL_HANDLE right after
 * it is destroyed so that no dangling handle is left behind.
 * NOTE(review): if any of these members (e.g. descriptor_pool) is inherited from the base
 * sample class and that class also releases it in its own destructor, the reset is what
 * prevents a double destroy - confirm against the class declaration.
 */
ComputeShaderDerivatives::~ComputeShaderDerivatives()
{
    if (!has_device())
    {
        return;
    }

    VkDevice device = get_device().get_handle();

    if (compute_pipeline != VK_NULL_HANDLE)
    {
        vkDestroyPipeline(device, compute_pipeline, nullptr);
        compute_pipeline = VK_NULL_HANDLE;
    }
    if (pipeline_layout != VK_NULL_HANDLE)
    {
        vkDestroyPipelineLayout(device, pipeline_layout, nullptr);
        pipeline_layout = VK_NULL_HANDLE;
    }
    // Destroying the pool also frees the descriptor set allocated from it
    if (descriptor_pool != VK_NULL_HANDLE)
    {
        vkDestroyDescriptorPool(device, descriptor_pool, nullptr);
        descriptor_pool = VK_NULL_HANDLE;
    }
    if (descriptor_set_layout != VK_NULL_HANDLE)
    {
        vkDestroyDescriptorSetLayout(device, descriptor_set_layout, nullptr);
        descriptor_set_layout = VK_NULL_HANDLE;
    }
    // The buffer is destroyed before the memory bound to it is freed
    if (result_buffer != VK_NULL_HANDLE)
    {
        vkDestroyBuffer(device, result_buffer, nullptr);
        result_buffer = VK_NULL_HANDLE;
    }
    if (result_memory != VK_NULL_HANDLE)
    {
        vkFreeMemory(device, result_memory, nullptr);
        result_memory = VK_NULL_HANDLE;
    }
}

void ComputeShaderDerivatives::create_output_buffer_and_descriptors()
{
auto device = get_device().get_handle();

// Create host-visible buffer to store 16 float4 entries
VkBufferCreateInfo buf_ci{VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO};
buf_ci.size = result_size;
buf_ci.usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT;
buf_ci.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
VK_CHECK(vkCreateBuffer(device, &buf_ci, nullptr, &result_buffer));

VkMemoryRequirements mem_req{};
vkGetBufferMemoryRequirements(device, result_buffer, &mem_req);

VkMemoryAllocateInfo alloc_info{VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO};
alloc_info.allocationSize = mem_req.size;
alloc_info.memoryTypeIndex = get_device().get_gpu().get_memory_type(mem_req.memoryTypeBits, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
VK_CHECK(vkAllocateMemory(device, &alloc_info, nullptr, &result_memory));
VK_CHECK(vkBindBufferMemory(device, result_buffer, result_memory, 0));

// Create descriptor pool for one storage buffer descriptor
VkDescriptorPoolSize pool_size{};
pool_size.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
pool_size.descriptorCount = 1;

VkDescriptorPoolCreateInfo pool_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO};
pool_ci.maxSets = 1;
pool_ci.poolSizeCount = 1;
pool_ci.pPoolSizes = &pool_size;
VK_CHECK(vkCreateDescriptorPool(device, &pool_ci, nullptr, &descriptor_pool));
}

/**
 * @brief Requests the physical-device feature this sample depends on.
 * @param gpu The physical device the feature request is issued against
 */
void ComputeShaderDerivatives::request_gpu_features(vkb::core::PhysicalDeviceC &gpu)
{
// Require the quads derivative group: the sample shader is written for the quad-based grouping mode.
// NOTE(review): REQUEST_REQUIRED_FEATURE is a project macro; presumably it fails when the feature is unavailable - confirm.
REQUEST_REQUIRED_FEATURE(gpu, VkPhysicalDeviceComputeShaderDerivativesFeaturesKHR, computeDerivativeGroupQuads);
// Users may switch to the linear mode by changing the shader qualifier; that mode is governed by the
// separate computeDerivativeGroupLinear feature, which is not requested here.
}

void ComputeShaderDerivatives::create_compute_pipeline()
{
auto device = get_device().get_handle();

// Descriptor set layout: binding 0 as storage buffer for results
VkDescriptorSetLayoutBinding binding{};
binding.binding = 0;
binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
binding.descriptorCount = 1;
binding.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT;
binding.pImmutableSamplers = nullptr;

VkDescriptorSetLayoutCreateInfo set_layout_ci{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO};
set_layout_ci.bindingCount = 1;
set_layout_ci.pBindings = &binding;
VK_CHECK(vkCreateDescriptorSetLayout(device, &set_layout_ci, nullptr, &descriptor_set_layout));

// Pipeline layout uses the descriptor set layout at set 0
VkPipelineLayoutCreateInfo layout_ci{VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO};
layout_ci.setLayoutCount = 1;
layout_ci.pSetLayouts = &descriptor_set_layout;
VK_CHECK(vkCreatePipelineLayout(device, &layout_ci, nullptr, &pipeline_layout));

// Allocate and update descriptor set now that layout exists
VkDescriptorSetAllocateInfo alloc_info{VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO};
alloc_info.descriptorPool = descriptor_pool;
alloc_info.descriptorSetCount = 1;
alloc_info.pSetLayouts = &descriptor_set_layout;
VK_CHECK(vkAllocateDescriptorSets(device, &alloc_info, &descriptor_set));

VkDescriptorBufferInfo buffer_info{};
buffer_info.buffer = result_buffer;
buffer_info.offset = 0;
buffer_info.range = result_size;

VkWriteDescriptorSet write{VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET};
write.dstSet = descriptor_set;
write.dstBinding = 0;
write.dstArrayElement = 0;
write.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
write.descriptorCount = 1;
write.pBufferInfo = &buffer_info;
vkUpdateDescriptorSets(device, 1, &write, 0, nullptr);

// Load compute shader explicitly from slang path to ensure SPV_KHR_compute_shader_derivatives is used
VkPipelineShaderStageCreateInfo stage = load_shader("compute_shader_derivatives/slang/derivatives.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT);

VkComputePipelineCreateInfo compute_ci{VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO};
compute_ci.stage = stage;
compute_ci.layout = pipeline_layout;
VK_CHECK(vkCreateComputePipelines(device, pipeline_cache, 1, &compute_ci, nullptr, &compute_pipeline));
}

/**
 * @brief Prepares the base sample and then the resources specific to this sample.
 * @param options Application options forwarded to the base class
 * @return true when the sample is ready to render, false if the base preparation failed
 */
bool ComputeShaderDerivatives::prepare(const vkb::ApplicationOptions &options)
{
    const bool base_ready = ApiVulkanSample::prepare(options);

    if (base_ready)
    {
        // Order matters: the buffer and descriptor pool must exist before the pipeline
        // setup allocates and writes the descriptor set
        create_output_buffer_and_descriptors();
        create_compute_pipeline();

        prepared = true;
    }

    return base_ready;
}

/**
 * @brief Intentionally empty: no command buffers are pre-recorded by this sample.
 */
void ComputeShaderDerivatives::build_command_buffers()
{
// Not used; this sample records per-frame in render()
}

void ComputeShaderDerivatives::render(float delta_time)
{
if (!prepared)
{
return;
}

// Acquire swapchain image and signal acquired_image_ready
prepare_frame();

// Recreate and record the current frame's command buffer
recreate_current_command_buffer();
VkCommandBuffer cmd = draw_cmd_buffers[current_buffer];

VkCommandBufferBeginInfo begin_info = vkb::initializers::command_buffer_begin_info();
VK_CHECK(vkBeginCommandBuffer(cmd, &begin_info));

// Dispatch a single workgroup; shader has local_size 4x4
vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, compute_pipeline);
vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline_layout, 0, 1, &descriptor_set, 0, nullptr);
vkCmdDispatch(cmd, 1, 1, 1);

// Transition the acquired swapchain image to PRESENT so presentation is valid
VkImageSubresourceRange range{};
range.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
range.baseMipLevel = 0;
range.levelCount = 1;
range.baseArrayLayer = 0;
range.layerCount = 1;
vkb::image_layout_transition(cmd,
swapchain_buffers[current_buffer].image,
VK_IMAGE_LAYOUT_UNDEFINED,
VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
range);

VK_CHECK(vkEndCommandBuffer(cmd));

// Submit: wait on acquire semaphore, signal render_complete for present
VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
VkSubmitInfo submit_info{};
submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
submit_info.waitSemaphoreCount = 1;
submit_info.pWaitSemaphores = &semaphores.acquired_image_ready;
submit_info.pWaitDstStageMask = &wait_stage;
submit_info.commandBufferCount = 1;
submit_info.pCommandBuffers = &cmd;
submit_info.signalSemaphoreCount = 1;
submit_info.pSignalSemaphores = &semaphores.render_complete;

{
auto &queue = get_device().get_queue_by_present(0);
VkQueue q = static_cast<VkQueue>(queue.get_handle());
VK_CHECK(vkQueueSubmit(q, 1, &submit_info, VK_NULL_HANDLE));

// One-time readback and print results after the compute dispatch completes
if (!printed_once)
{
VK_CHECK(vkQueueWaitIdle(q));
void *mapped = nullptr;
VK_CHECK(vkMapMemory(get_device().get_handle(), result_memory, 0, result_size, 0, &mapped));
float *data = static_cast<float *>(mapped);
// Each entry is float4: v, ddx, ddy, pad
for (uint32_t y = 0; y < 4; ++y)
{
for (uint32_t x = 0; x < 4; ++x)
{
uint32_t idx = y * 4 + x;
float v = data[idx * 4 + 0];
float ddx = data[idx * 4 + 1];
float ddy = data[idx * 4 + 2];
LOGI("compute-derivatives CPU: tid=({}, {}) v={} ddx={} ddy={}", x, y, v, ddx, ddy);
}
}
vkUnmapMemory(get_device().get_handle(), result_memory);
printed_once = true;
}
}

// Present (waits on render_complete)
submit_frame();
}

/**
 * @brief Factory function for the sample.
 * @return A newly created ComputeShaderDerivatives instance, owned through the vkb::Application base type
 */
std::unique_ptr<vkb::Application> create_compute_shader_derivatives()
{
    std::unique_ptr<vkb::Application> sample = std::make_unique<ComputeShaderDerivatives>();
    return sample;
}
Loading
Loading