Vulkan计算着色器开发Skill vulkan-compute

Vulkan计算着色器开发技能是一个专门用于GPU并行计算的工具,专注于Vulkan API下的计算着色器编程。该技能提供完整的计算管线配置方案,包括GLSL/HLSL着色器代码生成、SPIR-V字节码编译、描述符集管理、内存屏障同步等核心功能。适用于高性能计算、图形渲染、AI推理加速、科学模拟等需要GPU并行处理的场景。关键词:Vulkan计算着色器,GPU并行计算,SPIR-V编译,计算管线配置,描述符集管理,内存屏障同步,高性能计算,图形编程。

游戏开发 0 次安装 0 次浏览 更新于 2/25/2026

name: vulkan-compute description: Vulkan计算着色器开发与管线配置。生成GLSL/HLSL计算着色器,编译为SPIR-V,配置计算管线,管理描述符集与资源绑定,实现内存屏障与同步。 allowed-tools: Bash(*) 读取 写入 编辑 全局搜索 文本搜索 网络获取 metadata: author: babysitter-sdk version: "1.0.0" category: compute-shaders backlog-id: SK-004

vulkan-compute

你是 vulkan-compute - 一个专门用于Vulkan计算着色器开发和管线配置的技能。该技能提供使用Vulkan API进行GPU计算的专家级能力。

概述

此技能支持AI驱动的Vulkan计算操作,包括:

  • 生成GLSL/HLSL计算着色器
  • 将着色器编译为SPIR-V字节码
  • 配置Vulkan计算管线
  • 管理描述符集和资源绑定
  • 处理推送常量和特化常量
  • 配置工作组维度和调度
  • 实现内存屏障和同步
  • 支持Vulkan验证层进行调试

前提条件

  • Vulkan SDK 1.3+
  • glslangValidator 或 glslc (SPIR-V编译器)
  • SPIRV-Tools (可选)
  • 支持Vulkan的GPU

能力

1. GLSL计算着色器生成

生成GLSL计算着色器:

#version 450

// One-dimensional workgroup: 256 invocations along X.
layout(local_size_x = 256, local_size_y = 1, local_size_z = 1) in;

// Storage buffers in descriptor set 0: binding 0 is read, binding 1 is written.
layout(set = 0, binding = 0) readonly buffer InputBuffer {
    float inputData[];
};

layout(set = 0, binding = 1) writeonly buffer OutputBuffer {
    float outputData[];
};

// Per-dispatch parameters supplied through push constants.
layout(push_constant) uniform PushConstants {
    uint dataSize;
    float multiplier;
} pc;

// Scales one input element per invocation and stores it at the same index.
void main() {
    uint index = gl_GlobalInvocationID.x;

    // Invocations past the end of the data do nothing.
    if (index >= pc.dataSize) {
        return;
    }

    outputData[index] = pc.multiplier * inputData[index];
}

2. SPIR-V编译

将着色器编译为SPIR-V:

# Compile with glslangValidator.
# The stage is normally inferred from the file extension (.comp, .vert, ...);
# a plain ".glsl" name carries no stage, so it must be given with -S.
glslangValidator -V -S comp compute.glsl -o compute.spv

# Compile with glslc (Google's compiler); -fshader-stage is required for the
# same reason and must appear before the input file.
glslc -fshader-stage=compute compute.glsl -o compute.spv

# Compile with optimization enabled
glslc -fshader-stage=compute -O compute.glsl -o compute.spv

# Disassemble into human-readable SPIR-V
spirv-dis compute.spv -o compute.spvasm

# Validate the SPIR-V module
spirv-val compute.spv

# Optimize the SPIR-V module
spirv-opt -O compute.spv -o compute_opt.spv

3. 计算管线创建

创建Vulkan计算管线:

// Load the SPIR-V shader.
// spirvCode and spirvSize are defined outside this snippet; the Vulkan API
// takes codeSize in bytes and pCode as a pointer to 32-bit words.
VkShaderModuleCreateInfo shaderInfo = {
    .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO,
    .codeSize = spirvSize,
    .pCode = spirvCode
};
VkShaderModule shaderModule;
// NOTE(review): the VkResult of this and the later vkCreate* calls is
// discarded; production code should compare it against VK_SUCCESS.
vkCreateShaderModule(device, &shaderInfo, NULL, &shaderModule);

// Pipeline layout with one descriptor set layout and one push-constant range.
// PushConstants is presumably a C struct mirroring the shader's push-constant
// block; it is not defined in this snippet.
VkPushConstantRange pushConstantRange = {
    .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
    .offset = 0,
    .size = sizeof(PushConstants)
};

// descriptorSetLayout must already have been created before this point;
// it is declared outside this snippet.
VkPipelineLayoutCreateInfo layoutInfo = {
    .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    .setLayoutCount = 1,
    .pSetLayouts = &descriptorSetLayout,
    .pushConstantRangeCount = 1,
    .pPushConstantRanges = &pushConstantRange
};
VkPipelineLayout pipelineLayout;
vkCreatePipelineLayout(device, &layoutInfo, NULL, &pipelineLayout);

// Create the compute pipeline: a single compute stage whose entry point is
// "main", using the layout created above. No pipeline cache is passed
// (VK_NULL_HANDLE).
VkComputePipelineCreateInfo pipelineInfo = {
    .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
    .stage = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
        .stage = VK_SHADER_STAGE_COMPUTE_BIT,
        .module = shaderModule,
        .pName = "main"
    },
    .layout = pipelineLayout
};
VkPipeline computePipeline;
vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &pipelineInfo, NULL, &computePipeline);

4. 描述符集管理

配置资源绑定:

// Descriptor set layout: two storage-buffer bindings (0 and 1), each holding
// one descriptor and visible to the compute stage only.
VkDescriptorSetLayoutBinding bindings[] = {
    {
        .binding = 0,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT
    },
    {
        .binding = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .descriptorCount = 1,
        .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT
    }
};

// bindingCount must match the number of elements in bindings[] above.
VkDescriptorSetLayoutCreateInfo layoutInfo = {
    .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
    .bindingCount = 2,
    .pBindings = bindings
};
VkDescriptorSetLayout descriptorSetLayout;
// NOTE(review): the returned VkResult is not checked here.
vkCreateDescriptorSetLayout(device, &layoutInfo, NULL, &descriptorSetLayout);

// Update the descriptor set.
// NOTE(review): descriptorSet, inputBuffer and outputBuffer are used below but
// not created in this snippet; the set is presumably allocated from a
// descriptor pool elsewhere - confirm before copying this code.
// Each buffer is bound from offset 0 through its end (VK_WHOLE_SIZE).
VkDescriptorBufferInfo inputBufferInfo = {
    .buffer = inputBuffer,
    .offset = 0,
    .range = VK_WHOLE_SIZE
};

VkDescriptorBufferInfo outputBufferInfo = {
    .buffer = outputBuffer,
    .offset = 0,
    .range = VK_WHOLE_SIZE
};

// One write per binding: binding 0 <- input buffer, binding 1 <- output buffer.
VkWriteDescriptorSet writes[] = {
    {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = descriptorSet,
        .dstBinding = 0,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .pBufferInfo = &inputBufferInfo
    },
    {
        .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
        .dstSet = descriptorSet,
        .dstBinding = 1,
        .descriptorCount = 1,
        .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
        .pBufferInfo = &outputBufferInfo
    }
};
// Apply both writes; no descriptor copies are requested (0, NULL).
vkUpdateDescriptorSets(device, 2, writes, 0, NULL);

5. 特化常量

在管线创建时定制着色器(无需重新编译SPIR-V):

// In the shader (GLSL): two specialization constants with their defaults.
layout(constant_id = 0) const uint WORKGROUP_SIZE = 256;
layout(constant_id = 1) const bool USE_FAST_MATH = false;

// The workgroup X size is taken from specialization constant id 0.
layout(local_size_x_id = 0) in;
// In the C code: each map entry is {constantID, offset into specData, size}.
VkSpecializationMapEntry entries[] = {
    {0, 0, sizeof(uint32_t)},  // WORKGROUP_SIZE
    {1, sizeof(uint32_t), sizeof(VkBool32)}  // USE_FAST_MATH
};

// Values that replace the shader defaults: WORKGROUP_SIZE 256 -> 512,
// USE_FAST_MATH false -> true. Field order matches the offsets used above.
// NOTE(review): with local size X overridden to 512, any dispatch that derives
// its group count from 256 would need to use 512 instead - confirm at call site.
struct {
    uint32_t workgroupSize;
    VkBool32 useFastMath;
} specData = {512, VK_TRUE};

VkSpecializationInfo specInfo = {
    .mapEntryCount = 2,
    .pMapEntries = entries,
    .dataSize = sizeof(specData),
    .pData = &specData
};

// Attach to the shader stage before the pipeline is created; pipelineInfo is
// declared outside this snippet.
pipelineInfo.stage.pSpecializationInfo = &specInfo;

6. 计算调度

执行计算工作:

// Bind the compute pipeline, its descriptor set (set index 0, no dynamic
// offsets) and upload the push constants for the compute stage.
vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, computePipeline);
vkCmdBindDescriptorSets(
    commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, pipelineLayout,
    0, 1, &descriptorSet,
    0, NULL);
vkCmdPushConstants(
    commandBuffer, pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT,
    0, sizeof(PushConstants), &pushConstants);

// Direct dispatch: one workgroup per LOCAL_SIZE_X elements, rounded up so a
// partial trailing group is still launched.
enum { LOCAL_SIZE_X = 256 };
uint32_t groupCountX = (dataSize + (LOCAL_SIZE_X - 1)) / LOCAL_SIZE_X;
vkCmdDispatch(commandBuffer, groupCountX, 1, 1);

// Indirect dispatch: group counts are read from indirectBuffer at offset 0.
vkCmdDispatchIndirect(commandBuffer, indirectBuffer, 0);

7. 内存屏障与同步

正确同步:

// Buffer memory barrier: shader writes to `buffer` must be made available
// before subsequent shader reads. Covers the whole buffer (offset 0,
// VK_WHOLE_SIZE) and performs no queue-family ownership transfer
// (both indices are VK_QUEUE_FAMILY_IGNORED).
VkBufferMemoryBarrier barrier = {
    .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
    .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_SHADER_READ_BIT,
    .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
    .buffer = buffer,
    .offset = 0,
    .size = VK_WHOLE_SIZE
};

// Compute stage -> compute stage, carrying only the one buffer barrier
// (no global memory barriers, no image barriers).
vkCmdPipelineBarrier(commandBuffer,
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    0, 0, NULL, 1, &barrier, 0, NULL);

// Global memory barrier from compute to transfer: shader writes must be
// available before transfer reads.
VkMemoryBarrier memoryBarrier = {
    .sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER,
    .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT,
    .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT
};

// Compute stage -> transfer stage, carrying only the one global barrier.
vkCmdPipelineBarrier(commandBuffer,
    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    VK_PIPELINE_STAGE_TRANSFER_BIT,
    0, 1, &memoryBarrier, 0, NULL, 0, NULL);

8. 验证层

使用验证层调试:

// 启用验证层
const char* validationLayers[] = {
    "VK_LAYER_KHRONOS_validation"
};

VkInstanceCreateInfo createInfo = {
    .enabledLayerCount = 1,
    .ppEnabledLayerNames = validationLayers
};

// 调试消息回调
VkDebugUtilsMessengerCreateInfoEXT debugInfo = {
    .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT,
    .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT |
                       VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT,
    .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT |
                   VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT,
    .pfnUserCallback = debugCallback
};

流程集成

此技能与以下流程集成:

  • compute-shader-development.js - 计算着色器工作流

输出格式

{
  "operation": "compile-shader",
  "status": "success",
  "input": "compute.glsl",
  "output": "compute.spv",
  "spirv_size": 1024,
  "workgroup_size": [256, 1, 1],
  "bindings": [
    {"binding": 0, "type": "storage_buffer", "access": "readonly"},
    {"binding": 1, "type": "storage_buffer", "access": "writeonly"}
  ],
  "push_constants_size": 8,
  "artifacts": ["compute.spv", "compute.spvasm"]
}

依赖项

  • Vulkan SDK 1.3+
  • glslangValidator 或 glslc
  • SPIRV-Tools (可选)

约束

  • 工作组大小受设备限制 (通常为1024个线程)
  • 可同时绑定的描述符集数量有限 (maxBoundDescriptorSets,规范保证至少4个)
  • 推送常量大小有限 (maxPushConstantsSize,规范保证至少128字节)
  • SPIR-V版本不得高于目标Vulkan版本所支持的最高版本 (Vulkan 1.0→SPIR-V 1.0,1.1→1.3,1.2→1.5,1.3→1.6)