From 6c8ae19be66cee247980a48e736a4e05d14de179 Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Tue, 2 Dec 2025 16:39:36 -0800 Subject: Immediate-mode renderer, triangle demo, shader compilation in cmake, Agility SDK --- .../inc/hlsl/vk/khr/cooperative_matrix.h | 275 +++++++++++++++ .../inc/hlsl/vk/khr/cooperative_matrix.impl | 377 +++++++++++++++++++++ 2 files changed, 652 insertions(+) create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl (limited to 'contrib/dxc_2025_07_14/inc/hlsl/vk/khr') diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h new file mode 100644 index 0000000..a53ab4c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h @@ -0,0 +1,275 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ +#define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ + +#if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6 +#error "CooperativeMatrix requires a minimum of SPIR-V 1.6" +#endif + +#include "vk/spirv.h" + +namespace vk { +namespace khr { + +// The base cooperative matrix class. The template arguments correspond to the +// operands in the OpTypeCooperativeMatrixKHR instruction. +template +class CooperativeMatrix { + template + CooperativeMatrix cast(); + + // Apply OpSNegate or OFNegate, depending on ComponentType, in a element by + // element manner. + CooperativeMatrix negate(); + + // Apply OpIAdd or OFAdd, depending on ComponentType, in a element by element + // manner. 
+ CooperativeMatrix operator+(CooperativeMatrix other);
+
+ // Apply OpISub or OpFSub, depending on ComponentType, in an element by element
+ // manner.
+ CooperativeMatrix operator-(CooperativeMatrix other);
+
+ // Apply OpIMul or OpFMul, depending on ComponentType, in an element by element
+ // manner.
+ CooperativeMatrix operator*(CooperativeMatrix other);
+
+ // Apply OpSDiv, OpUDiv or OpFDiv, depending on ComponentType, in an element by
+ // element manner.
+ CooperativeMatrix operator/(CooperativeMatrix other);
+
+ // Apply OpMatrixTimesScalar in an element by element manner.
+ CooperativeMatrix operator*(ComponentType scalar);
+
+ // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+ // data using the given memory layout, stride, and memory access operands.
+ // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope
+ // will be added to the memory access operands to make the memory coherent.
+ //
+ // This function uses a SPIR-V pointer because HLSL does not allow groupshared
+ // memory object to be passed by reference. The pointer is a hack to get
+ // around that.
+ //
+ // The layout and stride will be passed to the SPIR-V instruction as is. The
+ // precise meaning can be found in the specification for
+ // SPV_KHR_cooperative_matrix.
+ template
+ void Store(WorkgroupSpirvPointer data, uint32_t stride);
+
+ // Same as above, but uses MemoryAccessMaskNone for the memory access
+ // operands.
+ template
+ void Store(WorkgroupSpirvPointer data, uint32_t stride) {
+ Store(data, stride);
+ }
+
+ // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+ // data[index] using the given memory layout, stride, and memory access
+ // operands. The layout and stride will be passed to the SPIR-V instruction as
+ // is. The precise meaning can be found in the specification for
+ // SPV_KHR_cooperative_matrix. 
+ template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride) { + Store(data, index, stride); + } + + // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to + // data[index] using the given memory layout, stride, and memory access + // operands. `NonPrivatePointer` and `MakePointerAvailable` with the + // QueueFamily scope will be added to the memory access operands to make the + // memory coherent. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride) { + CoherentStore(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data using the given memory layout, stride, and memory access operands. + // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope + // will be added to the memory access operands to make the memory coherent. + // + // This function uses a SPIR-V pointer because HLSL does not allow groupshared + // memory object to be passed by reference. The pointer is a hack to get + // around that. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. 
+ template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride) { + return Load(data, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. `NonPrivatePointer` and `MakePointerVisible` with the QueueFamily + // scope will be added to the memory access operands to make the memory + // coherent. + // + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return CoherentLoad(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. No memory access bits are added to the operands. Since the memory + // is readonly, there should be no need. 
+ // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Constructs a cooperative matrix with all values initialized to v. Note that + // all threads in scope must have the same value for v. + static CooperativeMatrix Splat(ComponentType v); + + // Returns the result of OpCooperativeMatrixLengthKHR on the current type. + static uint32_t GetLength(); + + // Functions to access the elements of the cooperative matrix. The index must + // be less than GetLength(). + void Set(ComponentType value, uint32_t index); + ComponentType Get(uint32_t index); + + static const bool hasSignedIntegerComponentType = + (ComponentType(0) - ComponentType(1) < ComponentType(0)); + + // clang-format off + using SpirvMatrixType = vk::SpirvOpaqueType< + /* OpTypeCooperativeMatrixKHR */ 4456, ComponentType, + vk::integral_constant, vk::integral_constant, + vk::integral_constant, vk::integral_constant >; + + [[vk::ext_extension("SPV_KHR_cooperative_matrix")]] + [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]] + [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]] + SpirvMatrixType _matrix; + // clang-format on +}; + +// Cooperative matrix that can be used in the "a" position of a multiply add +// instruction (r = (a * b) + c). +template +using CooperativeMatrixA = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "b" position of a multiply add +// instruction (r = (a * b) + c). 
+template +using CooperativeMatrixB = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "r" and "c" position of a multiply +// add instruction (r = (a * b) + c). +template +using CooperativeMatrixAccumulator = + CooperativeMatrix; + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit not set. +template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit set. +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +} // namespace khr +} // namespace vk + +#include "cooperative_matrix.impl" +#endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl new file mode 100644 index 0000000..2acae8e --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl @@ -0,0 +1,377 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "vk/opcode_selector.h"
+
+template
+[[vk::ext_instruction(/* OpMatrixTimesScalar */ 143)]] ResultType
+__builtin_spv_MatrixTimesScalar(ResultType a, ComponentType b);
+
+template
+[[vk::ext_instruction(/* OpCompositeExtract */ 81)]] ComponentType
+__builtin_spv_ExtractFromCooperativeMatrix(
+ typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix,
+ uint32_t index);
+
+template
+[[vk::ext_instruction(/* OpCompositeConstruct */ 80)]] CoopMatrixType
+__builtin_spv_ConstructCooperativeMatrix(ComponentType value);
+
+template
+[[vk::ext_instruction(/* OpAccessChain */ 65)]] ResultPointerType
+__builtin_spv_AccessChain([[vk::ext_reference]] BaseType base, uint32_t index);
+
+template
+[[vk::ext_instruction(/* OpLoad */ 61)]] ObjectType
+__builtin_spv_LoadPointer(PointerType base);
+
+template
+[[vk::ext_instruction(/* OpStore */ 62)]] void
+__builtin_spv_StorePointer(PointerType base, ObjectType object);
+
+template
+[[vk::ext_instruction(/* OpCompositeInsert */ 82)]]
+typename vk::khr::CooperativeMatrix::SpirvMatrixType
+__builtin_spv_InsertIntoCooperativeMatrix(
+ ComponentType value,
+ typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix,
+ uint32_t index);
+
+// Define the load and store instructions
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+ [[vk::ext_reference]] PointerType pointer,
+ vk::CooperativeMatrixLayout memory_layout, uint stride,
+ [[vk::ext_literal]] uint32_t memory_operand);
+
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+ [[vk::ext_reference]] PointerType pointer,
+ vk::CooperativeMatrixLayout memory_layout, uint stride,
+ [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType 
+__builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + vk::WorkgroupSpirvPointer pointer, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + vk::WorkgroupSpirvPointer pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +// We cannot define `OpCooperativeMatrixLengthKHR` using ext_instruction because +// one of the operands is a type id. This builtin will have specific code in the +// compiler to expand it. 
+template uint __builtin_spv_CooperativeMatrixLengthKHR(); + +// Arithmetic Instructions +template +[[vk::ext_instruction(/* OpCooperativeMatrixMulAddKHR */ 4459)]] ResultType +__builtin_spv_CooperativeMatrixMulAddKHR(MatrixTypeA a, MatrixTypeB b, + MatrixTypeC c, + [[vk::ext_literal]] int operands); +namespace vk { +namespace khr { + +template +template +CooperativeMatrix +CooperativeMatrix::cast() { + using ResultType = + CooperativeMatrix; + ResultType result; + result._matrix = util::ConversionSelector:: + template Convert(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::negate() { + CooperativeMatrix result; + result._matrix = util::ArithmeticSelector::Negate(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator+( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Add(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator-( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Sub(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Mul(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator/( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Div(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + ComponentType scalar) { + CooperativeMatrix result; + result._matrix = __builtin_spv_MatrixTimesScalar(_matrix, scalar); + return result; +} + +template +template +void CooperativeMatrix::Store( + WorkgroupSpirvPointer data, uint32_t stride) { + __builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + data, _matrix, layout, stride, + 
memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeWorkgroup); +} + +template +template +void CooperativeMatrix::Store( + RWStructuredBuffer data, uint32_t index, uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR(data[index], _matrix, layout, stride, + memoryAccessOperands); +} + +template +template +void CooperativeMatrix::CoherentStore( + globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR( + data[index], _matrix, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeQueueFamily); +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + vk::WorkgroupSpirvPointer buffer, uint32_t stride) { + CooperativeMatrix result; + result._matrix = + __builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + buffer, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeWorkgroup); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, memoryAccessOperands); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::CoherentLoad( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeQueueFamily); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + StructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + 
buffer[index], layout, stride, MemoryAccessMaskNone); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::Splat( + ComponentType v) { + CooperativeMatrix result; + result._matrix = __builtin_spv_ConstructCooperativeMatrix(v); + return result; +} + +template +uint CooperativeMatrix::GetLength() { + return __builtin_spv_CooperativeMatrixLengthKHR(); +} + +template +ComponentType CooperativeMatrix::Get( + uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_LoadPointer(ptr); +} + +template +void CooperativeMatrix::Set( + ComponentType value, uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_StorePointer(ptr, value); +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? 
allSignedComponents + : vk::CooperativeMatrixOperandsMaskNone); + + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? allSignedComponents + : vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask); + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +} // namespace khr +} // namespace vk -- cgit v1.2.3