From 6c8ae19be66cee247980a48e736a4e05d14de179 Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Tue, 2 Dec 2025 16:39:36 -0800 Subject: Immediate-mode renderer, triangle demo, shader compilation in cmake, Agility SDK --- .../inc/hlsl/vk/khr/cooperative_matrix.h | 275 +++++++++++++++ .../inc/hlsl/vk/khr/cooperative_matrix.impl | 377 +++++++++++++++++++++ 2 files changed, 652 insertions(+) create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl (limited to 'contrib/dxc_2025_07_14/inc/hlsl/vk/khr') diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h new file mode 100644 index 0000000..a53ab4c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h @@ -0,0 +1,275 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ +#define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ + +#if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6 +#error "CooperativeMatrix requires a minimum of SPIR-V 1.6" +#endif + +#include "vk/spirv.h" + +namespace vk { +namespace khr { + +// The base cooperative matrix class. The template arguments correspond to the +// operands in the OpTypeCooperativeMatrixKHR instruction. +template +class CooperativeMatrix { + template + CooperativeMatrix cast(); + + // Apply OpSNegate or OFNegate, depending on ComponentType, in a element by + // element manner. + CooperativeMatrix negate(); + + // Apply OpIAdd or OFAdd, depending on ComponentType, in a element by element + // manner. 
+ CooperativeMatrix operator+(CooperativeMatrix other);
+
+ // Apply OpISub or OpFSub, depending on ComponentType, in an element by element
+ // manner.
+ CooperativeMatrix operator-(CooperativeMatrix other);
+
+ // Apply OpIMul or OpFMul, depending on ComponentType, in an element by element
+ // manner.
+ CooperativeMatrix operator*(CooperativeMatrix other);
+
+ // Apply OpSDiv, OpUDiv or OpFDiv, depending on ComponentType, in an element by
+ // element manner.
+ CooperativeMatrix operator/(CooperativeMatrix other);
+
+ // Apply OpMatrixTimesScalar in an element by element manner.
+ CooperativeMatrix operator*(ComponentType scalar);
+
+ // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+ // data using the given memory layout, stride, and memory access operands.
+ // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope
+ // will be added to the memory access operands to make the memory coherent.
+ //
+ // This function uses a SPIR-V pointer because HLSL does not allow groupshared
+ // memory object to be passed by reference. The pointer is a hack to get
+ // around that.
+ //
+ // The layout and stride will be passed to the SPIR-V instruction as is. The
+ // precise meaning can be found in the specification for
+ // SPV_KHR_cooperative_matrix.
+ template
+ void Store(WorkgroupSpirvPointer data, uint32_t stride);
+
+ // Same as above, but uses MemoryAccessMaskNone for the memory access
+ // operands.
+ template
+ void Store(WorkgroupSpirvPointer data, uint32_t stride) {
+ Store(data, stride);
+ }
+
+ // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+ // data[index] using the given memory layout, stride, and memory access
+ // operands. The layout and stride will be passed to the SPIR-V instruction as
+ // is. The precise meaning can be found in the specification for
+ // SPV_KHR_cooperative_matrix. 
+ template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride) { + Store(data, index, stride); + } + + // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to + // data[index] using the given memory layout, stride, and memory access + // operands. `NonPrivatePointer` and `MakePointerAvailable` with the + // QueueFamily scope will be added to the memory access operands to make the + // memory coherent. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride) { + CoherentStore(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data using the given memory layout, stride, and memory access operands. + // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope + // will be added to the memory access operands to make the memory coherent. + // + // This function uses a SPIR-V pointer because HLSL does not allow groupshared + // memory object to be passed by reference. The pointer is a hack to get + // around that. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. 
+ template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride) { + return Load(data, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. `NonPrivatePointer` and `MakePointerVisible` with the QueueFamily + // scope will be added to the memory access operands to make the memory + // coherent. + // + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return CoherentLoad(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. No memory access bits are added to the operands. Since the memory + // is readonly, there should be no need. 
+ // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Constructs a cooperative matrix with all values initialized to v. Note that + // all threads in scope must have the same value for v. + static CooperativeMatrix Splat(ComponentType v); + + // Returns the result of OpCooperativeMatrixLengthKHR on the current type. + static uint32_t GetLength(); + + // Functions to access the elements of the cooperative matrix. The index must + // be less than GetLength(). + void Set(ComponentType value, uint32_t index); + ComponentType Get(uint32_t index); + + static const bool hasSignedIntegerComponentType = + (ComponentType(0) - ComponentType(1) < ComponentType(0)); + + // clang-format off + using SpirvMatrixType = vk::SpirvOpaqueType< + /* OpTypeCooperativeMatrixKHR */ 4456, ComponentType, + vk::integral_constant, vk::integral_constant, + vk::integral_constant, vk::integral_constant >; + + [[vk::ext_extension("SPV_KHR_cooperative_matrix")]] + [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]] + [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]] + SpirvMatrixType _matrix; + // clang-format on +}; + +// Cooperative matrix that can be used in the "a" position of a multiply add +// instruction (r = (a * b) + c). +template +using CooperativeMatrixA = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "b" position of a multiply add +// instruction (r = (a * b) + c). 
+template +using CooperativeMatrixB = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "r" and "c" position of a multiply +// add instruction (r = (a * b) + c). +template +using CooperativeMatrixAccumulator = + CooperativeMatrix; + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit not set. +template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit set. +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +} // namespace khr +} // namespace vk + +#include "cooperative_matrix.impl" +#endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl new file mode 100644 index 0000000..2acae8e --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl @@ -0,0 +1,377 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "vk/opcode_selector.h"
+
+template
+[[vk::ext_instruction(/* OpMatrixTimesScalar */ 143)]] ResultType
+__builtin_spv_MatrixTimesScalar(ResultType a, ComponentType b);
+
+template
+[[vk::ext_instruction(/* OpCompositeExtract */ 81)]] ComponentType
+__builtin_spv_ExtractFromCooperativeMatrix(
+ typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix,
+ uint32_t index);
+
+template
+[[vk::ext_instruction(/* OpCompositeConstruct */ 80)]] CoopMatrixType
+__builtin_spv_ConstructCooperativeMatrix(ComponentType value);
+
+template
+[[vk::ext_instruction(/* OpAccessChain */ 65)]] ResultPointerType
+__builtin_spv_AccessChain([[vk::ext_reference]] BaseType base, uint32_t index);
+
+template
+[[vk::ext_instruction(/* OpLoad */ 61)]] ObjectType
+__builtin_spv_LoadPointer(PointerType base);
+
+template
+[[vk::ext_instruction(/* OpStore */ 62)]] void
+__builtin_spv_StorePointer(PointerType base, ObjectType object);
+
+template
+[[vk::ext_instruction(/* OpCompositeInsert */ 82)]]
+typename vk::khr::CooperativeMatrix::SpirvMatrixType
+__builtin_spv_InsertIntoCooperativeMatrix(
+ ComponentType value,
+ typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix,
+ uint32_t index);
+
+// Define the load and store instructions
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+ [[vk::ext_reference]] PointerType pointer,
+ vk::CooperativeMatrixLayout memory_layout, uint stride,
+ [[vk::ext_literal]] uint32_t memory_operand);
+
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+ [[vk::ext_reference]] PointerType pointer,
+ vk::CooperativeMatrixLayout memory_layout, uint stride,
+ [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+
+template
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType 
+__builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + vk::WorkgroupSpirvPointer pointer, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + vk::WorkgroupSpirvPointer pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +// We cannot define `OpCooperativeMatrixLengthKHR` using ext_instruction because +// one of the operands is a type id. This builtin will have specific code in the +// compiler to expand it. 
+template uint __builtin_spv_CooperativeMatrixLengthKHR(); + +// Arithmetic Instructions +template +[[vk::ext_instruction(/* OpCooperativeMatrixMulAddKHR */ 4459)]] ResultType +__builtin_spv_CooperativeMatrixMulAddKHR(MatrixTypeA a, MatrixTypeB b, + MatrixTypeC c, + [[vk::ext_literal]] int operands); +namespace vk { +namespace khr { + +template +template +CooperativeMatrix +CooperativeMatrix::cast() { + using ResultType = + CooperativeMatrix; + ResultType result; + result._matrix = util::ConversionSelector:: + template Convert(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::negate() { + CooperativeMatrix result; + result._matrix = util::ArithmeticSelector::Negate(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator+( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Add(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator-( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Sub(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Mul(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator/( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Div(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + ComponentType scalar) { + CooperativeMatrix result; + result._matrix = __builtin_spv_MatrixTimesScalar(_matrix, scalar); + return result; +} + +template +template +void CooperativeMatrix::Store( + WorkgroupSpirvPointer data, uint32_t stride) { + __builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + data, _matrix, layout, stride, + 
memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeWorkgroup); +} + +template +template +void CooperativeMatrix::Store( + RWStructuredBuffer data, uint32_t index, uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR(data[index], _matrix, layout, stride, + memoryAccessOperands); +} + +template +template +void CooperativeMatrix::CoherentStore( + globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR( + data[index], _matrix, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeQueueFamily); +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + vk::WorkgroupSpirvPointer buffer, uint32_t stride) { + CooperativeMatrix result; + result._matrix = + __builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + buffer, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeWorkgroup); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, memoryAccessOperands); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::CoherentLoad( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeQueueFamily); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + StructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + 
buffer[index], layout, stride, MemoryAccessMaskNone); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::Splat( + ComponentType v) { + CooperativeMatrix result; + result._matrix = __builtin_spv_ConstructCooperativeMatrix(v); + return result; +} + +template +uint CooperativeMatrix::GetLength() { + return __builtin_spv_CooperativeMatrixLengthKHR(); +} + +template +ComponentType CooperativeMatrix::Get( + uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_LoadPointer(ptr); +} + +template +void CooperativeMatrix::Set( + ComponentType value, uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_StorePointer(ptr, value); +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? 
allSignedComponents + : vk::CooperativeMatrixOperandsMaskNone); + + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? allSignedComponents + : vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask); + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +} // namespace khr +} // namespace vk -- cgit v1.2.3