4 files changed, 964 insertions, 0 deletions
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h
new file mode 100644
index 0000000..a53ab4c
--- /dev/null
+++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h
@@ -0,0 +1,275 @@
+// Copyright (c) 2024 Google LLC
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_
+#define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_
+#if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6
+#error "CooperativeMatrix requires a minimum of SPIR-V 1.6"
+#endif
+#include "vk/spirv.h"
+namespace vk {
+namespace khr {
+// The base cooperative matrix class. The template arguments correspond to the
+// operands in the OpTypeCooperativeMatrixKHR instruction and are forwarded, in
+// the same order, to SpirvMatrixType below.
+template <typename ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+class CooperativeMatrix {
+  // Returns a matrix with the same scope, dimensions, and use whose component
+  // type is NewComponentType. The conversion instruction is chosen by
+  // util::ConversionSelector<ComponentType, NewComponentType>.
+  template <class NewComponentType>
+  CooperativeMatrix<NewComponentType, scope, rows, columns, use> cast();
+  // Apply OpSNegate or OpFNegate, depending on ComponentType, in an
+  // element-by-element manner.
+  CooperativeMatrix negate();
+  // Apply OpIAdd or OpFAdd, depending on ComponentType, in an
+  // element-by-element manner.
+  CooperativeMatrix operator+(CooperativeMatrix other);
+  // Apply OpISub or OpFSub, depending on ComponentType, in an
+  // element-by-element manner.
+  CooperativeMatrix operator-(CooperativeMatrix other);
+  // Apply OpIMul or OpFMul, depending on ComponentType, in an
+  // element-by-element manner.
+  CooperativeMatrix operator*(CooperativeMatrix other);
+  // Apply OpSDiv, OpUDiv or OpFDiv, depending on ComponentType, in an
+  // element-by-element manner.
+  CooperativeMatrix operator/(CooperativeMatrix other);
+  // Apply OpMatrixTimesScalar in an element-by-element manner.
+  CooperativeMatrix operator*(ComponentType scalar);
+  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+  // data using the given memory layout, stride, and memory access operands.
+  // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope
+  // will be added to the memory access operands to make the memory coherent.
+  //
+  // This function uses a SPIR-V pointer because HLSL does not allow groupshared
+  // memory object to be passed by reference. The pointer is a hack to get
+  // around that.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access
+  // operands.
+  template <CooperativeMatrixLayout layout, class Type>
+  void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride) {
+    Store<MemoryAccessMaskNone, layout>(data, stride);
+  }
+  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+  // data[index] using the given memory layout, stride, and memory access
+  // operands. The layout and stride will be passed to the SPIR-V instruction as
+  // is. The precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access
+  // operands.
+  template <CooperativeMatrixLayout layout, class Type>
+  void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride) {
+    Store<MemoryAccessMaskNone, layout>(data, index, stride);
+  }
+  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
+  // data[index] using the given memory layout, stride, and memory access
+  // operands. `NonPrivatePointer` and `MakePointerAvailable` with the
+  // QueueFamily scope will be added to the memory access operands to make the
+  // memory coherent.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  void CoherentStore(globallycoherent RWStructuredBuffer<Type> data,
+                     uint32_t index, uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access operands
+  // template argument.
+  template <CooperativeMatrixLayout layout, class Type>
+  void CoherentStore(globallycoherent RWStructuredBuffer<Type> data,
+                     uint32_t index, uint32_t stride) {
+    CoherentStore<MemoryAccessMaskNone, layout>(data, index, stride);
+  }
+  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
+  // data using the given memory layout, stride, and memory access operands.
+  // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope
+  // will be added to the memory access operands to make the memory coherent.
+  //
+  // This function uses a SPIR-V pointer because HLSL does not allow groupshared
+  // memory object to be passed by reference. The pointer is a hack to get
+  // around that.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data,
+                                uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access
+  // operands.
+  template <CooperativeMatrixLayout layout, class Type>
+  static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data,
+                                uint32_t stride) {
+    return Load<MemoryAccessMaskNone, layout>(data, stride);
+  }
+  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
+  // data[index] using the given memory layout, stride, and memory access
+  // operands.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index,
+                                uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access
+  // operands.
+  template <CooperativeMatrixLayout layout, class Type>
+  static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index,
+                                uint32_t stride) {
+    return Load<MemoryAccessMaskNone, layout>(data, index, stride);
+  }
+  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
+  // data[index] using the given memory layout, stride, and memory access
+  // operands. `NonPrivatePointer` and `MakePointerVisible` with the QueueFamily
+  // scope will be added to the memory access operands to make the memory
+  // coherent.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  static CooperativeMatrix
+  CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
+               uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access operands
+  // template argument.
+  template <CooperativeMatrixLayout layout, class Type>
+  static CooperativeMatrix
+  CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
+               uint32_t stride) {
+    return CoherentLoad<MemoryAccessMaskNone, layout>(data, index, stride);
+  }
+  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
+  // data[index] using the given memory layout, stride, and memory access
+  // operands. No memory access bits are added to the operands. Since the memory
+  // is readonly, there should be no need.
+  //
+  // The layout and stride will be passed to the SPIR-V instruction as is. The
+  // precise meaning can be found in the specification for
+  // SPV_KHR_cooperative_matrix.
+  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+            class Type>
+  static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
+                                uint32_t stride);
+  // Same as above, but uses MemoryAccessMaskNone for the memory access
+  // operands.
+  template <CooperativeMatrixLayout layout, class Type>
+  static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
+                                uint32_t stride) {
+    return Load<MemoryAccessMaskNone, layout>(data, index, stride);
+  }
+  // Constructs a cooperative matrix with all values initialized to v. Note that
+  // all threads in scope must have the same value for v.
+  static CooperativeMatrix Splat(ComponentType v);
+  // Returns the result of OpCooperativeMatrixLengthKHR on the current type.
+  static uint32_t GetLength();
+  // Functions to access the elements of the cooperative matrix. The index must
+  // be less than GetLength().
+  void Set(ComponentType value, uint32_t index);
+  ComponentType Get(uint32_t index);
+  // True when (0 - 1) compares less than 0 in ComponentType, i.e. the type can
+  // represent negative values. The multiply-add helpers read this flag to
+  // decide whether to set the *SignedComponentsKHR operand bits.
+  // NOTE(review): the expression is also true for floating-point component
+  // types, not only signed integers -- confirm that is intended.
+  static const bool hasSignedIntegerComponentType =
+      (ComponentType(0) - ComponentType(1) < ComponentType(0));
+  // clang-format off
+  // The underlying SPIR-V cooperative matrix type: OpTypeCooperativeMatrixKHR
+  // (opcode 4456) with the component type, scope, rows, columns, and use
+  // operands taken from the class template arguments.
+  using SpirvMatrixType = vk::SpirvOpaqueType<
+      /* OpTypeCooperativeMatrixKHR */ 4456, ComponentType,
+      vk::integral_constant<uint, scope>, vk::integral_constant<uint, rows>,
+      vk::integral_constant<uint, columns>, vk::integral_constant<uint, use> >;
+  // The wrapped SPIR-V value. The attributes attach the
+  // SPV_KHR_cooperative_matrix extension and the two listed capabilities to
+  // this member.
+  [[vk::ext_extension("SPV_KHR_cooperative_matrix")]]
+  [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]]
+  [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]]
+  SpirvMatrixType _matrix;
+  // clang-format on
+};
+// Cooperative matrix that can be used in the "a" position of a multiply add
+// instruction (r = (a * b) + c). This is CooperativeMatrix with the `use`
+// argument fixed to CooperativeMatrixUseMatrixAKHR.
+template <typename ComponentType, Scope scope, uint rows, uint columns>
+using CooperativeMatrixA =
+    CooperativeMatrix<ComponentType, scope, rows, columns,
+                      CooperativeMatrixUseMatrixAKHR>;
+// Cooperative matrix that can be used in the "b" position of a multiply add
+// instruction (r = (a * b) + c). This is CooperativeMatrix with the `use`
+// argument fixed to CooperativeMatrixUseMatrixBKHR.
+template <typename ComponentType, Scope scope, uint rows, uint columns>
+using CooperativeMatrixB =
+    CooperativeMatrix<ComponentType, scope, rows, columns,
+                      CooperativeMatrixUseMatrixBKHR>;
+// Cooperative matrix that can be used in the "r" and "c" position of a multiply
+// add instruction (r = (a * b) + c). This is CooperativeMatrix with the `use`
+// argument fixed to CooperativeMatrixUseMatrixAccumulatorKHR.
+template <typename ComponentType, Scope scope, uint rows, uint columns>
+using CooperativeMatrixAccumulator =
+    CooperativeMatrix<ComponentType, scope, rows, columns,
+                      CooperativeMatrixUseMatrixAccumulatorKHR>;
+// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and
+// c. The cooperative matrix operands are inferred, with the
+// SaturatingAccumulationKHR bit not set.
+//
+// a is a rows x K "A" matrix, b is a K x columns "B" matrix, and both c and the
+// result are rows x columns accumulator matrices. All four share the same
+// ComponentType and scope.
+template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
+CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
+cooperativeMatrixMultiplyAdd(
+    CooperativeMatrixA<ComponentType, scope, rows, K> a,
+    CooperativeMatrixB<ComponentType, scope, K, columns> b,
+    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);
+// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and
+// c. The cooperative matrix operands are inferred, with the
+// SaturatingAccumulationKHR bit set.
+//
+// a is a rows x K "A" matrix, b is a K x columns "B" matrix, and both c and the
+// result are rows x columns accumulator matrices. All four share the same
+// ComponentType and scope.
+template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
+CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
+cooperativeMatrixSaturatingMultiplyAdd(
+    CooperativeMatrixA<ComponentType, scope, rows, K> a,
+    CooperativeMatrixB<ComponentType, scope, K, columns> b,
+    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);
+} // namespace khr
+} // namespace vk
+#include "cooperative_matrix.impl"
+#endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl
new file mode 100644
index 0000000..2acae8e
--- /dev/null
+++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl
@@ -0,0 +1,377 @@
+// Copyright (c) 2024 Google LLC
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#include "vk/opcode_selector.h"
+// Inline SPIR-V declaration of OpMatrixTimesScalar (opcode 143). Called by
+// CooperativeMatrix::operator*(ComponentType) with the wrapped matrix value and
+// the scalar; the result has the same type as the first operand.
+template <typename ResultType, typename ComponentType>
+[[vk::ext_instruction(/* OpMatrixTimesScalar */ 143)]] ResultType
+__builtin_spv_MatrixTimesScalar(ResultType a, ComponentType b);
+// Inline SPIR-V declaration of OpCompositeExtract (opcode 81) specialised for
+// the SpirvMatrixType of a CooperativeMatrix: takes the matrix value and an
+// index and yields one ComponentType.
+// NOTE(review): not called by any member definition visible in this file;
+// Get() uses an access chain plus load instead.
+template <typename ComponentType, vk::Scope scope, uint rows, uint columns,
+          vk::CooperativeMatrixUse use>
+[[vk::ext_instruction(/* OpCompositeExtract */ 81)]] ComponentType
+__builtin_spv_ExtractFromCooperativeMatrix(
+    typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
+                                        use>::SpirvMatrixType matrix,
+    uint32_t index);
+// Inline SPIR-V declaration of OpCompositeConstruct (opcode 80) taking a single
+// component value and producing CoopMatrixType. Called by
+// CooperativeMatrix::Splat() to build a matrix from one value.
+template <typename CoopMatrixType, typename ComponentType>
+[[vk::ext_instruction(/* OpCompositeConstruct */ 80)]] CoopMatrixType
+__builtin_spv_ConstructCooperativeMatrix(ComponentType value);
+// Inline SPIR-V declaration of OpAccessChain (opcode 65) with a single index.
+// `base` is marked [[vk::ext_reference]]; the caller names the resulting
+// pointer type explicitly through ResultPointerType. Used by Get() and Set()
+// to address one element of the wrapped matrix.
+template <class ResultPointerType, class BaseType>
+[[vk::ext_instruction(/* OpAccessChain */ 65)]] ResultPointerType
+__builtin_spv_AccessChain([[vk::ext_reference]] BaseType base, uint32_t index);
+// Inline SPIR-V declaration of OpLoad (opcode 61) through an explicit pointer
+// value. Used by Get() to read the element addressed by an access chain.
+template <class ObjectType, class PointerType>
+[[vk::ext_instruction(/* OpLoad */ 61)]] ObjectType
+__builtin_spv_LoadPointer(PointerType base);
+// Inline SPIR-V declaration of OpStore (opcode 62) through an explicit pointer
+// value. Used by Set() to write the element addressed by an access chain.
+template <class PointerType, class ObjectType>
+[[vk::ext_instruction(/* OpStore */ 62)]] void
+__builtin_spv_StorePointer(PointerType base, ObjectType object);
+// Inline SPIR-V declaration of OpCompositeInsert (opcode 82) specialised for
+// the SpirvMatrixType of a CooperativeMatrix: takes the new component value,
+// the matrix, and an index, and yields the updated matrix value.
+// NOTE(review): not called by any member definition visible in this file;
+// Set() uses an access chain plus store instead.
+template <typename ComponentType, vk::Scope scope, uint rows, uint columns,
+          vk::CooperativeMatrixUse use>
+[[vk::ext_instruction(/* OpCompositeInsert */ 82)]]
+typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
+                                    use>::SpirvMatrixType
+__builtin_spv_InsertIntoCooperativeMatrix(
+    ComponentType value,
+    typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
+                                        use>::SpirvMatrixType matrix,
+    uint32_t index);
+// Define the load and store instructions
+//
+// OpCooperativeMatrixLoadKHR (opcode 4457), form without a scope operand:
+// pointer (by reference), memory layout, stride, then the memory-operand mask
+// as a literal. Used by the non-coherent buffer loads.
+template <typename ResultType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+    [[vk::ext_reference]] PointerType pointer,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand);
+// OpCooperativeMatrixLoadKHR (opcode 4457), form with a trailing scope operand
+// after the literal memory-operand mask. Used by CoherentLoad(), which passes
+// ScopeQueueFamily.
+template <typename ResultType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixLoadKHR(
+    [[vk::ext_reference]] PointerType pointer,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+// OpCooperativeMatrixLoadKHR (opcode 4457) for workgroup memory. Unlike the
+// overloads above, the pointer is a vk::WorkgroupSpirvPointer passed by value
+// rather than an [[vk::ext_reference]] parameter. Used by the
+// WorkgroupSpirvPointer Load(), which passes ScopeWorkgroup.
+template <typename ResultType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType
+__builtin_spv_CooperativeMatrixWorkgroupLoadKHR(
+    vk::WorkgroupSpirvPointer<PointerType> pointer,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+// OpCooperativeMatrixStoreKHR (opcode 4458), form with a trailing scope operand
+// after the literal memory-operand mask. Used by CoherentStore(), which passes
+// ScopeQueueFamily.
+template <typename ObjectType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void
+__builtin_spv_CooperativeMatrixStoreKHR(
+    [[vk::ext_reference]] PointerType pointer, ObjectType object,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+// OpCooperativeMatrixStoreKHR (opcode 4458), form without a scope operand:
+// pointer (by reference), matrix value, memory layout, stride, then the
+// memory-operand mask as a literal. Used by the non-coherent buffer Store().
+template <typename ObjectType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void
+__builtin_spv_CooperativeMatrixStoreKHR(
+    [[vk::ext_reference]] PointerType pointer, ObjectType object,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand);
+// OpCooperativeMatrixStoreKHR (opcode 4458) for workgroup memory. The pointer
+// is a vk::WorkgroupSpirvPointer passed by value rather than an
+// [[vk::ext_reference]] parameter. Used by the WorkgroupSpirvPointer Store(),
+// which passes ScopeWorkgroup.
+template <typename ObjectType, typename PointerType>
+[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void
+__builtin_spv_CooperativeMatrixWorkgroupStoreKHR(
+    vk::WorkgroupSpirvPointer<PointerType> pointer, ObjectType object,
+    vk::CooperativeMatrixLayout memory_layout, uint stride,
+    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
+// We cannot define `OpCooperativeMatrixLengthKHR` using ext_instruction because
+// one of the operands is a type id. This builtin will have specific code in the
+// compiler to expand it. CooperativeMatrix::GetLength() calls it with the
+// matrix's SpirvMatrixType as MatrixType.
+template <class MatrixType> uint __builtin_spv_CooperativeMatrixLengthKHR();
+// Arithmetic Instructions
+//
+// OpCooperativeMatrixMulAddKHR (opcode 4459). Takes the three matrix values and
+// the cooperative-matrix operands mask as a literal; the caller names
+// ResultType explicitly.
+template <typename ResultType, typename MatrixTypeA, typename MatrixTypeB,
+          typename MatrixTypeC>
+[[vk::ext_instruction(/* OpCooperativeMatrixMulAddKHR */ 4459)]] ResultType
+__builtin_spv_CooperativeMatrixMulAddKHR(MatrixTypeA a, MatrixTypeB b,
+                                         MatrixTypeC c,
+                                         [[vk::ext_literal]] int operands);
+namespace vk {
+namespace khr {
+// Converts this matrix to one with component type NewComponentType, keeping
+// scope, rows, columns, and use. The conversion instruction is selected by
+// util::ConversionSelector<ComponentType, NewComponentType> and applied to the
+// wrapped SPIR-V value as a whole.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <class NewComponentType>
+CooperativeMatrix<NewComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::cast() {
+  using ResultType =
+      CooperativeMatrix<NewComponentType, scope, rows, columns, use>;
+  ResultType result;
+  // The result's SPIR-V type is passed explicitly as Convert's template
+  // argument.
+  result._matrix = util::ConversionSelector<ComponentType, NewComponentType>::
+      template Convert<typename ResultType::SpirvMatrixType>(_matrix);
+  return result;
+}
+// Returns a new matrix holding the negation of this one. The instruction is
+// chosen by util::ArithmeticSelector<ComponentType>::Negate; this matrix is
+// not modified.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::negate() {
+  CooperativeMatrix result;
+  result._matrix = util::ArithmeticSelector<ComponentType>::Negate(_matrix);
+  return result;
+}
+// Returns this + other as a new matrix, using the add instruction chosen by
+// util::ArithmeticSelector<ComponentType>::Add. Neither operand is modified.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator+(
+    CooperativeMatrix other) {
+  CooperativeMatrix result;
+  result._matrix =
+      util::ArithmeticSelector<ComponentType>::Add(_matrix, other._matrix);
+  return result;
+}
+// Returns this - other as a new matrix, using the subtract instruction chosen
+// by util::ArithmeticSelector<ComponentType>::Sub. Neither operand is modified.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator-(
+    CooperativeMatrix other) {
+  CooperativeMatrix result;
+  result._matrix =
+      util::ArithmeticSelector<ComponentType>::Sub(_matrix, other._matrix);
+  return result;
+}
+// Returns the component-wise product of this and other as a new matrix, using
+// the multiply instruction chosen by
+// util::ArithmeticSelector<ComponentType>::Mul. This is not a matrix product;
+// see cooperativeMatrixMultiplyAdd for that.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator*(
+    CooperativeMatrix other) {
+  CooperativeMatrix result;
+  result._matrix =
+      util::ArithmeticSelector<ComponentType>::Mul(_matrix, other._matrix);
+  return result;
+}
+// Returns this / other as a new matrix, using the divide instruction chosen by
+// util::ArithmeticSelector<ComponentType>::Div. Neither operand is modified.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator/(
+    CooperativeMatrix other) {
+  CooperativeMatrix result;
+  result._matrix =
+      util::ArithmeticSelector<ComponentType>::Div(_matrix, other._matrix);
+  return result;
+}
+// Returns this matrix scaled by `scalar` as a new matrix, via
+// OpMatrixTimesScalar. This matrix is not modified.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator*(
+    ComponentType scalar) {
+  CooperativeMatrix result;
+  result._matrix = __builtin_spv_MatrixTimesScalar(_matrix, scalar);
+  return result;
+}
+// Stores this matrix through a workgroup SPIR-V pointer with
+// OpCooperativeMatrixStoreKHR. `layout` and `stride` are forwarded as given.
+// The caller's memoryAccessOperands are OR-ed with NonPrivatePointer and
+// MakePointerAvailable, and ScopeWorkgroup is passed as the scope operand.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Store(
+    WorkgroupSpirvPointer<Type> data, uint32_t stride) {
+  __builtin_spv_CooperativeMatrixWorkgroupStoreKHR(
+      data, _matrix, layout, stride,
+      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
+          MemoryAccessMakePointerAvailableMask,
+      ScopeWorkgroup);
+}
+// Stores this matrix to data[index] with OpCooperativeMatrixStoreKHR. `layout`,
+// `stride`, and memoryAccessOperands are forwarded unchanged; no extra memory
+// access bits and no scope operand are added.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Store(
+    RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride) {
+  __builtin_spv_CooperativeMatrixStoreKHR(data[index], _matrix, layout, stride,
+                                          memoryAccessOperands);
+}
+// Stores this matrix to data[index] of a globallycoherent buffer with
+// OpCooperativeMatrixStoreKHR. `layout` and `stride` are forwarded as given.
+// The caller's memoryAccessOperands are OR-ed with NonPrivatePointer and
+// MakePointerAvailable, and ScopeQueueFamily is passed as the scope operand.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+void CooperativeMatrix<ComponentType, scope, rows, columns, use>::CoherentStore(
+    globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
+    uint32_t stride) {
+  __builtin_spv_CooperativeMatrixStoreKHR(
+      data[index], _matrix, layout, stride,
+      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
+          MemoryAccessMakePointerAvailableMask,
+      ScopeQueueFamily);
+}
+// Loads a matrix through a workgroup SPIR-V pointer with
+// OpCooperativeMatrixLoadKHR. `layout` and `stride` are forwarded as given.
+// The caller's memoryAccessOperands are OR-ed with NonPrivatePointer and
+// MakePointerVisible, and ScopeWorkgroup is passed as the scope operand.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
+    vk::WorkgroupSpirvPointer<Type> buffer, uint32_t stride) {
+  CooperativeMatrix result;
+  result._matrix =
+      __builtin_spv_CooperativeMatrixWorkgroupLoadKHR<SpirvMatrixType>(
+          buffer, layout, stride,
+          memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
+              MemoryAccessMakePointerVisibleMask,
+          ScopeWorkgroup);
+  return result;
+}
+// Loads a matrix from buffer[index] of a read-write buffer with
+// OpCooperativeMatrixLoadKHR. `layout`, `stride`, and memoryAccessOperands are
+// forwarded unchanged; no extra memory access bits and no scope operand are
+// added.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
+    RWStructuredBuffer<Type> buffer, uint32_t index, uint32_t stride) {
+  CooperativeMatrix result;
+  result._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
+      buffer[index], layout, stride, memoryAccessOperands);
+  return result;
+}
+// Loads a matrix from buffer[index] of a globallycoherent buffer with
+// OpCooperativeMatrixLoadKHR. `layout` and `stride` are forwarded as given.
+// The caller's memoryAccessOperands are OR-ed with NonPrivatePointer and
+// MakePointerVisible, and ScopeQueueFamily is passed as the scope operand.
+//
+// The buffer parameter carries the `globallycoherent` qualifier so that this
+// definition matches its in-class declaration and the CoherentStore
+// definition.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::CoherentLoad(
+    globallycoherent RWStructuredBuffer<Type> buffer, uint32_t index,
+    uint32_t stride) {
+  CooperativeMatrix result;
+  result._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
+      buffer[index], layout, stride,
+      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
+          MemoryAccessMakePointerVisibleMask,
+      ScopeQueueFamily);
+  return result;
+}
+// Loads a matrix from buffer[index] of a read-only buffer with
+// OpCooperativeMatrixLoadKHR. `layout`, `stride`, and the caller-supplied
+// memoryAccessOperands are forwarded unchanged; because the memory is
+// read-only, no extra memory access bits and no scope operand are added.
+//
+// The convenience overload without memoryAccessOperands instantiates this
+// with MemoryAccessMaskNone, so its behaviour is unchanged.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
+          class Type>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
+    StructuredBuffer<Type> buffer, uint32_t index, uint32_t stride) {
+  CooperativeMatrix result;
+  // Honour the template argument instead of hard-coding MemoryAccessMaskNone,
+  // as the read-write Load overload does.
+  result._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
+      buffer[index], layout, stride, memoryAccessOperands);
+  return result;
+}
+// Builds a matrix from the single value v by passing it to the
+// OpCompositeConstruct builtin with this matrix's SpirvMatrixType as the
+// result type.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>
+CooperativeMatrix<ComponentType, scope, rows, columns, use>::Splat(
+    ComponentType v) {
+  CooperativeMatrix result;
+  result._matrix = __builtin_spv_ConstructCooperativeMatrix<SpirvMatrixType>(v);
+  return result;
+}
+// Returns the value of the compiler-expanded OpCooperativeMatrixLengthKHR
+// builtin for this matrix's SpirvMatrixType.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+uint CooperativeMatrix<ComponentType, scope, rows, columns, use>::GetLength() {
+  return __builtin_spv_CooperativeMatrixLengthKHR<SpirvMatrixType>();
+}
+// Returns the element at `index`. Builds a pointer-to-ComponentType type
+// (OpTypePointer with storage class literal 7), access-chains into the wrapped
+// matrix at `index`, and loads through the resulting pointer. No bounds check
+// is performed here.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+ComponentType CooperativeMatrix<ComponentType, scope, rows, columns, use>::Get(
+    uint32_t index) {
+  // clang-format off
+  using ComponentPtr = vk::SpirvOpaqueType<
+      /* OpTypePointer */ 32,
+      /* function storage class */ vk::Literal<vk::integral_constant<uint, 7> >,
+      ComponentType>;
+  // clang-format on
+  ComponentPtr ptr = __builtin_spv_AccessChain<ComponentPtr>(_matrix, index);
+  return __builtin_spv_LoadPointer<ComponentType>(ptr);
+}
+// Writes `value` to the element at `index`. Builds a pointer-to-ComponentType
+// type (OpTypePointer with storage class literal 7), access-chains into the
+// wrapped matrix at `index`, and stores through the resulting pointer. No
+// bounds check is performed here.
+template <class ComponentType, Scope scope, uint rows, uint columns,
+          CooperativeMatrixUse use>
+void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Set(
+    ComponentType value, uint32_t index) {
+  // clang-format off
+  using ComponentPtr = vk::SpirvOpaqueType<
+      /* OpTypePointer */ 32,
+      /* function storage class */ vk::Literal<vk::integral_constant<uint, 7> >,
+      ComponentType>;
+  // clang-format on
+  ComponentPtr ptr = __builtin_spv_AccessChain<ComponentPtr>(_matrix, index);
+  // The store builtin returns void, so this `return` yields no value.
+  return __builtin_spv_StorePointer(ptr, value);
+}
+// Computes (a * b) + c with OpCooperativeMatrixMulAddKHR and returns the
+// rows x columns accumulator result.
+//
+// The operands mask is derived from a.hasSignedIntegerComponentType: when it is
+// true, the A, B, C, and Result signed-components bits are all set; otherwise
+// the mask is CooperativeMatrixOperandsMaskNone. The saturating bit is never
+// set by this function.
+template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
+CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
+cooperativeMatrixMultiplyAdd(
+    CooperativeMatrixA<ComponentType, scope, rows, K> a,
+    CooperativeMatrixB<ComponentType, scope, K, columns> b,
+    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c) {
+  const vk::CooperativeMatrixOperandsMask allSignedComponents =
+      vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask;
+  // All four matrices share ComponentType, so checking `a` covers them all.
+  const vk::CooperativeMatrixOperandsMask operands =
+      (vk::CooperativeMatrixOperandsMask)(
+          a.hasSignedIntegerComponentType
+              ? allSignedComponents
+              : vk::CooperativeMatrixOperandsMaskNone);
+  CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> result;
+  result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR<
+      typename CooperativeMatrixAccumulator<ComponentType, scope, rows,
+                                            columns>::SpirvMatrixType>(
+      a._matrix, b._matrix, c._matrix, operands);
+  return result;
+}
+// Computes (a * b) + c with OpCooperativeMatrixMulAddKHR with the
+// SaturatingAccumulationKHR bit always set, and returns the rows x columns
+// accumulator result.
+//
+// The operands mask is derived from a.hasSignedIntegerComponentType: when it is
+// true, the A, B, C, and Result signed-components bits are set in addition to
+// the saturating bit; otherwise only the saturating bit is set.
+template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
+CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
+cooperativeMatrixSaturatingMultiplyAdd(
+    CooperativeMatrixA<ComponentType, scope, rows, K> a,
+    CooperativeMatrixB<ComponentType, scope, K, columns> b,
+    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c) {
+  // Despite its name, this mask also includes the saturating-accumulation bit.
+  const vk::CooperativeMatrixOperandsMask allSignedComponents =
+      vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask |
+      vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask;
+  // All four matrices share ComponentType, so checking `a` covers them all.
+  const vk::CooperativeMatrixOperandsMask operands =
+      (vk::CooperativeMatrixOperandsMask)(
+          a.hasSignedIntegerComponentType
+              ? allSignedComponents
+              : vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask);
+  CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> result;
+  result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR<
+      typename CooperativeMatrixAccumulator<ComponentType, scope, rows,
+                                            columns>::SpirvMatrixType>(
+      a._matrix, b._matrix, c._matrix, operands);
+  return result;
+}
+} // namespace khr
+} // namespace vk
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h
new file mode 100644
index 0000000..bc8672c
--- /dev/null
+++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h
@@ -0,0 +1,227 @@
+// Copyright (c) 2024 Google LLC
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#ifndef _HLSL_VK_KHR_OPCODE_SELECTOR_H_
+#define _HLSL_VK_KHR_OPCODE_SELECTOR_H_
+// Declares __builtin_spv_<name> as a function template bound, through
+// vk::ext_instruction, to the SPIR-V instruction with the given opcode. The
+// function takes one operand and returns a value of the same type.
+#define DECLARE_UNARY_OP(name, opcode)                                         \
+  template <typename ResultType>                                               \
+  [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name(             \
+      ResultType a)
+// The second argument is the numeric SPIR-V opcode of the instruction named
+// by the first argument.
+DECLARE_UNARY_OP(CopyObject, 83);
+DECLARE_UNARY_OP(SNegate, 126);
+DECLARE_UNARY_OP(FNegate, 127);
+// Declares __builtin_spv_<name> as a function template bound, through
+// vk::ext_instruction, to the SPIR-V instruction with the given opcode. The
+// function converts an OperandType value into a ResultType value. ResultType
+// is the first template parameter so that it can be given explicitly while
+// OperandType is deduced from the argument.
+#define DECLARE_CONVERSION_OP(name, opcode)                                    \
+  template <typename ResultType, typename OperandType>                         \
+  [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name(             \
+      OperandType a)
+DECLARE_CONVERSION_OP(ConvertFtoU, 109);
+DECLARE_CONVERSION_OP(ConvertFtoS, 110);
+DECLARE_CONVERSION_OP(ConvertSToF, 111);
+DECLARE_CONVERSION_OP(ConvertUToF, 112);
+DECLARE_CONVERSION_OP(UConvert, 113);
+DECLARE_CONVERSION_OP(SConvert, 114);
+DECLARE_CONVERSION_OP(FConvert, 115);
+DECLARE_CONVERSION_OP(Bitcast, 124);
+// The declaration helpers are only needed up to this point; undefine them so
+// that they do not leak into files that include this header.
+#undef DECLARE_UNARY_OP
+#undef DECLARE_CONVERSION_OP
+// Declares __builtin_spv_<name> as a function template bound, through
+// vk::ext_instruction, to the SPIR-V instruction with the given opcode. Both
+// operands and the result share the single template type ResultType.
+#define DECLARE_BINOP(name, opcode)                                            \
+  template <typename ResultType>                                               \
+  [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name(             \
+      ResultType a, ResultType b)
+// Integer (I*, U*, S*) and floating-point (F*) arithmetic builtins; the
+// second argument is the numeric SPIR-V opcode.
+DECLARE_BINOP(IAdd, 128);
+DECLARE_BINOP(FAdd, 129);
+DECLARE_BINOP(ISub, 130);
+DECLARE_BINOP(FSub, 131);
+DECLARE_BINOP(IMul, 132);
+DECLARE_BINOP(FMul, 133);
+DECLARE_BINOP(UDiv, 134);
+DECLARE_BINOP(SDiv, 135);
+// The helper macro is not needed after this point.
+#undef DECLARE_BINOP
+namespace vk {
+namespace util {
+// ArithmeticSelector<ComponentType> maps a scalar component type to the
+// __builtin_spv_* functions used to negate, add, subtract, multiply and
+// divide values built from that type. The primary template is only declared;
+// the usable specializations are generated by ARITHMETIC_SELECTOR below.
+template <class ComponentType> class ArithmeticSelector;
+// Generates the ArithmeticSelector specialization for BaseType. Each static
+// member template forwards its arguments to the builtin passed for that
+// operation.
+// NOTE(review): SIGNED_INTEGER_TYPE is accepted but never referenced in the
+// macro body.
+#define ARITHMETIC_SELECTOR(BaseType, OpNegate, OpAdd, OpSub, OpMul, OpDiv,    \
+                            SIGNED_INTEGER_TYPE)                               \
+  template <> class ArithmeticSelector<BaseType> {                             \
+    template <class T> static T Negate(T a) { return OpNegate(a); }            \
+    template <class T> static T Add(T a, T b) { return OpAdd(a, b); }          \
+    template <class T> static T Sub(T a, T b) { return OpSub(a, b); }          \
+    template <class T> static T Mul(T a, T b) { return OpMul(a, b); }          \
+    template <class T> static T Div(T a, T b) { return OpDiv(a, b); }          \
+  };
+// Floating-point types use the F* builtins for every operation.
+ARITHMETIC_SELECTOR(half, __builtin_spv_FNegate, __builtin_spv_FAdd,
+                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
+                    false);
+ARITHMETIC_SELECTOR(float, __builtin_spv_FNegate, __builtin_spv_FAdd,
+                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
+                    false);
+ARITHMETIC_SELECTOR(double, __builtin_spv_FNegate, __builtin_spv_FAdd,
+                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
+                    false);
+// Integer types share SNegate, IAdd, ISub and IMul; only the division builtin
+// differs (SDiv for signed types, UDiv for unsigned types). The 16-bit
+// specializations are generated only when __HLSL_ENABLE_16_BIT is non-zero.
+#if __HLSL_ENABLE_16_BIT
+ARITHMETIC_SELECTOR(int16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+                    true);
+ARITHMETIC_SELECTOR(uint16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+                    false);
+#endif // __HLSL_ENABLE_16_BIT
+ARITHMETIC_SELECTOR(int32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+                    true);
+ARITHMETIC_SELECTOR(int64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+                    true);
+ARITHMETIC_SELECTOR(uint32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+                    false);
+ARITHMETIC_SELECTOR(uint64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+                    false);
+// The conversion selector is used to convert a value of one type to another
+// using the SPIR-V conversion instructions. See
+// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_conversion_instructions.
+// SourceType and TargetType must be integer or floating point scalar types.
+// ConversionSelector::Convert converts an object of type S to an object of type
+// T. S must be SourceType, a vector of SourceType, or a cooperative matrix of
+// SourceType. T must be TargetType, a vector of TargetType, or a cooperative
+// matrix of TargetType. T must have the same number of components as S. T is a
+// cooperative matrix if and only if S is a cooperative matrix.
+// The primary template is only declared; the usable specializations are
+// generated by CONVERSION_SELECTOR below.
+template <class SourceType, class TargetType> class ConversionSelector;
+// Generates the ConversionSelector specialization for one
+// (SourceType, TargetType) pair. Convert forwards its argument to OpConvert,
+// passing T as the explicit result type.
+#define CONVERSION_SELECTOR(SourceType, TargetType, OpConvert)                 \
+  template <> class ConversionSelector<SourceType, TargetType> {               \
+    template <class T, class S> static T Convert(S a) {                        \
+      return OpConvert<T>(a);                                                  \
+    }                                                                          \
+  };
+// Conversions in which the source or the target is a 16-bit integer type.
+// They are generated only when __HLSL_ENABLE_16_BIT is non-zero.
+// Identity pairs use CopyObject and same-width sign changes use Bitcast.
+// Integer-to-float pairs pick ConvertUToF/ConvertSToF from the source's
+// signedness; float-to-integer pairs pick ConvertFtoU/ConvertFtoS from the
+// target's signedness.
+// NOTE(review): for integer pairs of different width the opcode follows the
+// signedness of TargetType (UConvert for unsigned, SConvert for signed), not
+// of SourceType - confirm this gives the intended result when widening.
+#if __HLSL_ENABLE_16_BIT
+CONVERSION_SELECTOR(uint16_t, uint16_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(uint16_t, int16_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(uint16_t, uint32_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint16_t, int32_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(uint16_t, uint64_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint16_t, int64_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(uint16_t, half, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint16_t, float, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint16_t, double, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(int16_t, uint16_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(int16_t, int16_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(int16_t, uint32_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int16_t, int32_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int16_t, uint64_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int16_t, int64_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int16_t, half, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int16_t, float, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int16_t, double, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(uint32_t, uint16_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint32_t, int16_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int32_t, uint16_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int32_t, int16_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(uint64_t, uint16_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint64_t, int16_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int64_t, uint16_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int64_t, int16_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(half, uint16_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(half, int16_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(float, uint16_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(float, int16_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(double, uint16_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(double, int16_t, __builtin_spv_ConvertFtoS);
+#endif
+// Conversions among the 32-bit and 64-bit integer types and the
+// floating-point types. Identity pairs use CopyObject, same-width sign
+// changes use Bitcast, integer width changes use UConvert/SConvert, and
+// floating-point width changes use FConvert.
+CONVERSION_SELECTOR(uint32_t, uint32_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(uint32_t, int32_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(uint32_t, uint64_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint32_t, int64_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(uint32_t, half, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint32_t, float, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint32_t, double, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(int32_t, uint32_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(int32_t, int32_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(int32_t, uint64_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int32_t, int64_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int32_t, half, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int32_t, float, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int32_t, double, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(uint64_t, uint32_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(uint64_t, int32_t, __builtin_spv_SConvert);
+// uint64_t -> uint64_t is an identity copy; only the sign change to int64_t
+// is a bitcast. This matches the uint32_t, int32_t and int64_t groups.
+CONVERSION_SELECTOR(uint64_t, uint64_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(uint64_t, int64_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(uint64_t, half, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint64_t, float, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(uint64_t, double, __builtin_spv_ConvertUToF);
+CONVERSION_SELECTOR(int64_t, uint32_t, __builtin_spv_UConvert);
+CONVERSION_SELECTOR(int64_t, int32_t, __builtin_spv_SConvert);
+CONVERSION_SELECTOR(int64_t, uint64_t, __builtin_spv_Bitcast);
+CONVERSION_SELECTOR(int64_t, int64_t, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(int64_t, half, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int64_t, float, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(int64_t, double, __builtin_spv_ConvertSToF);
+CONVERSION_SELECTOR(half, uint32_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(half, int32_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(half, uint64_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(half, int64_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(half, half, __builtin_spv_CopyObject);
+// Without 16-bit types the half <-> float pairs copy the value instead of
+// converting it (presumably because half then has the same width as float -
+// TODO confirm).
+#if __HLSL_ENABLE_16_BIT
+CONVERSION_SELECTOR(half, float, __builtin_spv_FConvert);
+#else
+CONVERSION_SELECTOR(half, float, __builtin_spv_CopyObject);
+#endif
+CONVERSION_SELECTOR(half, double, __builtin_spv_FConvert);
+CONVERSION_SELECTOR(float, uint32_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(float, int32_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(float, uint64_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(float, int64_t, __builtin_spv_ConvertFtoS);
+#if __HLSL_ENABLE_16_BIT
+CONVERSION_SELECTOR(float, half, __builtin_spv_FConvert);
+#else
+CONVERSION_SELECTOR(float, half, __builtin_spv_CopyObject);
+#endif
+CONVERSION_SELECTOR(float, float, __builtin_spv_CopyObject);
+CONVERSION_SELECTOR(float, double, __builtin_spv_FConvert);
+CONVERSION_SELECTOR(double, uint32_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(double, int32_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(double, uint64_t, __builtin_spv_ConvertFtoU);
+CONVERSION_SELECTOR(double, int64_t, __builtin_spv_ConvertFtoS);
+CONVERSION_SELECTOR(double, half, __builtin_spv_FConvert);
+CONVERSION_SELECTOR(double, float, __builtin_spv_FConvert);
+CONVERSION_SELECTOR(double, double, __builtin_spv_CopyObject);
+}; // namespace util
+} // namespace vk
+#endif // _HLSL_VK_KHR_OPCODE_SELECTOR_H_
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h
new file mode 100644
index 0000000..69bb53b
--- /dev/null
+++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h
@@ -0,0 +1,85 @@
+// Copyright (c) 2024 Google LLC
+//
+// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+#ifndef _HLSL_VK_SPIRV_H_
+#define _HLSL_VK_SPIRV_H_
+namespace vk {
+// Values for the `use` template argument of vk::khr::CooperativeMatrix:
+// matrix A, matrix B, or accumulator.
+// NOTE(review): the numeric values presumably mirror the enumerants of the
+// same name in the SPIR-V headers - confirm before changing any of them.
+enum CooperativeMatrixUse {
+  CooperativeMatrixUseMatrixAKHR = 0,
+  CooperativeMatrixUseMatrixBKHR = 1,
+  CooperativeMatrixUseMatrixAccumulatorKHR = 2,
+  CooperativeMatrixUseMax = 0x7fffffff,
+};
+// Identifiers for cooperative matrix memory layouts: row-major, column-major
+// and the two ARM blocked-interleaved layouts.
+// NOTE(review): the numeric values presumably mirror the enumerants of the
+// same name in the SPIR-V headers - confirm before changing any of them.
+enum CooperativeMatrixLayout {
+  CooperativeMatrixLayoutRowMajorKHR = 0,
+  CooperativeMatrixLayoutColumnMajorKHR = 1,
+  CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202,
+  CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203,
+  CooperativeMatrixLayoutMax = 0x7fffffff,
+};
+// Bit flags for the operands argument of the CooperativeMatrixMulAddKHR
+// builtin. Each enumerator other than None occupies its own bit, so the flags
+// can be combined with bitwise OR.
+enum CooperativeMatrixOperandsMask {
+  CooperativeMatrixOperandsMaskNone = 0,
+  CooperativeMatrixOperandsMatrixASignedComponentsKHRMask = 0x00000001,
+  CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask = 0x00000002,
+  CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask = 0x00000004,
+  CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask = 0x00000008,
+  CooperativeMatrixOperandsSaturatingAccumulationKHRMask = 0x00000010,
+};
+// Bit flags describing a memory access. Each KHR-suffixed enumerator has the
+// same value as the unsuffixed enumerator listed directly above it.
+// NOTE(review): the numeric values presumably mirror the enumerants of the
+// same name in the SPIR-V headers - confirm before changing any of them.
+enum MemoryAccessMask {
+  MemoryAccessMaskNone = 0,
+  MemoryAccessVolatileMask = 0x00000001,
+  MemoryAccessAlignedMask = 0x00000002,
+  MemoryAccessNontemporalMask = 0x00000004,
+  MemoryAccessMakePointerAvailableMask = 0x00000008,
+  MemoryAccessMakePointerAvailableKHRMask = 0x00000008,
+  MemoryAccessMakePointerVisibleMask = 0x00000010,
+  MemoryAccessMakePointerVisibleKHRMask = 0x00000010,
+  MemoryAccessNonPrivatePointerMask = 0x00000020,
+  MemoryAccessNonPrivatePointerKHRMask = 0x00000020,
+  MemoryAccessAliasScopeINTELMaskMask = 0x00010000,
+  MemoryAccessNoAliasINTELMaskMask = 0x00020000,
+};
+// Values for the `scope` template argument of vk::khr::CooperativeMatrix.
+// ScopeQueueFamily and ScopeQueueFamilyKHR are two names for the same value.
+// NOTE(review): the numeric values presumably mirror the enumerants of the
+// same name in the SPIR-V headers - confirm before changing any of them.
+enum Scope {
+  ScopeCrossDevice = 0,
+  ScopeDevice = 1,
+  ScopeWorkgroup = 2,
+  ScopeSubgroup = 3,
+  ScopeInvocation = 4,
+  ScopeQueueFamily = 5,
+  ScopeQueueFamilyKHR = 5,
+  ScopeShaderCallKHR = 6,
+  ScopeMax = 0x7fffffff,
+};
+// Storage class identifiers. Only the workgroup storage class is declared; it
+// is the literal operand of the WorkgroupSpirvPointer type in this header.
+enum StorageClass {
+  StorageClassWorkgroup = 4,
+};
+// An opaque type to represent a Spir-V pointer to the workgroup storage class.
+// It is a const vk::SpirvOpaqueType built from opcode 32 (OpTypePointer) with
+// StorageClassWorkgroup as a literal operand, followed by PointeeType.
+// clang-format off
+template <typename PointeeType>
+using WorkgroupSpirvPointer = const vk::SpirvOpaqueType<
+    /* OpTypePointer */ 32,
+    vk::Literal<vk::integral_constant<uint, StorageClassWorkgroup> >,
+    PointeeType>;
+// clang-format on
+// Returns an opaque Spir-V pointer to v. The memory object v's storage class
+// modifier must be groupshared. If the incorrect storage class is used, then
+// there will be a validation error, and it may not point at the offending
+// source location.
+// The function is declared only: it is bound to opcode 83 (OpCopyObject), and
+// v carries the vk::ext_reference attribute.
+template <typename T>
+[[vk::ext_instruction(/* OpCopyObject */ 83)]] WorkgroupSpirvPointer<T>
+GetGroupSharedAddress([[vk::ext_reference]] T v);
+} // namespace vk
+#endif // _HLSL_VK_SPIRV_H_