From 6c8ae19be66cee247980a48e736a4e05d14de179 Mon Sep 17 00:00:00 2001 From: 3gg <3gg@shellblade.net> Date: Tue, 2 Dec 2025 16:39:36 -0800 Subject: Immediate-mode renderer, triangle demo, shader compilation in cmake, Agility SDK --- contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt | 222 ++++++++++++ contrib/dxc_2025_07_14/inc/hlsl/README.txt | 7 + contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h | 198 +++++++++++ .../inc/hlsl/vk/khr/cooperative_matrix.h | 275 +++++++++++++++ .../inc/hlsl/vk/khr/cooperative_matrix.impl | 377 +++++++++++++++++++++ .../dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h | 227 +++++++++++++ contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h | 85 +++++ 7 files changed, 1391 insertions(+) create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/README.txt create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h create mode 100644 contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h (limited to 'contrib/dxc_2025_07_14/inc/hlsl') diff --git a/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt b/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt new file mode 100644 index 0000000..94472c3 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt @@ -0,0 +1,222 @@ +============================================================================== +The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: +============================================================================== + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. 
+ + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +---- LLVM Exceptions to the Apache 2.0 License ---- + +As an exception, if, as a result of your compiling your source code, portions +of this Software are embedded into an Object form of such source code, you +may redistribute such embedded portions in such Object form without complying +with the conditions of Sections 4(a), 4(b) and 4(d) of the License. 
+ +In addition, if you combine or link compiled forms of this Software with +software that is licensed under the GPLv2 ("Combined Software") and if a +court of competent jurisdiction determines that the patent provision (Section +3), the indemnity provision (Section 9) or other Section of the License +conflicts with the conditions of the GPLv2, you may retroactively and +prospectively choose to deem waived or otherwise exclude such Section(s) of +the License, but only in their entirety and only with respect to the Combined +Software. diff --git a/contrib/dxc_2025_07_14/inc/hlsl/README.txt b/contrib/dxc_2025_07_14/inc/hlsl/README.txt new file mode 100644 index 0000000..a760bb9 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/README.txt @@ -0,0 +1,7 @@ +HLSL Standard Header Library +============================ + +The contents of this directory and subdirectories are the HLSL Standard Header +library. These headers are open source software. You may freely distribute all +or parts of these headers under the terms of the license agreement found in +LICENSE.txt. diff --git a/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h b/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h new file mode 100644 index 0000000..4f5e620 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h @@ -0,0 +1,198 @@ +// Header for linear algebra APIs. 
+ +#if __spirv__ +#error "Cooperative vectors not (yet) supported for SPIRV" +#endif + +#if ((__SHADER_TARGET_MAJOR > 6) || \ + (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 9)) && \ + (__HLSL_VERSION >= 2021) + +namespace dx { +namespace linalg { + +// NOTE: can't be an enum class because we get this error: +// error: non-type template argument of type 'dx::linalg::DataType' is not +// an integral constant expression +// +enum DataType { + DATA_TYPE_SINT16 = 2, // ComponentType::I16 + DATA_TYPE_UINT16 = 3, // ComponentType::U16 + DATA_TYPE_SINT32 = 4, // ComponentType::I32 + DATA_TYPE_UINT32 = 5, // ComponentType::U32 + DATA_TYPE_FLOAT16 = 8, // ComponentType::F16 + DATA_TYPE_FLOAT32 = 9, // ComponentType::F32 + DATA_TYPE_SINT8_T4_PACKED = 17, // ComponentType::PackedS8x32 + DATA_TYPE_UINT8_T4_PACKED = 18, // ComponentType::PackedU8x32 + DATA_TYPE_UINT8 = 19, // ComponentType::U8 + DATA_TYPE_SINT8 = 20, // ComponentType::I8 + DATA_TYPE_FLOAT8_E4M3 = 21, // ComponentType::F8_E4M3 + // (1 sign, 4 exp, 3 mantissa bits) + DATA_TYPE_FLOAT8_E5M2 = 22, // ComponentType::F8_E5M2 + // (1 sign, 5 exp, 2 mantissa bits) +}; + +enum MatrixLayout { + MATRIX_LAYOUT_ROW_MAJOR = 0, + MATRIX_LAYOUT_COLUMN_MAJOR = 1, + MATRIX_LAYOUT_MUL_OPTIMAL = 2, + MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = 3 +}; + +// +// Helper for signedness +// +namespace details { + +template struct IsUnsigned {}; + +#define _SPECIALIZE_ISUNSIGNED(type, value) \ + template <> struct IsUnsigned { \ + static const bool Value = value; \ + } + +_SPECIALIZE_ISUNSIGNED(uint8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(int8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(uint32_t, true); +_SPECIALIZE_ISUNSIGNED(int32_t, false); +_SPECIALIZE_ISUNSIGNED(float32_t, false); + +#ifdef __HLSL_ENABLE_16_BIT +_SPECIALIZE_ISUNSIGNED(uint16_t, true); +_SPECIALIZE_ISUNSIGNED(int16_t, false); +_SPECIALIZE_ISUNSIGNED(float16_t, false); +#else // //__HLSL_ENABLE_16_BIT +_SPECIALIZE_ISUNSIGNED(half, false); +#endif 
//__HLSL_ENABLE_16_BIT + +#undef _SPECIALIZE_ISUNSIGNED + +} // namespace details + +// +// (RW)MatrixRef +// + +template +struct MatrixRefImpl { + BufferTy Buffer; + uint StartOffset; + uint Stride; +}; + +template +using MatrixRef = MatrixRefImpl; + +template +using RWMatrixRef = MatrixRefImpl; + +// +// (RW)VectorRef +// + +template struct VectorRefImpl { + BufferTy Buffer; + uint StartOffset; +}; + +template using VectorRef = VectorRefImpl; + +template +using RWVectorRef = VectorRefImpl; + +// +// Vector +// + +template struct InterpretedVector { + vector Data; +}; + +template +InterpretedVector MakeInterpretedVector(vector Vec) { + InterpretedVector IV = {Vec}; + return IV; +} + +// +// Mul +// + +template +vector +Mul(MatrixRefImpl + Matrix, + InterpretedVector InputVector) { + + vector OutputVector; + + __builtin_MatVecMul( + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride); + + return OutputVector; +} + +// +// MulAdd +// + +template +vector +MulAdd(MatrixRefImpl + Matrix, + InterpretedVector InputVector, + VectorRefImpl BiasVector) { + + vector OutputVector; + + __builtin_MatVecMulAdd( + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride, BiasVector.Buffer, + BiasVector.StartOffset, BiasVectorDT); + + return OutputVector; +} + +// +// OuterProductAccumulate +// + +template +void OuterProductAccumulate( + vector InputVector1, vector InputVector2, + RWMatrixRef Matrix) { + __builtin_OuterProductAccumulate(InputVector1, InputVector2, Matrix.Buffer, + Matrix.StartOffset, MatrixDT, MatrixLayout, + Matrix.Stride); +} + +// +// VectorAccumulate +// + +template +void VectorAccumulate(vector InputVector, + RWByteAddressBuffer 
Buffer, uint Offset) { + __builtin_VectorAccumulate(InputVector, Buffer, Offset); +} + +} // namespace linalg +} // namespace dx + +#endif // SM 6.9 check and HV version check diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h new file mode 100644 index 0000000..a53ab4c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h @@ -0,0 +1,275 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ +#define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ + +#if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6 +#error "CooperativeMatrix requires a minimum of SPIR-V 1.6" +#endif + +#include "vk/spirv.h" + +namespace vk { +namespace khr { + +// The base cooperative matrix class. The template arguments correspond to the +// operands in the OpTypeCooperativeMatrixKHR instruction. +template +class CooperativeMatrix { + template + CooperativeMatrix cast(); + + // Apply OpSNegate or OFNegate, depending on ComponentType, in a element by + // element manner. + CooperativeMatrix negate(); + + // Apply OpIAdd or OFAdd, depending on ComponentType, in a element by element + // manner. + CooperativeMatrix operator+(CooperativeMatrix other); + + // Apply OpISub or OFSub, depending on ComponentType, in a element by element + // manner. + CooperativeMatrix operator-(CooperativeMatrix other); + + // Apply OpIMul or OFMul, depending on ComponentType, in a element by element + // manner. + CooperativeMatrix operator*(CooperativeMatrix other); + + // Apply OpSDiv, OpUDiv or OFDiv, depending on ComponentType, in a element by + // element manner. 
+ CooperativeMatrix operator/(CooperativeMatrix other); + + // Apply OpMatrixTimesScalar in a element by element manner. + CooperativeMatrix operator*(ComponentType scalar); + + // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to + // data using the given memory layout, stride, and memory access operands. + // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope + // will be added to the memory access operands to make the memory coherent. + // + // This function uses a SPIR-V pointer because HLSL does not allow groupshared + // memory object to be passed by reference. The pointer is a hack to get + // around that. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + void Store(WorkgroupSpirvPointer data, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + void Store(WorkgroupSpirvPointer data, uint32_t stride) { + Store(data, stride); + } + + // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to + // data[index] using the given memory layout, stride, and memory access + // operands. The layout and stride will be passed to the SPIR-V instruction as + // is. The precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + void Store(RWStructuredBuffer data, uint32_t index, uint32_t stride) { + Store(data, index, stride); + } + + // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to + // data[index] using the given memory layout, stride, and memory access + // operands. 
`NonPrivatePointer` and `MakePointerAvailable` with the + // QueueFamily scope will be added to the memory access operands to make the + // memory coherent. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + void CoherentStore(globallycoherent RWStructuredBuffer data, + uint32_t index, uint32_t stride) { + CoherentStore(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data using the given memory layout, stride, and memory access operands. + // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope + // will be added to the memory access operands to make the memory coherent. + // + // This function uses a SPIR-V pointer because HLSL does not allow groupshared + // memory object to be passed by reference. The pointer is a hack to get + // around that. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(WorkgroupSpirvPointer data, + uint32_t stride) { + return Load(data, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. + // + // The layout and stride will be passed to the SPIR-V instruction as is. 
The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. + template + static CooperativeMatrix Load(RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. `NonPrivatePointer` and `MakePointerVisible` with the QueueFamily + // scope will be added to the memory access operands to make the memory + // coherent. + // + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access operands + // template argument. + template + static CooperativeMatrix + CoherentLoad(globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + return CoherentLoad(data, index, stride); + } + + // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from + // data[index] using the given memory layout, stride, and memory access + // operands. No memory access bits are added to the operands. Since the memory + // is readonly, there should be no need. + // + // The layout and stride will be passed to the SPIR-V instruction as is. The + // precise meaning can be found in the specification for + // SPV_KHR_cooperative_matrix. + template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride); + + // Same as above, but uses MemoryAccessMaskNone for the memory access + // operands. 
+ template + static CooperativeMatrix Load(StructuredBuffer data, uint32_t index, + uint32_t stride) { + return Load(data, index, stride); + } + + // Constructs a cooperative matrix with all values initialized to v. Note that + // all threads in scope must have the same value for v. + static CooperativeMatrix Splat(ComponentType v); + + // Returns the result of OpCooperativeMatrixLengthKHR on the current type. + static uint32_t GetLength(); + + // Functions to access the elements of the cooperative matrix. The index must + // be less than GetLength(). + void Set(ComponentType value, uint32_t index); + ComponentType Get(uint32_t index); + + static const bool hasSignedIntegerComponentType = + (ComponentType(0) - ComponentType(1) < ComponentType(0)); + + // clang-format off + using SpirvMatrixType = vk::SpirvOpaqueType< + /* OpTypeCooperativeMatrixKHR */ 4456, ComponentType, + vk::integral_constant, vk::integral_constant, + vk::integral_constant, vk::integral_constant >; + + [[vk::ext_extension("SPV_KHR_cooperative_matrix")]] + [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]] + [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]] + SpirvMatrixType _matrix; + // clang-format on +}; + +// Cooperative matrix that can be used in the "a" position of a multiply add +// instruction (r = (a * b) + c). +template +using CooperativeMatrixA = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "b" position of a multiply add +// instruction (r = (a * b) + c). +template +using CooperativeMatrixB = + CooperativeMatrix; + +// Cooperative matrix that can be used in the "r" and "c" position of a multiply +// add instruction (r = (a * b) + c). +template +using CooperativeMatrixAccumulator = + CooperativeMatrix; + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit not set. 
+template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and +// c. The cooperative matrix operands are inferred, with the +// SaturatingAccumulationKHR bit set. +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c); + +} // namespace khr +} // namespace vk + +#include "cooperative_matrix.impl" +#endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl new file mode 100644 index 0000000..2acae8e --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl @@ -0,0 +1,377 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#include "vk/opcode_selector.h" + +template +[[vk::ext_instruction(/* OpMatrixTimesScalar */ 143)]] ResultType +__builtin_spv_MatrixTimesScalar(ResultType a, ComponentType b); + +template +[[vk::ext_instruction(/* OpCompositeExtract */ 81)]] ComponentType +__builtin_spv_ExtractFromCooperativeMatrix( + typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix, + uint32_t index); + +template +[[vk::ext_instruction(/* OpCompositeConstruct */ 80)]] CoopMatrixType +__builtin_spv_ConstructCooperativeMatrix(ComponentType value); + +template +[[vk::ext_instruction(/* OpAccessChain */ 65)]] ResultPointerType +__builtin_spv_AccessChain([[vk::ext_reference]] BaseType base, uint32_t index); + +template +[[vk::ext_instruction(/* OpLoad */ 61)]] ObjectType +__builtin_spv_LoadPointer(PointerType base); + +template +[[vk::ext_instruction(/* OpLoad */ 62)]] void +__builtin_spv_StorePointer(PointerType base, ObjectType object); + +template +[[vk::ext_instruction(/* OpCompositeInsert */ 82)]] +typename vk::khr::CooperativeMatrix::SpirvMatrixType +__builtin_spv_InsertIntoCooperativeMatrix( + ComponentType value, + typename vk::khr::CooperativeMatrix::SpirvMatrixType matrix, + uint32_t index); + +// Define the load and store instructions +template +[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType +__builtin_spv_CooperativeMatrixLoadKHR( + [[vk::ext_reference]] PointerType pointer, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType +__builtin_spv_CooperativeMatrixLoadKHR( + [[vk::ext_reference]] PointerType pointer, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType 
+__builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + vk::WorkgroupSpirvPointer pointer, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixStoreKHR( + [[vk::ext_reference]] PointerType pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand); + +template +[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]] void +__builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + vk::WorkgroupSpirvPointer pointer, ObjectType object, + vk::CooperativeMatrixLayout memory_layout, uint stride, + [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope); + +// We cannot define `OpCooperativeMatrixLengthKHR` using ext_instruction because +// one of the operands is a type id. This builtin will have specific code in the +// compiler to expand it. 
+template uint __builtin_spv_CooperativeMatrixLengthKHR(); + +// Arithmetic Instructions +template +[[vk::ext_instruction(/* OpCooperativeMatrixMulAddKHR */ 4459)]] ResultType +__builtin_spv_CooperativeMatrixMulAddKHR(MatrixTypeA a, MatrixTypeB b, + MatrixTypeC c, + [[vk::ext_literal]] int operands); +namespace vk { +namespace khr { + +template +template +CooperativeMatrix +CooperativeMatrix::cast() { + using ResultType = + CooperativeMatrix; + ResultType result; + result._matrix = util::ConversionSelector:: + template Convert(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::negate() { + CooperativeMatrix result; + result._matrix = util::ArithmeticSelector::Negate(_matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator+( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Add(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator-( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Sub(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Mul(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator/( + CooperativeMatrix other) { + CooperativeMatrix result; + result._matrix = + util::ArithmeticSelector::Div(_matrix, other._matrix); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::operator*( + ComponentType scalar) { + CooperativeMatrix result; + result._matrix = __builtin_spv_MatrixTimesScalar(_matrix, scalar); + return result; +} + +template +template +void CooperativeMatrix::Store( + WorkgroupSpirvPointer data, uint32_t stride) { + __builtin_spv_CooperativeMatrixWorkgroupStoreKHR( + data, _matrix, layout, stride, + 
memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeWorkgroup); +} + +template +template +void CooperativeMatrix::Store( + RWStructuredBuffer data, uint32_t index, uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR(data[index], _matrix, layout, stride, + memoryAccessOperands); +} + +template +template +void CooperativeMatrix::CoherentStore( + globallycoherent RWStructuredBuffer data, uint32_t index, + uint32_t stride) { + __builtin_spv_CooperativeMatrixStoreKHR( + data[index], _matrix, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerAvailableMask, + ScopeQueueFamily); +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + vk::WorkgroupSpirvPointer buffer, uint32_t stride) { + CooperativeMatrix result; + result._matrix = + __builtin_spv_CooperativeMatrixWorkgroupLoadKHR( + buffer, layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeWorkgroup); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, memoryAccessOperands); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::CoherentLoad( + RWStructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + buffer[index], layout, stride, + memoryAccessOperands | MemoryAccessNonPrivatePointerMask | + MemoryAccessMakePointerVisibleMask, + ScopeQueueFamily); + return result; +} + +template +template +CooperativeMatrix +CooperativeMatrix::Load( + StructuredBuffer buffer, uint32_t index, uint32_t stride) { + CooperativeMatrix result; + result._matrix = __builtin_spv_CooperativeMatrixLoadKHR( + 
buffer[index], layout, stride, MemoryAccessMaskNone); + return result; +} + +template +CooperativeMatrix +CooperativeMatrix::Splat( + ComponentType v) { + CooperativeMatrix result; + result._matrix = __builtin_spv_ConstructCooperativeMatrix(v); + return result; +} + +template +uint CooperativeMatrix::GetLength() { + return __builtin_spv_CooperativeMatrixLengthKHR(); +} + +template +ComponentType CooperativeMatrix::Get( + uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_LoadPointer(ptr); +} + +template +void CooperativeMatrix::Set( + ComponentType value, uint32_t index) { + // clang-format off + using ComponentPtr = vk::SpirvOpaqueType< + /* OpTypePointer */ 32, + /* function storage class */ vk::Literal >, + ComponentType>; + // clang-format on + ComponentPtr ptr = __builtin_spv_AccessChain(_matrix, index); + return __builtin_spv_StorePointer(ptr, value); +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? 
allSignedComponents + : vk::CooperativeMatrixOperandsMaskNone); + + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +template +CooperativeMatrixAccumulator +cooperativeMatrixSaturatingMultiplyAdd( + CooperativeMatrixA a, + CooperativeMatrixB b, + CooperativeMatrixAccumulator c) { + + const vk::CooperativeMatrixOperandsMask allSignedComponents = + vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask | + vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask; + + const vk::CooperativeMatrixOperandsMask operands = + (vk::CooperativeMatrixOperandsMask)( + a.hasSignedIntegerComponentType + ? allSignedComponents + : vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask); + CooperativeMatrixAccumulator result; + result._matrix = __builtin_spv_CooperativeMatrixMulAddKHR< + typename CooperativeMatrixAccumulator::SpirvMatrixType>( + a._matrix, b._matrix, c._matrix, operands); + return result; +} + +} // namespace khr +} // namespace vk diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h new file mode 100644 index 0000000..bc8672c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h @@ -0,0 +1,227 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _HLSL_VK_KHR_OPCODE_SELECTOR_H_ +#define _HLSL_VK_KHR_OPCODE_SELECTOR_H_ + +#define DECLARE_UNARY_OP(name, opcode) \ + template \ + [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name( \ + ResultType a) + +DECLARE_UNARY_OP(CopyObject, 83); +DECLARE_UNARY_OP(SNegate, 126); +DECLARE_UNARY_OP(FNegate, 127); + +#define DECLARE_CONVERSION_OP(name, opcode) \ + template \ + [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name( \ + OperandType a) + +DECLARE_CONVERSION_OP(ConvertFtoU, 109); +DECLARE_CONVERSION_OP(ConvertFtoS, 110); +DECLARE_CONVERSION_OP(ConvertSToF, 111); +DECLARE_CONVERSION_OP(ConvertUToF, 112); +DECLARE_CONVERSION_OP(UConvert, 113); +DECLARE_CONVERSION_OP(SConvert, 114); +DECLARE_CONVERSION_OP(FConvert, 115); +DECLARE_CONVERSION_OP(Bitcast, 124); + +#undef DECLARY_UNARY_OP + +#define DECLARE_BINOP(name, opcode) \ + template \ + [[vk::ext_instruction(opcode)]] ResultType __builtin_spv_##name( \ + ResultType a, ResultType b) + +DECLARE_BINOP(IAdd, 128); +DECLARE_BINOP(FAdd, 129); +DECLARE_BINOP(ISub, 130); +DECLARE_BINOP(FSub, 131); +DECLARE_BINOP(IMul, 132); +DECLARE_BINOP(FMul, 133); +DECLARE_BINOP(UDiv, 134); +DECLARE_BINOP(SDiv, 135); +DECLARE_BINOP(FDiv, 136); + +#undef DECLARE_BINOP +namespace vk { +namespace util { + +template class ArithmeticSelector; + +#define ARITHMETIC_SELECTOR(BaseType, OpNegate, OpAdd, OpSub, OpMul, OpDiv, \ + SIGNED_INTEGER_TYPE) \ + template <> class ArithmeticSelector { \ + template static T Negate(T a) { return OpNegate(a); } \ + template static T Add(T a, T b) { return OpAdd(a, b); } \ + template static T Sub(T a, T b) { return OpSub(a, b); } \ + template static T Mul(T a, T b) { return OpMul(a, b); } \ + template static T Div(T a, T b) { return OpDiv(a, b); } \ + }; + +ARITHMETIC_SELECTOR(half, __builtin_spv_FNegate, __builtin_spv_FAdd, + __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv, + false); 
+ARITHMETIC_SELECTOR(float, __builtin_spv_FNegate, __builtin_spv_FAdd,
+ __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
+ false);
+ARITHMETIC_SELECTOR(double, __builtin_spv_FNegate, __builtin_spv_FAdd,
+ __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
+ false);
+
+#if __HLSL_ENABLE_16_BIT
+ARITHMETIC_SELECTOR(int16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+ true);
+ARITHMETIC_SELECTOR(uint16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+ false);
+#endif // __HLSL_ENABLE_16_BIT
+
+ARITHMETIC_SELECTOR(int32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+ true);
+ARITHMETIC_SELECTOR(int64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
+ true);
+ARITHMETIC_SELECTOR(uint32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+ false);
+ARITHMETIC_SELECTOR(uint64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
+ __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
+ false);
+
+// The conversion selector will be used to convert one type to another
+// using the SPIR-V conversion instructions. See
+// https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_conversion_instructions.
+// SourceType and TargetType must be integer or floating point scalar type.
+
+// ConversionSelector::Convert converts an object of type S to an object of type
+// T. S must be SourceType, a vector of SourceType, or a cooperative matrix of
+// SourceType. T must be TargetType, a vector of TargetType, or a cooperative
+// matrix of TargetType. T must have the same number of components as S. T is a
+// cooperative matrix if and only if S is a cooperative matrix.
+template class ConversionSelector; + +#define CONVERSION_SELECTOR(SourceType, TargetType, OpConvert) \ + template <> class ConversionSelector { \ + template static T Convert(S a) { \ + return OpConvert(a); \ + } \ + }; + +#if __HLSL_ENABLE_16_BIT +CONVERSION_SELECTOR(uint16_t, uint16_t, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(uint16_t, int16_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(uint16_t, uint32_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint16_t, int32_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(uint16_t, uint64_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint16_t, int64_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(uint16_t, half, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint16_t, float, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint16_t, double, __builtin_spv_ConvertUToF); + +CONVERSION_SELECTOR(int16_t, uint16_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(int16_t, int16_t, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(int16_t, uint32_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int16_t, int32_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(int16_t, uint64_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int16_t, int64_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(int16_t, half, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int16_t, float, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int16_t, double, __builtin_spv_ConvertSToF); + +CONVERSION_SELECTOR(uint32_t, uint16_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint32_t, int16_t, __builtin_spv_SConvert); + +CONVERSION_SELECTOR(int32_t, uint16_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int32_t, int16_t, __builtin_spv_SConvert); + +CONVERSION_SELECTOR(uint64_t, uint16_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint64_t, int16_t, __builtin_spv_SConvert); + +CONVERSION_SELECTOR(int64_t, uint16_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int64_t, int16_t, __builtin_spv_SConvert); + +CONVERSION_SELECTOR(half, uint16_t, 
__builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(half, int16_t, __builtin_spv_ConvertFtoS); + +CONVERSION_SELECTOR(float, uint16_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(float, int16_t, __builtin_spv_ConvertFtoS); + +CONVERSION_SELECTOR(double, uint16_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(double, int16_t, __builtin_spv_ConvertFtoS); +#endif + +CONVERSION_SELECTOR(uint32_t, uint32_t, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(uint32_t, int32_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(uint32_t, uint64_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint32_t, int64_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(uint32_t, half, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint32_t, float, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint32_t, double, __builtin_spv_ConvertUToF); + +CONVERSION_SELECTOR(int32_t, uint32_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(int32_t, int32_t, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(int32_t, uint64_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int32_t, int64_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(int32_t, half, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int32_t, float, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int32_t, double, __builtin_spv_ConvertSToF); + +CONVERSION_SELECTOR(uint64_t, uint32_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(uint64_t, int32_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(uint64_t, uint64_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(uint64_t, int64_t, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(uint64_t, half, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint64_t, float, __builtin_spv_ConvertUToF); +CONVERSION_SELECTOR(uint64_t, double, __builtin_spv_ConvertUToF); + +CONVERSION_SELECTOR(int64_t, uint32_t, __builtin_spv_UConvert); +CONVERSION_SELECTOR(int64_t, int32_t, __builtin_spv_SConvert); +CONVERSION_SELECTOR(int64_t, uint64_t, __builtin_spv_Bitcast); +CONVERSION_SELECTOR(int64_t, int64_t, 
__builtin_spv_CopyObject); +CONVERSION_SELECTOR(int64_t, half, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int64_t, float, __builtin_spv_ConvertSToF); +CONVERSION_SELECTOR(int64_t, double, __builtin_spv_ConvertSToF); + +CONVERSION_SELECTOR(half, uint32_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(half, int32_t, __builtin_spv_ConvertFtoS); +CONVERSION_SELECTOR(half, uint64_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(half, int64_t, __builtin_spv_ConvertFtoS); +CONVERSION_SELECTOR(half, half, __builtin_spv_CopyObject); +#if __HLSL_ENABLE_16_BIT +CONVERSION_SELECTOR(half, float, __builtin_spv_FConvert); +#else +CONVERSION_SELECTOR(half, float, __builtin_spv_CopyObject); +#endif + +CONVERSION_SELECTOR(half, double, __builtin_spv_FConvert); + +CONVERSION_SELECTOR(float, uint32_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(float, int32_t, __builtin_spv_ConvertFtoS); +CONVERSION_SELECTOR(float, uint64_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(float, int64_t, __builtin_spv_ConvertFtoS); +#if __HLSL_ENABLE_16_BIT +CONVERSION_SELECTOR(float, half, __builtin_spv_FConvert); +#else +CONVERSION_SELECTOR(float, half, __builtin_spv_CopyObject); +#endif +CONVERSION_SELECTOR(float, float, __builtin_spv_CopyObject); +CONVERSION_SELECTOR(float, double, __builtin_spv_FConvert); + +CONVERSION_SELECTOR(double, uint32_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(double, int32_t, __builtin_spv_ConvertFtoS); +CONVERSION_SELECTOR(double, uint64_t, __builtin_spv_ConvertFtoU); +CONVERSION_SELECTOR(double, int64_t, __builtin_spv_ConvertFtoS); +CONVERSION_SELECTOR(double, half, __builtin_spv_FConvert); +CONVERSION_SELECTOR(double, float, __builtin_spv_FConvert); +CONVERSION_SELECTOR(double, double, __builtin_spv_CopyObject); +}; // namespace util +} // namespace vk + +#endif // _HLSL_VK_KHR_OPCODE_SELECTOR_H_ diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h new file mode 100644 index 0000000..69bb53b 
--- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h @@ -0,0 +1,85 @@ +// Copyright (c) 2024 Google LLC +// +// This file is licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +#ifndef _HLSL_VK_SPIRV_H_ +#define _HLSL_VK_SPIRV_H_ + +namespace vk { + +enum CooperativeMatrixUse { + CooperativeMatrixUseMatrixAKHR = 0, + CooperativeMatrixUseMatrixBKHR = 1, + CooperativeMatrixUseMatrixAccumulatorKHR = 2, + CooperativeMatrixUseMax = 0x7fffffff, +}; + +enum CooperativeMatrixLayout { + CooperativeMatrixLayoutRowMajorKHR = 0, + CooperativeMatrixLayoutColumnMajorKHR = 1, + CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202, + CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203, + CooperativeMatrixLayoutMax = 0x7fffffff, +}; + +enum CooperativeMatrixOperandsMask { + CooperativeMatrixOperandsMaskNone = 0, + CooperativeMatrixOperandsMatrixASignedComponentsKHRMask = 0x00000001, + CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask = 0x00000002, + CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask = 0x00000004, + CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask = 0x00000008, + CooperativeMatrixOperandsSaturatingAccumulationKHRMask = 0x00000010, +}; + +enum MemoryAccessMask { + MemoryAccessMaskNone = 0, + MemoryAccessVolatileMask = 0x00000001, + MemoryAccessAlignedMask = 0x00000002, + MemoryAccessNontemporalMask = 0x00000004, + MemoryAccessMakePointerAvailableMask = 0x00000008, + MemoryAccessMakePointerAvailableKHRMask = 0x00000008, + MemoryAccessMakePointerVisibleMask = 0x00000010, + MemoryAccessMakePointerVisibleKHRMask = 0x00000010, + MemoryAccessNonPrivatePointerMask = 0x00000020, + MemoryAccessNonPrivatePointerKHRMask = 0x00000020, + MemoryAccessAliasScopeINTELMaskMask = 0x00010000, + MemoryAccessNoAliasINTELMaskMask = 0x00020000, +}; + +enum Scope { + ScopeCrossDevice = 0, + ScopeDevice = 1, + 
ScopeWorkgroup = 2,
+ ScopeSubgroup = 3,
+ ScopeInvocation = 4,
+ ScopeQueueFamily = 5,
+ ScopeQueueFamilyKHR = 5,
+ ScopeShaderCallKHR = 6,
+ ScopeMax = 0x7fffffff,
+};
+
+enum StorageClass {
+ StorageClassWorkgroup = 4,
+};
+
+// An opaque type to represent a SPIR-V pointer to the workgroup storage class.
+// clang-format off
+template <typename PointeeType>
+using WorkgroupSpirvPointer = const vk::SpirvOpaqueType<
+ /* OpTypePointer */ 32,
+ vk::Literal<vk::integral_constant<uint, StorageClassWorkgroup> >,
+ PointeeType>;
+// clang-format on
+
+// Returns an opaque SPIR-V pointer to v. The memory object v's storage class
+// modifier must be groupshared. If the incorrect storage class is used, then
+// there will be a validation error, and it will not show the correct usage.
+template <typename T>
+[[vk::ext_instruction(/* OpCopyObject */ 83)]] WorkgroupSpirvPointer<T>
+GetGroupSharedAddress([[vk::ext_reference]] T v);
+
+} // namespace vk
+
+#endif // _HLSL_VK_SPIRV_H_
-- cgit v1.2.3