contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275

// Copyright (c) 2024 Google LLC
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

#ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_
#define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_

// Fail the compilation early when targeting a SPIR-V 1.x version older than
// 1.6, which is the minimum this header supports (see the error text below).
#if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6
#error "CooperativeMatrix requires a minimum of SPIR-V 1.6"
#endif

#include "vk/spirv.h"

namespace vk {
namespace khr {

// The base cooperative matrix class. The template arguments correspond to the
// operands in the OpTypeCooperativeMatrixKHR instruction:
//   ComponentType - the type of each element of the matrix.
//   scope         - the scope operand of the matrix type.
//   rows, columns - the dimensions of the matrix.
//   use           - the use operand of the matrix type (see the
//                   CooperativeMatrixA/B/Accumulator aliases below).
//
// NOTE(review): member functions that are only declared here are presumably
// defined in cooperative_matrix.impl, which is included at the end of this
// header; confirm there for the exact SPIR-V that is emitted.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
class CooperativeMatrix {
  // Returns a cooperative matrix with the same scope, dimensions, and use as
  // this one, but with NewComponentType as its component type.
  // NOTE(review): presumably converts each element to NewComponentType; the
  // conversion instruction used is not visible in this header.
  template <class NewComponentType>
  CooperativeMatrix<NewComponentType, scope, rows, columns, use> cast();

  // Apply OpSNegate or OpFNegate, depending on ComponentType, in an
  // element-by-element manner.
  CooperativeMatrix negate();

  // Apply OpIAdd or OpFAdd, depending on ComponentType, in an
  // element-by-element manner.
  CooperativeMatrix operator+(CooperativeMatrix other);

  // Apply OpISub or OpFSub, depending on ComponentType, in an
  // element-by-element manner.
  CooperativeMatrix operator-(CooperativeMatrix other);

  // Apply OpIMul or OpFMul, depending on ComponentType, in an
  // element-by-element manner.
  CooperativeMatrix operator*(CooperativeMatrix other);

  // Apply OpSDiv, OpUDiv or OpFDiv, depending on ComponentType, in an
  // element-by-element manner.
  CooperativeMatrix operator/(CooperativeMatrix other);

  // Apply OpMatrixTimesScalar in an element-by-element manner.
  CooperativeMatrix operator*(ComponentType scalar);

  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
  // data using the given memory layout, stride, and memory access operands.
  // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope
  // will be added to the memory access operands to make the memory coherent.
  //
  // This function uses a SPIR-V pointer because HLSL does not allow a
  // groupshared memory object to be passed by reference. The pointer is a hack
  // to get around that.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands.
  template <CooperativeMatrixLayout layout, class Type>
  void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride) {
    Store<MemoryAccessMaskNone, layout>(data, stride);
  }

  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
  // data[index] using the given memory layout, stride, and memory access
  // operands. The layout and stride will be passed to the SPIR-V instruction as
  // is. The precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands.
  template <CooperativeMatrixLayout layout, class Type>
  void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride) {
    Store<MemoryAccessMaskNone, layout>(data, index, stride);
  }

  // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to
  // data[index] using the given memory layout, stride, and memory access
  // operands. `NonPrivatePointer` and `MakePointerAvailable` with the
  // QueueFamily scope will be added to the memory access operands to make the
  // memory coherent.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  void CoherentStore(globallycoherent RWStructuredBuffer<Type> data,
                     uint32_t index, uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands template argument.
  template <CooperativeMatrixLayout layout, class Type>
  void CoherentStore(globallycoherent RWStructuredBuffer<Type> data,
                     uint32_t index, uint32_t stride) {
    CoherentStore<MemoryAccessMaskNone, layout>(data, index, stride);
  }

  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
  // data using the given memory layout, stride, and memory access operands.
  // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope
  // will be added to the memory access operands to make the memory coherent.
  //
  // This function uses a SPIR-V pointer because HLSL does not allow a
  // groupshared memory object to be passed by reference. The pointer is a hack
  // to get around that.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data,
                                uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands.
  template <CooperativeMatrixLayout layout, class Type>
  static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data,
                                uint32_t stride) {
    return Load<MemoryAccessMaskNone, layout>(data, stride);
  }

  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
  // data[index] using the given memory layout, stride, and memory access
  // operands.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index,
                                uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands.
  template <CooperativeMatrixLayout layout, class Type>
  static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index,
                                uint32_t stride) {
    return Load<MemoryAccessMaskNone, layout>(data, index, stride);
  }

  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
  // data[index] using the given memory layout, stride, and memory access
  // operands. `NonPrivatePointer` and `MakePointerVisible` with the QueueFamily
  // scope will be added to the memory access operands to make the memory
  // coherent.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  static CooperativeMatrix
  CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
               uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands template argument.
  template <CooperativeMatrixLayout layout, class Type>
  static CooperativeMatrix
  CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
               uint32_t stride) {
    return CoherentLoad<MemoryAccessMaskNone, layout>(data, index, stride);
  }

  // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from
  // data[index] using the given memory layout, stride, and memory access
  // operands. No memory access bits are added to the operands. Since the memory
  // is read-only, there should be no need.
  //
  // The layout and stride will be passed to the SPIR-V instruction as is. The
  // precise meaning can be found in the specification for
  // SPV_KHR_cooperative_matrix.
  template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
            class Type>
  static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
                                uint32_t stride);

  // Convenience overload of the function above that uses MemoryAccessMaskNone
  // for the memory access operands.
  template <CooperativeMatrixLayout layout, class Type>
  static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
                                uint32_t stride) {
    return Load<MemoryAccessMaskNone, layout>(data, index, stride);
  }

  // Constructs a cooperative matrix with all values initialized to v. Note that
  // all threads in scope must have the same value for v.
  static CooperativeMatrix Splat(ComponentType v);

  // Returns the result of OpCooperativeMatrixLengthKHR on the current type.
  static uint32_t GetLength();

  // Functions to access the elements of the cooperative matrix. The index must
  // be less than GetLength().
  void Set(ComponentType value, uint32_t index);
  ComponentType Get(uint32_t index);

  // True when ComponentType(0) - ComponentType(1) compares less than
  // ComponentType(0), i.e. when ComponentType can hold a negative value.
  // NOTE(review): this expression also evaluates to true for floating-point
  // component types, so despite the name it does not by itself establish that
  // ComponentType is an integer; presumably callers combine it with a
  // separate check. Confirm against its uses in cooperative_matrix.impl.
  static const bool hasSignedIntegerComponentType =
      (ComponentType(0) - ComponentType(1) < ComponentType(0));

  // The SPIR-V type backing this class: an OpTypeCooperativeMatrixKHR whose
  // operands are the component type followed by the scope, rows, columns, and
  // use template arguments wrapped as integral constants.
  // clang-format off
  using SpirvMatrixType = vk::SpirvOpaqueType<
      /* OpTypeCooperativeMatrixKHR */ 4456, ComponentType,
      vk::integral_constant<uint, scope>, vk::integral_constant<uint, rows>,
      vk::integral_constant<uint, columns>, vk::integral_constant<uint, use> >;

  // The underlying SPIR-V cooperative matrix value. The attributes attach the
  // SPV_KHR_cooperative_matrix extension and the CooperativeMatrixKHR and
  // VulkanMemoryModel capabilities to this member.
  [[vk::ext_extension("SPV_KHR_cooperative_matrix")]]
  [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]]
  [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]]
  SpirvMatrixType _matrix;
  // clang-format on
};

// Cooperative matrix that can be used in the "a" position of a multiply add
// instruction (r = (a * b) + c). This is CooperativeMatrix with its use
// argument fixed to CooperativeMatrixUseMatrixAKHR.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixA =
    CooperativeMatrix<ComponentType, scope, rows, columns,
                      CooperativeMatrixUseMatrixAKHR>;

// Cooperative matrix that can be used in the "b" position of a multiply add
// instruction (r = (a * b) + c). This is CooperativeMatrix with its use
// argument fixed to CooperativeMatrixUseMatrixBKHR.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixB =
    CooperativeMatrix<ComponentType, scope, rows, columns,
                      CooperativeMatrixUseMatrixBKHR>;

// Cooperative matrix that can be used in the "r" and "c" positions of a
// multiply add instruction (r = (a * b) + c). This is CooperativeMatrix with
// its use argument fixed to CooperativeMatrixUseMatrixAccumulatorKHR.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixAccumulator =
    CooperativeMatrix<ComponentType, scope, rows, columns,
                      CooperativeMatrixUseMatrixAccumulatorKHR>;

// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and
// c. The cooperative matrix operands are inferred, with the
// SaturatingAccumulationKHR bit not set.
//
// All three matrices share the same ComponentType and scope. a is a
// rows x K "A" matrix, b is a K x columns "B" matrix, and both c and the
// returned matrix are rows x columns accumulators.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);

// Returns the result of OpCooperativeMatrixMulAddKHR when applied to a, b, and
// c. The cooperative matrix operands are inferred, with the
// SaturatingAccumulationKHR bit set.
//
// All three matrices share the same ComponentType and scope. a is a
// rows x K "A" matrix, b is a K x columns "B" matrix, and both c and the
// returned matrix are rows x columns accumulators.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixSaturatingMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);

} // namespace khr
} // namespace vk

#include "cooperative_matrix.impl"
#endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_