diff options
Diffstat (limited to 'contrib/dxc_2025_07_14/inc/hlsl')
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt | 222 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/README.txt | 7 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h | 198 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h | 275 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl | 377 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h | 227 | ||||
| -rw-r--r-- | contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h | 85 |
7 files changed, 1391 insertions, 0 deletions
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt b/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt new file mode 100644 index 0000000..94472c3 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/LICENSE.txt | |||
| @@ -0,0 +1,222 @@ | |||
| 1 | ============================================================================== | ||
| 2 | The LLVM Project is under the Apache License v2.0 with LLVM Exceptions: | ||
| 3 | ============================================================================== | ||
| 4 | |||
| 5 | Apache License | ||
| 6 | Version 2.0, January 2004 | ||
| 7 | http://www.apache.org/licenses/ | ||
| 8 | |||
| 9 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION | ||
| 10 | |||
| 11 | 1. Definitions. | ||
| 12 | |||
| 13 | "License" shall mean the terms and conditions for use, reproduction, | ||
| 14 | and distribution as defined by Sections 1 through 9 of this document. | ||
| 15 | |||
| 16 | "Licensor" shall mean the copyright owner or entity authorized by | ||
| 17 | the copyright owner that is granting the License. | ||
| 18 | |||
| 19 | "Legal Entity" shall mean the union of the acting entity and all | ||
| 20 | other entities that control, are controlled by, or are under common | ||
| 21 | control with that entity. For the purposes of this definition, | ||
| 22 | "control" means (i) the power, direct or indirect, to cause the | ||
| 23 | direction or management of such entity, whether by contract or | ||
| 24 | otherwise, or (ii) ownership of fifty percent (50%) or more of the | ||
| 25 | outstanding shares, or (iii) beneficial ownership of such entity. | ||
| 26 | |||
| 27 | "You" (or "Your") shall mean an individual or Legal Entity | ||
| 28 | exercising permissions granted by this License. | ||
| 29 | |||
| 30 | "Source" form shall mean the preferred form for making modifications, | ||
| 31 | including but not limited to software source code, documentation | ||
| 32 | source, and configuration files. | ||
| 33 | |||
| 34 | "Object" form shall mean any form resulting from mechanical | ||
| 35 | transformation or translation of a Source form, including but | ||
| 36 | not limited to compiled object code, generated documentation, | ||
| 37 | and conversions to other media types. | ||
| 38 | |||
| 39 | "Work" shall mean the work of authorship, whether in Source or | ||
| 40 | Object form, made available under the License, as indicated by a | ||
| 41 | copyright notice that is included in or attached to the work | ||
| 42 | (an example is provided in the Appendix below). | ||
| 43 | |||
| 44 | "Derivative Works" shall mean any work, whether in Source or Object | ||
| 45 | form, that is based on (or derived from) the Work and for which the | ||
| 46 | editorial revisions, annotations, elaborations, or other modifications | ||
| 47 | represent, as a whole, an original work of authorship. For the purposes | ||
| 48 | of this License, Derivative Works shall not include works that remain | ||
| 49 | separable from, or merely link (or bind by name) to the interfaces of, | ||
| 50 | the Work and Derivative Works thereof. | ||
| 51 | |||
| 52 | "Contribution" shall mean any work of authorship, including | ||
| 53 | the original version of the Work and any modifications or additions | ||
| 54 | to that Work or Derivative Works thereof, that is intentionally | ||
| 55 | submitted to Licensor for inclusion in the Work by the copyright owner | ||
| 56 | or by an individual or Legal Entity authorized to submit on behalf of | ||
| 57 | the copyright owner. For the purposes of this definition, "submitted" | ||
| 58 | means any form of electronic, verbal, or written communication sent | ||
| 59 | to the Licensor or its representatives, including but not limited to | ||
| 60 | communication on electronic mailing lists, source code control systems, | ||
| 61 | and issue tracking systems that are managed by, or on behalf of, the | ||
| 62 | Licensor for the purpose of discussing and improving the Work, but | ||
| 63 | excluding communication that is conspicuously marked or otherwise | ||
| 64 | designated in writing by the copyright owner as "Not a Contribution." | ||
| 65 | |||
| 66 | "Contributor" shall mean Licensor and any individual or Legal Entity | ||
| 67 | on behalf of whom a Contribution has been received by Licensor and | ||
| 68 | subsequently incorporated within the Work. | ||
| 69 | |||
| 70 | 2. Grant of Copyright License. Subject to the terms and conditions of | ||
| 71 | this License, each Contributor hereby grants to You a perpetual, | ||
| 72 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| 73 | copyright license to reproduce, prepare Derivative Works of, | ||
| 74 | publicly display, publicly perform, sublicense, and distribute the | ||
| 75 | Work and such Derivative Works in Source or Object form. | ||
| 76 | |||
| 77 | 3. Grant of Patent License. Subject to the terms and conditions of | ||
| 78 | this License, each Contributor hereby grants to You a perpetual, | ||
| 79 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable | ||
| 80 | (except as stated in this section) patent license to make, have made, | ||
| 81 | use, offer to sell, sell, import, and otherwise transfer the Work, | ||
| 82 | where such license applies only to those patent claims licensable | ||
| 83 | by such Contributor that are necessarily infringed by their | ||
| 84 | Contribution(s) alone or by combination of their Contribution(s) | ||
| 85 | with the Work to which such Contribution(s) was submitted. If You | ||
| 86 | institute patent litigation against any entity (including a | ||
| 87 | cross-claim or counterclaim in a lawsuit) alleging that the Work | ||
| 88 | or a Contribution incorporated within the Work constitutes direct | ||
| 89 | or contributory patent infringement, then any patent licenses | ||
| 90 | granted to You under this License for that Work shall terminate | ||
| 91 | as of the date such litigation is filed. | ||
| 92 | |||
| 93 | 4. Redistribution. You may reproduce and distribute copies of the | ||
| 94 | Work or Derivative Works thereof in any medium, with or without | ||
| 95 | modifications, and in Source or Object form, provided that You | ||
| 96 | meet the following conditions: | ||
| 97 | |||
| 98 | (a) You must give any other recipients of the Work or | ||
| 99 | Derivative Works a copy of this License; and | ||
| 100 | |||
| 101 | (b) You must cause any modified files to carry prominent notices | ||
| 102 | stating that You changed the files; and | ||
| 103 | |||
| 104 | (c) You must retain, in the Source form of any Derivative Works | ||
| 105 | that You distribute, all copyright, patent, trademark, and | ||
| 106 | attribution notices from the Source form of the Work, | ||
| 107 | excluding those notices that do not pertain to any part of | ||
| 108 | the Derivative Works; and | ||
| 109 | |||
| 110 | (d) If the Work includes a "NOTICE" text file as part of its | ||
| 111 | distribution, then any Derivative Works that You distribute must | ||
| 112 | include a readable copy of the attribution notices contained | ||
| 113 | within such NOTICE file, excluding those notices that do not | ||
| 114 | pertain to any part of the Derivative Works, in at least one | ||
| 115 | of the following places: within a NOTICE text file distributed | ||
| 116 | as part of the Derivative Works; within the Source form or | ||
| 117 | documentation, if provided along with the Derivative Works; or, | ||
| 118 | within a display generated by the Derivative Works, if and | ||
| 119 | wherever such third-party notices normally appear. The contents | ||
| 120 | of the NOTICE file are for informational purposes only and | ||
| 121 | do not modify the License. You may add Your own attribution | ||
| 122 | notices within Derivative Works that You distribute, alongside | ||
| 123 | or as an addendum to the NOTICE text from the Work, provided | ||
| 124 | that such additional attribution notices cannot be construed | ||
| 125 | as modifying the License. | ||
| 126 | |||
| 127 | You may add Your own copyright statement to Your modifications and | ||
| 128 | may provide additional or different license terms and conditions | ||
| 129 | for use, reproduction, or distribution of Your modifications, or | ||
| 130 | for any such Derivative Works as a whole, provided Your use, | ||
| 131 | reproduction, and distribution of the Work otherwise complies with | ||
| 132 | the conditions stated in this License. | ||
| 133 | |||
| 134 | 5. Submission of Contributions. Unless You explicitly state otherwise, | ||
| 135 | any Contribution intentionally submitted for inclusion in the Work | ||
| 136 | by You to the Licensor shall be under the terms and conditions of | ||
| 137 | this License, without any additional terms or conditions. | ||
| 138 | Notwithstanding the above, nothing herein shall supersede or modify | ||
| 139 | the terms of any separate license agreement you may have executed | ||
| 140 | with Licensor regarding such Contributions. | ||
| 141 | |||
| 142 | 6. Trademarks. This License does not grant permission to use the trade | ||
| 143 | names, trademarks, service marks, or product names of the Licensor, | ||
| 144 | except as required for reasonable and customary use in describing the | ||
| 145 | origin of the Work and reproducing the content of the NOTICE file. | ||
| 146 | |||
| 147 | 7. Disclaimer of Warranty. Unless required by applicable law or | ||
| 148 | agreed to in writing, Licensor provides the Work (and each | ||
| 149 | Contributor provides its Contributions) on an "AS IS" BASIS, | ||
| 150 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or | ||
| 151 | implied, including, without limitation, any warranties or conditions | ||
| 152 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A | ||
| 153 | PARTICULAR PURPOSE. You are solely responsible for determining the | ||
| 154 | appropriateness of using or redistributing the Work and assume any | ||
| 155 | risks associated with Your exercise of permissions under this License. | ||
| 156 | |||
| 157 | 8. Limitation of Liability. In no event and under no legal theory, | ||
| 158 | whether in tort (including negligence), contract, or otherwise, | ||
| 159 | unless required by applicable law (such as deliberate and grossly | ||
| 160 | negligent acts) or agreed to in writing, shall any Contributor be | ||
| 161 | liable to You for damages, including any direct, indirect, special, | ||
| 162 | incidental, or consequential damages of any character arising as a | ||
| 163 | result of this License or out of the use or inability to use the | ||
| 164 | Work (including but not limited to damages for loss of goodwill, | ||
| 165 | work stoppage, computer failure or malfunction, or any and all | ||
| 166 | other commercial damages or losses), even if such Contributor | ||
| 167 | has been advised of the possibility of such damages. | ||
| 168 | |||
| 169 | 9. Accepting Warranty or Additional Liability. While redistributing | ||
| 170 | the Work or Derivative Works thereof, You may choose to offer, | ||
| 171 | and charge a fee for, acceptance of support, warranty, indemnity, | ||
| 172 | or other liability obligations and/or rights consistent with this | ||
| 173 | License. However, in accepting such obligations, You may act only | ||
| 174 | on Your own behalf and on Your sole responsibility, not on behalf | ||
| 175 | of any other Contributor, and only if You agree to indemnify, | ||
| 176 | defend, and hold each Contributor harmless for any liability | ||
| 177 | incurred by, or claims asserted against, such Contributor by reason | ||
| 178 | of your accepting any such warranty or additional liability. | ||
| 179 | |||
| 180 | END OF TERMS AND CONDITIONS | ||
| 181 | |||
| 182 | APPENDIX: How to apply the Apache License to your work. | ||
| 183 | |||
| 184 | To apply the Apache License to your work, attach the following | ||
| 185 | boilerplate notice, with the fields enclosed by brackets "[]" | ||
| 186 | replaced with your own identifying information. (Don't include | ||
| 187 | the brackets!) The text should be enclosed in the appropriate | ||
| 188 | comment syntax for the file format. We also recommend that a | ||
| 189 | file or class name and description of purpose be included on the | ||
| 190 | same "printed page" as the copyright notice for easier | ||
| 191 | identification within third-party archives. | ||
| 192 | |||
| 193 | Copyright [yyyy] [name of copyright owner] | ||
| 194 | |||
| 195 | Licensed under the Apache License, Version 2.0 (the "License"); | ||
| 196 | you may not use this file except in compliance with the License. | ||
| 197 | You may obtain a copy of the License at | ||
| 198 | |||
| 199 | http://www.apache.org/licenses/LICENSE-2.0 | ||
| 200 | |||
| 201 | Unless required by applicable law or agreed to in writing, software | ||
| 202 | distributed under the License is distributed on an "AS IS" BASIS, | ||
| 203 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| 204 | See the License for the specific language governing permissions and | ||
| 205 | limitations under the License. | ||
| 206 | |||
| 207 | |||
| 208 | ---- LLVM Exceptions to the Apache 2.0 License ---- | ||
| 209 | |||
| 210 | As an exception, if, as a result of your compiling your source code, portions | ||
| 211 | of this Software are embedded into an Object form of such source code, you | ||
| 212 | may redistribute such embedded portions in such Object form without complying | ||
| 213 | with the conditions of Sections 4(a), 4(b) and 4(d) of the License. | ||
| 214 | |||
| 215 | In addition, if you combine or link compiled forms of this Software with | ||
| 216 | software that is licensed under the GPLv2 ("Combined Software") and if a | ||
| 217 | court of competent jurisdiction determines that the patent provision (Section | ||
| 218 | 3), the indemnity provision (Section 9) or other Section of the License | ||
| 219 | conflicts with the conditions of the GPLv2, you may retroactively and | ||
| 220 | prospectively choose to deem waived or otherwise exclude such Section(s) of | ||
| 221 | the License, but only in their entirety and only with respect to the Combined | ||
| 222 | Software. | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/README.txt b/contrib/dxc_2025_07_14/inc/hlsl/README.txt new file mode 100644 index 0000000..a760bb9 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/README.txt | |||
| @@ -0,0 +1,7 @@ | |||
| 1 | HLSL Standard Header Library | ||
| 2 | ============================ | ||
| 3 | |||
| 4 | The contents of this directory and subdirectories are the HLSL Standard Header | ||
| 5 | library. These headers are open source software. You may freely distribute all | ||
| 6 | or parts of these headers under the terms of the license agreement found in | ||
| 7 | LICENSE.txt. | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h b/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h new file mode 100644 index 0000000..4f5e620 --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/dx/linalg.h | |||
| @@ -0,0 +1,198 @@ | |||
| 1 | // Header for linear algebra APIs. | ||
| 2 | |||
| 3 | #if __spirv__ | ||
| 4 | #error "Cooperative vectors not (yet) supported for SPIRV" | ||
| 5 | #endif | ||
| 6 | |||
| 7 | #if ((__SHADER_TARGET_MAJOR > 6) || \ | ||
| 8 | (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 9)) && \ | ||
| 9 | (__HLSL_VERSION >= 2021) | ||
| 10 | |||
| 11 | namespace dx { | ||
| 12 | namespace linalg { | ||
| 13 | |||
| 14 | // Element data type tags; each enumerator is annotated with the ComponentType it | ||
| 15 | // corresponds to. NOTE: can't be an enum class because we get this error: | ||
| 16 | // error: non-type template argument of type 'dx::linalg::DataType' is not | ||
| 17 | // an integral constant expression | ||
| 18 | enum DataType { | ||
| 19 | DATA_TYPE_SINT16 = 2, // ComponentType::I16 | ||
| 20 | DATA_TYPE_UINT16 = 3, // ComponentType::U16 | ||
| 21 | DATA_TYPE_SINT32 = 4, // ComponentType::I32 | ||
| 22 | DATA_TYPE_UINT32 = 5, // ComponentType::U32 | ||
| 23 | DATA_TYPE_FLOAT16 = 8, // ComponentType::F16 | ||
| 24 | DATA_TYPE_FLOAT32 = 9, // ComponentType::F32 | ||
| 25 | DATA_TYPE_SINT8_T4_PACKED = 17, // ComponentType::PackedS8x32 | ||
| 26 | DATA_TYPE_UINT8_T4_PACKED = 18, // ComponentType::PackedU8x32 | ||
| 27 | DATA_TYPE_UINT8 = 19, // ComponentType::U8 | ||
| 28 | DATA_TYPE_SINT8 = 20, // ComponentType::I8 | ||
| 29 | DATA_TYPE_FLOAT8_E4M3 = 21, // ComponentType::F8_E4M3 | ||
| 30 | // (1 sign, 4 exp, 3 mantissa bits) | ||
| 31 | DATA_TYPE_FLOAT8_E5M2 = 22, // ComponentType::F8_E5M2 | ||
| 32 | // (1 sign, 5 exp, 2 mantissa bits) | ||
| 33 | }; | ||
| 34 | |||
| 35 | enum MatrixLayout { /* layout tags used as the MatrixLayout template argument of MatrixRefImpl */ | ||
| 36 | MATRIX_LAYOUT_ROW_MAJOR = 0, | ||
| 37 | MATRIX_LAYOUT_COLUMN_MAJOR = 1, | ||
| 38 | MATRIX_LAYOUT_MUL_OPTIMAL = 2, | ||
| 39 | MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = 3 | ||
| 40 | }; | ||
| 41 | |||
| 42 | // Helper for signedness: IsUnsigned<T>::Value is a compile-time bool that Mul and MulAdd pass to the builtins for their output and input element types. | ||
| 43 | // The primary template declares no Value, so reading ::Value for a type that is not specialized below fails to compile. | ||
| 44 | // NOTE(review): int8_t4_packed is specialized as `true` (unsigned), same as uint8_t4_packed - confirm this is intended. | ||
| 45 | namespace details { | ||
| 46 | |||
| 47 | template <typename T> struct IsUnsigned {}; | ||
| 48 | |||
| 49 | #define _SPECIALIZE_ISUNSIGNED(type, value) \ | ||
| 50 | template <> struct IsUnsigned<type> { \ | ||
| 51 | static const bool Value = value; \ | ||
| 52 | } | ||
| 53 | |||
| 54 | _SPECIALIZE_ISUNSIGNED(uint8_t4_packed, true); | ||
| 55 | _SPECIALIZE_ISUNSIGNED(int8_t4_packed, true); | ||
| 56 | _SPECIALIZE_ISUNSIGNED(uint32_t, true); | ||
| 57 | _SPECIALIZE_ISUNSIGNED(int32_t, false); | ||
| 58 | _SPECIALIZE_ISUNSIGNED(float32_t, false); | ||
| 59 | |||
| 60 | #ifdef __HLSL_ENABLE_16_BIT | ||
| 61 | _SPECIALIZE_ISUNSIGNED(uint16_t, true); | ||
| 62 | _SPECIALIZE_ISUNSIGNED(int16_t, false); | ||
| 63 | _SPECIALIZE_ISUNSIGNED(float16_t, false); | ||
| 64 | #else // !__HLSL_ENABLE_16_BIT | ||
| 65 | _SPECIALIZE_ISUNSIGNED(half, false); | ||
| 66 | #endif //__HLSL_ENABLE_16_BIT | ||
| 67 | |||
| 68 | #undef _SPECIALIZE_ISUNSIGNED | ||
| 69 | |||
| 70 | } // namespace details | ||
| 71 | |||
| 72 | // (RW)MatrixRef: a matrix reference made of a buffer, a StartOffset and a Stride. | ||
| 73 | // The data type (DT), dimensions (M, K), layout (ML) and Transpose flag are carried as template arguments only. | ||
| 74 | // MatrixRef uses ByteAddressBuffer; RWMatrixRef uses RWByteAddressBuffer. Transpose defaults to false in both aliases. | ||
| 75 | |||
| 76 | template <typename BufferTy, DataType DT, uint M, uint K, MatrixLayout ML, | ||
| 77 | bool Transpose> | ||
| 78 | struct MatrixRefImpl { | ||
| 79 | BufferTy Buffer; | ||
| 80 | uint StartOffset; | ||
| 81 | uint Stride; | ||
| 82 | }; | ||
| 83 | |||
| 84 | template <DataType DT, uint M, uint K, MatrixLayout ML, bool Transpose = false> | ||
| 85 | using MatrixRef = MatrixRefImpl<ByteAddressBuffer, DT, M, K, ML, Transpose>; | ||
| 86 | |||
| 87 | template <DataType DT, uint M, uint K, MatrixLayout ML, bool Transpose = false> | ||
| 88 | using RWMatrixRef = MatrixRefImpl<RWByteAddressBuffer, DT, M, K, ML, Transpose>; | ||
| 89 | |||
| 90 | // (RW)VectorRef: a vector reference made of a buffer and a StartOffset. | ||
| 91 | // The data type (DT) is carried as a template argument only. | ||
| 92 | // VectorRef uses ByteAddressBuffer; RWVectorRef uses RWByteAddressBuffer. | ||
| 93 | |||
| 94 | template <typename BufferTy, DataType DT> struct VectorRefImpl { | ||
| 95 | BufferTy Buffer; | ||
| 96 | uint StartOffset; | ||
| 97 | }; | ||
| 98 | |||
| 99 | template <DataType DT> using VectorRef = VectorRefImpl<ByteAddressBuffer, DT>; | ||
| 100 | |||
| 101 | template <DataType DT> | ||
| 102 | using RWVectorRef = VectorRefImpl<RWByteAddressBuffer, DT>; | ||
| 103 | |||
| 104 | // Vector: InterpretedVector wraps a vector<T, N> (member Data) and tags it with a DataType. | ||
| 105 | // DT is a template argument only; the struct stores nothing besides Data. | ||
| 106 | // MakeInterpretedVector<DT>(Vec) builds one from Vec; T and N are deduced from the argument. | ||
| 107 | |||
| 108 | template <typename T, int N, DataType DT> struct InterpretedVector { | ||
| 109 | vector<T, N> Data; | ||
| 110 | }; | ||
| 111 | |||
| 112 | template <DataType DT, typename T, int N> | ||
| 113 | InterpretedVector<T, N, DT> MakeInterpretedVector(vector<T, N> Vec) { | ||
| 114 | InterpretedVector<T, N, DT> IV = {Vec}; | ||
| 115 | return IV; | ||
| 116 | } | ||
| 117 | |||
| 118 | // Mul: forwards the matrix reference (buffer, start offset, data type, M, K, layout, transpose, stride) | ||
| 119 | // and the interpreted input vector to __builtin_MatVecMul and returns the resulting vector<OutputElTy, MatrixM>. | ||
| 120 | // Signedness flags for the output and input element types come from details::IsUnsigned; OutputElTy appears only in the return type. | ||
| 121 | |||
| 122 | template <typename OutputElTy, typename InputElTy, int InputElCount, | ||
| 123 | typename MatrixBufferTy, DataType InputDT, DataType MatrixDT, | ||
| 124 | uint MatrixM, uint MatrixK, MatrixLayout MatrixLayout, | ||
| 125 | bool MatrixTranspose> | ||
| 126 | vector<OutputElTy, MatrixM> | ||
| 127 | Mul(MatrixRefImpl<MatrixBufferTy, MatrixDT, MatrixM, MatrixK, MatrixLayout, | ||
| 128 | MatrixTranspose> | ||
| 129 | Matrix, | ||
| 130 | InterpretedVector<InputElTy, InputElCount, InputDT> InputVector) { | ||
| 131 | |||
| 132 | vector<OutputElTy, MatrixM> OutputVector; | ||
| 133 | |||
| 134 | __builtin_MatVecMul( | ||
| 135 | /*out*/ OutputVector, details::IsUnsigned<OutputElTy>::Value, | ||
| 136 | InputVector.Data, details::IsUnsigned<InputElTy>::Value, InputDT, | ||
| 137 | Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, | ||
| 138 | MatrixLayout, MatrixTranspose, Matrix.Stride); | ||
| 139 | |||
| 140 | return OutputVector; | ||
| 141 | } | ||
| 142 | |||
| 143 | // MulAdd: forwards the matrix reference, the interpreted input vector and the bias | ||
| 144 | // vector reference (buffer, start offset, data type) to __builtin_MatVecMulAdd and | ||
| 145 | // returns the resulting vector<OutputElTy, MatrixM>. Signedness flags come from details::IsUnsigned. | ||
| 146 | |||
| 147 | template <typename OutputElTy, typename InputElTy, int InputElCount, | ||
| 148 | typename MatrixBufferTy, DataType InputDT, DataType MatrixDT, | ||
| 149 | uint MatrixM, uint MatrixK, MatrixLayout MatrixLayout, | ||
| 150 | bool MatrixTranspose, typename BiasVectorBufferTy, | ||
| 151 | DataType BiasVectorDT> | ||
| 152 | vector<OutputElTy, MatrixM> | ||
| 153 | MulAdd(MatrixRefImpl<MatrixBufferTy, MatrixDT, MatrixM, MatrixK, MatrixLayout, | ||
| 154 | MatrixTranspose> | ||
| 155 | Matrix, | ||
| 156 | InterpretedVector<InputElTy, InputElCount, InputDT> InputVector, | ||
| 157 | VectorRefImpl<BiasVectorBufferTy, BiasVectorDT> BiasVector) { | ||
| 158 | |||
| 159 | vector<OutputElTy, MatrixM> OutputVector; | ||
| 160 | |||
| 161 | __builtin_MatVecMulAdd( | ||
| 162 | /*out*/ OutputVector, details::IsUnsigned<OutputElTy>::Value, | ||
| 163 | InputVector.Data, details::IsUnsigned<InputElTy>::Value, InputDT, | ||
| 164 | Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, | ||
| 165 | MatrixLayout, MatrixTranspose, Matrix.Stride, BiasVector.Buffer, | ||
| 166 | BiasVector.StartOffset, BiasVectorDT); | ||
| 167 | |||
| 168 | return OutputVector; | ||
| 169 | } | ||
| 170 | |||
| 171 | // OuterProductAccumulate: passes the two input vectors and the writable matrix | ||
| 172 | // reference (buffer, start offset, data type, layout, stride) to __builtin_OuterProductAccumulate. | ||
| 173 | // The matrix parameter is an RWMatrixRef with Transpose fixed to false; its dimensions match the two vector lengths. | ||
| 174 | |||
| 175 | template <typename ElTy, int MatrixM, int MatrixN, DataType MatrixDT, | ||
| 176 | MatrixLayout MatrixLayout> | ||
| 177 | void OuterProductAccumulate( | ||
| 178 | vector<ElTy, MatrixM> InputVector1, vector<ElTy, MatrixN> InputVector2, | ||
| 179 | RWMatrixRef<MatrixDT, MatrixM, MatrixN, MatrixLayout, false> Matrix) { | ||
| 180 | __builtin_OuterProductAccumulate(InputVector1, InputVector2, Matrix.Buffer, | ||
| 181 | Matrix.StartOffset, MatrixDT, MatrixLayout, | ||
| 182 | Matrix.Stride); | ||
| 183 | } | ||
| 184 | |||
| 185 | // VectorAccumulate: passes the input vector, the RWByteAddressBuffer and Offset | ||
| 186 | // unchanged to __builtin_VectorAccumulate; returns nothing. | ||
| 187 | // | ||
| 188 | |||
| 189 | template <typename ElTy, int ElCount> | ||
| 190 | void VectorAccumulate(vector<ElTy, ElCount> InputVector, | ||
| 191 | RWByteAddressBuffer Buffer, uint Offset) { | ||
| 192 | __builtin_VectorAccumulate(InputVector, Buffer, Offset); | ||
| 193 | } | ||
| 194 | |||
| 195 | } // namespace linalg | ||
| 196 | } // namespace dx | ||
| 197 | |||
| 198 | #endif // SM 6.9 check and HV version check | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h new file mode 100644 index 0000000..a53ab4c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.h | |||
| @@ -0,0 +1,275 @@ | |||
| 1 | // Copyright (c) 2024 Google LLC | ||
| 2 | // | ||
| 3 | // This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | |||
| 7 | #ifndef _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ | ||
| 8 | #define _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ | ||
| 9 | |||
| 10 | #if __SPIRV_MAJOR_VERSION__ == 1 && __SPIRV_MINOR_VERSION__ < 6 | ||
| 11 | #error "CooperativeMatrix requires a minimum of SPIR-V 1.6" | ||
| 12 | #endif | ||
| 13 | |||
| 14 | #include "vk/spirv.h" | ||
| 15 | |||
| 16 | namespace vk { | ||
| 17 | namespace khr { | ||
| 18 | |||
| 19 | // The base cooperative matrix class. The template arguments correspond to the | ||
| 20 | // operands in the OpTypeCooperativeMatrixKHR instruction. | ||
| 21 | template <typename ComponentType, Scope scope, uint rows, uint columns, | ||
| 22 | CooperativeMatrixUse use> | ||
| 23 | class CooperativeMatrix { | ||
| 24 | template <class NewComponentType> | ||
| 25 | CooperativeMatrix<NewComponentType, scope, rows, columns, use> cast(); | ||
| 26 | |||
| 27 | // Apply OpSNegate or OpFNegate, depending on ComponentType, in an element by | ||
| 28 | // element manner. | ||
| 29 | CooperativeMatrix negate(); | ||
| 30 | |||
| 31 | // Apply OpIAdd or OpFAdd, depending on ComponentType, in an element by element | ||
| 32 | // manner. | ||
| 33 | CooperativeMatrix operator+(CooperativeMatrix other); | ||
| 34 | |||
| 35 | // Apply OpISub or OpFSub, depending on ComponentType, in an element by element | ||
| 36 | // manner. | ||
| 37 | CooperativeMatrix operator-(CooperativeMatrix other); | ||
| 38 | |||
| 39 | // Apply OpIMul or OpFMul, depending on ComponentType, in an element by element | ||
| 40 | // manner. | ||
| 41 | CooperativeMatrix operator*(CooperativeMatrix other); | ||
| 42 | |||
| 43 | // Apply OpSDiv, OpUDiv or OpFDiv, depending on ComponentType, in an element by | ||
| 44 | // element manner. | ||
| 45 | CooperativeMatrix operator/(CooperativeMatrix other); | ||
| 46 | |||
| 47 | // Apply OpMatrixTimesScalar in an element by element manner. | ||
| 48 | CooperativeMatrix operator*(ComponentType scalar); | ||
| 49 | |||
| 50 | // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to | ||
| 51 | // data using the given memory layout, stride, and memory access operands. | ||
| 52 | // `NonPrivatePointer` and `MakePointerAvailable` with the workgroup scope | ||
| 53 | // will be added to the memory access operands to make the memory coherent. | ||
| 54 | // | ||
| 55 | // This function uses a SPIR-V pointer because HLSL does not allow groupshared | ||
| 56 | // memory objects to be passed by reference. The pointer is a hack to get | ||
| 57 | // around that. | ||
| 58 | // | ||
| 59 | // The layout and stride will be passed to the SPIR-V instruction as is. The | ||
| 60 | // precise meaning can be found in the specification for | ||
| 61 | // SPV_KHR_cooperative_matrix. | ||
| 62 | template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout, | ||
| 63 | class Type> | ||
| 64 | void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride); | ||
| 65 | |||
| 66 | // Same as above, but uses MemoryAccessMaskNone for the memory access | ||
| 67 | // operands. | ||
| 68 | template <CooperativeMatrixLayout layout, class Type> | ||
| 69 | void Store(WorkgroupSpirvPointer<Type> data, uint32_t stride) { | ||
| 70 | Store<MemoryAccessMaskNone, layout>(data, stride); | ||
| 71 | } | ||
| 72 | |||
| 73 | // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to | ||
| 74 | // data[index] using the given memory layout, stride, and memory access | ||
| 75 | // operands. The layout and stride will be passed to the SPIR-V instruction as | ||
| 76 | // is. The precise meaning can be found in the specification for | ||
| 77 | // SPV_KHR_cooperative_matrix. | ||
| 78 | template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout, | ||
| 79 | class Type> | ||
| 80 | void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride); | ||
| 81 | |||
| 82 | // Same as above, but uses MemoryAccessMaskNone for the memory access | ||
| 83 | // operands. | ||
| 84 | template <CooperativeMatrixLayout layout, class Type> | ||
| 85 | void Store(RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride) { | ||
| 86 | Store<MemoryAccessMaskNone, layout>(data, index, stride); | ||
| 87 | } | ||
| 88 | |||
| 89 | // Store the cooperative matrix using OpCooperativeMatrixStoreKHR to | ||
| 90 | // data[index] using the given memory layout, stride, and memory access | ||
| 91 | // operands. `NonPrivatePointer` and `MakePointerAvailable` with the | ||
| 92 | // QueueFamily scope will be added to the memory access operands to make the | ||
| 93 | // memory coherent. | ||
| 94 | // | ||
| 95 | // The layout and stride will be passed to the SPIR-V instruction as is. The | ||
| 96 | // precise meaning can be found in the specification for | ||
| 97 | // SPV_KHR_cooperative_matrix. | ||
| 98 | template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout, | ||
| 99 | class Type> | ||
| 100 | void CoherentStore(globallycoherent RWStructuredBuffer<Type> data, | ||
| 101 | uint32_t index, uint32_t stride); | ||
| 102 | |||
| 103 | // Same as above, but uses MemoryAccessMaskNone for the memory access operands | ||
| 104 | // template argument. | ||
| 105 | template <CooperativeMatrixLayout layout, class Type> | ||
| 106 | void CoherentStore(globallycoherent RWStructuredBuffer<Type> data, | ||
| 107 | uint32_t index, uint32_t stride) { | ||
| 108 | CoherentStore<MemoryAccessMaskNone, layout>(data, index, stride); | ||
| 109 | } | ||
| 110 | |||
| 111 | // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from | ||
| 112 | // data using the given memory layout, stride, and memory access operands. | ||
| 113 | // `NonPrivatePointer` and `MakePointerVisible` with the workgroup scope | ||
| 114 | // will be added to the memory access operands to make the memory coherent. | ||
| 115 | // | ||
| 116 | // This function uses a SPIR-V pointer because HLSL does not allow groupshared | ||
| 117 | // memory objects to be passed by reference. The pointer is a hack to get | ||
| 118 | // around that. | ||
| 119 | // | ||
| 120 | // The layout and stride will be passed to the SPIR-V instruction as is. The | ||
| 121 | // precise meaning can be found in the specification for | ||
| 122 | // SPV_KHR_cooperative_matrix. | ||
| 123 | template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout, | ||
| 124 | class Type> | ||
| 125 | static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data, | ||
| 126 | uint32_t stride); | ||
| 127 | |||
| 128 | // Same as above, but uses MemoryAccessMaskNone for the memory access | ||
| 129 | // operands. | ||
| 130 | template <CooperativeMatrixLayout layout, class Type> | ||
| 131 | static CooperativeMatrix Load(WorkgroupSpirvPointer<Type> data, | ||
| 132 | uint32_t stride) { | ||
| 133 | return Load<MemoryAccessMaskNone, layout>(data, stride); | ||
| 134 | } | ||
| 135 | |||
| 136 | // Loads a cooperative matrix using OpCooperativeMatrixLoadKHR from | ||
| 137 | // data[index] using the given memory layout, stride, and memory access | ||
| 138 | // operands. | ||
| 139 | // | ||
| 140 | // The layout and stride will be passed to the SPIR-V instruction as is. The | ||
| 141 | // precise meaning can be found in the specification for | ||
| 142 | // SPV_KHR_cooperative_matrix. | ||
| 143 | template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout, | ||
| 144 | class Type> | ||
| 145 | static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index, | ||
| 146 | uint32_t stride); | ||
| 147 | |||
// Convenience overload of the RWStructuredBuffer Load: forwards to the
// overload that takes explicit memory access operands, supplying
// MemoryAccessMaskNone for them.
template <CooperativeMatrixLayout layout, typename Type>
static CooperativeMatrix Load(RWStructuredBuffer<Type> data, uint32_t index,
                              uint32_t stride) {
  CooperativeMatrix loaded =
      Load<MemoryAccessMaskNone, layout>(data, index, stride);
  return loaded;
}
| 155 | |||
// Reads a cooperative matrix from data[index] with
// OpCooperativeMatrixLoadKHR, using the requested memory layout, stride, and
// memory access operands. To make the memory coherent, `NonPrivatePointer`
// and `MakePointerVisible` with the QueueFamily scope are added to the memory
// access operands.
//
// Layout and stride are forwarded to the SPIR-V instruction unchanged; their
// exact meaning is defined by the SPV_KHR_cooperative_matrix specification.
// Declaration only; the definition is provided by cooperative_matrix.impl.
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
static CooperativeMatrix
CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
             uint32_t stride);
| 171 | |||
// Convenience overload of CoherentLoad: forwards to the overload that takes
// an explicit memory access operands template argument, supplying
// MemoryAccessMaskNone for it.
template <CooperativeMatrixLayout layout, typename Type>
static CooperativeMatrix
CoherentLoad(globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
             uint32_t stride) {
  CooperativeMatrix loaded =
      CoherentLoad<MemoryAccessMaskNone, layout>(data, index, stride);
  return loaded;
}
| 180 | |||
// Reads a cooperative matrix from data[index] with
// OpCooperativeMatrixLoadKHR, using the requested memory layout, stride, and
// memory access operands. The buffer is read-only, so no extra memory access
// bits are added to the operands; there should be no need for them.
//
// Layout and stride are forwarded to the SPIR-V instruction unchanged; their
// exact meaning is defined by the SPV_KHR_cooperative_matrix specification.
// Declaration only; the definition is provided by cooperative_matrix.impl.
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
                              uint32_t stride);
| 193 | |||
// Convenience overload of the StructuredBuffer Load: forwards to the overload
// that takes explicit memory access operands, supplying MemoryAccessMaskNone
// for them.
template <CooperativeMatrixLayout layout, typename Type>
static CooperativeMatrix Load(StructuredBuffer<Type> data, uint32_t index,
                              uint32_t stride) {
  CooperativeMatrix loaded =
      Load<MemoryAccessMaskNone, layout>(data, index, stride);
  return loaded;
}
| 201 | |||
// Builds a cooperative matrix in which every value is initialized to v. All
// threads in scope must pass the same value for v.
// Declaration only; the definition is provided by cooperative_matrix.impl.
static CooperativeMatrix Splat(ComponentType v);
| 205 | |||
// Yields the result of OpCooperativeMatrixLengthKHR for the current type.
// Declaration only; the definition is provided by cooperative_matrix.impl.
static uint32_t GetLength();
| 208 | |||
// Element accessors for the cooperative matrix. `index` must be less than
// GetLength().
//   Set: writes `value` to the element at `index`.
//   Get: reads the element at `index`.
// Declarations only; the definitions are provided by cooperative_matrix.impl.
void Set(ComponentType value, uint32_t index);
ComponentType Get(uint32_t index);
| 213 | |||
// True when 0 - 1, evaluated in ComponentType, compares less than 0; that is,
// when ComponentType can represent a negative value.
// NOTE(review): the expression is also true for floating-point component
// types, not only signed integers -- confirm that is intended.
static const bool hasSignedIntegerComponentType =
    ((ComponentType(0) - ComponentType(1)) < ComponentType(0));
| 216 | |||
// clang-format off
// Opaque SPIR-V type that backs this class: an OpTypeCooperativeMatrixKHR
// (opcode 4456) parameterized by the component type, scope, rows, columns,
// and use of this CooperativeMatrix.
using SpirvMatrixType = vk::SpirvOpaqueType<
    /* OpTypeCooperativeMatrixKHR */ 4456,
    ComponentType,
    vk::integral_constant<uint, scope>,
    vk::integral_constant<uint, rows>,
    vk::integral_constant<uint, columns>,
    vk::integral_constant<uint, use> >;

// The wrapped SPIR-V cooperative matrix value. The attributes name the
// SPV_KHR_cooperative_matrix extension and the CooperativeMatrixKHR and
// VulkanMemoryModel capabilities.
[[vk::ext_extension("SPV_KHR_cooperative_matrix")]]
[[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]]
[[vk::ext_capability(/* VulkanMemoryModel */ 5345)]]
SpirvMatrixType _matrix;
// clang-format on
| 228 | }; | ||
| 229 | |||
// CooperativeMatrix whose use is CooperativeMatrixUseMatrixAKHR: the type
// accepted in the "a" position of a multiply-add, r = (a * b) + c.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixA = CooperativeMatrix<ComponentType, scope, rows,
                                             columns,
                                             CooperativeMatrixUseMatrixAKHR>;
| 236 | |||
// CooperativeMatrix whose use is CooperativeMatrixUseMatrixBKHR: the type
// accepted in the "b" position of a multiply-add, r = (a * b) + c.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixB = CooperativeMatrix<ComponentType, scope, rows,
                                             columns,
                                             CooperativeMatrixUseMatrixBKHR>;
| 243 | |||
// CooperativeMatrix whose use is CooperativeMatrixUseMatrixAccumulatorKHR: the
// type accepted in the "r" and "c" positions of a multiply-add,
// r = (a * b) + c.
template <typename ComponentType, Scope scope, uint rows, uint columns>
using CooperativeMatrixAccumulator =
    CooperativeMatrix<ComponentType, scope, rows, columns,
                      CooperativeMatrixUseMatrixAccumulatorKHR>;
| 250 | |||
// Applies OpCooperativeMatrixMulAddKHR to a, b, and c and returns the result.
// The cooperative matrix operands are inferred, and the
// SaturatingAccumulationKHR bit is left unset.
// Declaration only; the definition is provided by cooperative_matrix.impl.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);
| 260 | |||
// Applies OpCooperativeMatrixMulAddKHR to a, b, and c and returns the result.
// The cooperative matrix operands are inferred, and the
// SaturatingAccumulationKHR bit is set.
// Declaration only; the definition is provided by cooperative_matrix.impl.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixSaturatingMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c);
| 270 | |||
| 271 | } // namespace khr | ||
| 272 | } // namespace vk | ||
| 273 | |||
| 274 | #include "cooperative_matrix.impl" | ||
| 275 | #endif // _HLSL_VK_KHR_COOPERATIVE_MATRIX_H_ | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl new file mode 100644 index 0000000..2acae8e --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/khr/cooperative_matrix.impl | |||
| @@ -0,0 +1,377 @@ | |||
| 1 | // Copyright (c) 2024 Google LLC | ||
| 2 | // | ||
| 3 | // This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | |||
| 7 | #include "vk/opcode_selector.h" | ||
| 8 | |||
// Inline SPIR-V binding for OpMatrixTimesScalar (opcode 143). Takes a matrix
// value `a` and a scalar `b`; the result has the same type as `a`.
template <typename ResultType, typename ComponentType>
[[vk::ext_instruction(/* OpMatrixTimesScalar */ 143)]]
ResultType __builtin_spv_MatrixTimesScalar(ResultType a, ComponentType b);
| 12 | |||
// Inline SPIR-V binding for OpCompositeExtract (opcode 81) applied to the
// SpirvMatrixType of a CooperativeMatrix; yields a ComponentType value for
// the given index.
template <typename ComponentType, vk::Scope scope, uint rows, uint columns,
          vk::CooperativeMatrixUse use>
[[vk::ext_instruction(/* OpCompositeExtract */ 81)]]
ComponentType __builtin_spv_ExtractFromCooperativeMatrix(
    typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
                                        use>::SpirvMatrixType matrix,
    uint32_t index);
| 20 | |||
// Inline SPIR-V binding for OpCompositeConstruct (opcode 80). Produces a
// CoopMatrixType from a single ComponentType value.
template <typename CoopMatrixType, typename ComponentType>
[[vk::ext_instruction(/* OpCompositeConstruct */ 80)]]
CoopMatrixType __builtin_spv_ConstructCooperativeMatrix(ComponentType value);
| 24 | |||
// Inline SPIR-V binding for OpAccessChain (opcode 65). `base` is marked
// vk::ext_reference; the caller chooses the resulting pointer type through
// ResultPointerType.
template <typename ResultPointerType, typename BaseType>
[[vk::ext_instruction(/* OpAccessChain */ 65)]]
ResultPointerType __builtin_spv_AccessChain([[vk::ext_reference]] BaseType base,
                                            uint32_t index);
| 28 | |||
// Inline SPIR-V binding for OpLoad (opcode 61). Reads an ObjectType value
// through the pointer `base`.
template <typename ObjectType, typename PointerType>
[[vk::ext_instruction(/* OpLoad */ 61)]]
ObjectType __builtin_spv_LoadPointer(PointerType base);
| 32 | |||
// Inline SPIR-V binding for OpStore (opcode 62). Writes `object` through the
// pointer `base`. (The opcode annotation previously read "OpLoad"; 62 is
// OpStore, while OpLoad is 61.)
template <class PointerType, class ObjectType>
[[vk::ext_instruction(/* OpStore */ 62)]] void
__builtin_spv_StorePointer(PointerType base, ObjectType object);
| 36 | |||
// Inline SPIR-V binding for OpCompositeInsert (opcode 82) applied to the
// SpirvMatrixType of a CooperativeMatrix. Takes the value to insert, the
// matrix, and an index, and yields a matrix of the same SpirvMatrixType.
template <typename ComponentType, vk::Scope scope, uint rows, uint columns,
          vk::CooperativeMatrixUse use>
[[vk::ext_instruction(/* OpCompositeInsert */ 82)]]
typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
                                    use>::SpirvMatrixType
__builtin_spv_InsertIntoCooperativeMatrix(
    ComponentType value,
    typename vk::khr::CooperativeMatrix<ComponentType, scope, rows, columns,
                                        use>::SpirvMatrixType matrix,
    uint32_t index);
| 47 | |||
// Load and store instruction bindings.

// Inline SPIR-V binding for OpCooperativeMatrixLoadKHR (opcode 4457) without
// a scope operand. `pointer` is marked vk::ext_reference and `memory_operand`
// is marked vk::ext_literal.
template <typename ResultType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]]
ResultType __builtin_spv_CooperativeMatrixLoadKHR(
    [[vk::ext_reference]] PointerType pointer,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand);
| 55 | |||
// Inline SPIR-V binding for OpCooperativeMatrixLoadKHR (opcode 4457) with a
// trailing vk::Scope operand after the memory operand. `pointer` is marked
// vk::ext_reference and `memory_operand` is marked vk::ext_literal.
template <typename ResultType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]]
ResultType __builtin_spv_CooperativeMatrixLoadKHR(
    [[vk::ext_reference]] PointerType pointer,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
| 62 | |||
// Inline SPIR-V binding for OpCooperativeMatrixLoadKHR (opcode 4457) that
// takes its source as a vk::WorkgroupSpirvPointer (passed without
// vk::ext_reference), followed by layout, stride, a literal memory operand,
// and a scope.
template <typename ResultType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]]
ResultType __builtin_spv_CooperativeMatrixWorkgroupLoadKHR(
    vk::WorkgroupSpirvPointer<PointerType> pointer,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
| 69 | |||
// Inline SPIR-V binding for OpCooperativeMatrixStoreKHR (opcode 4458) with a
// trailing vk::Scope operand after the memory operand. `pointer` is marked
// vk::ext_reference and `memory_operand` is marked vk::ext_literal.
template <typename ObjectType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]]
void __builtin_spv_CooperativeMatrixStoreKHR(
    [[vk::ext_reference]] PointerType pointer, ObjectType object,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
| 76 | |||
// Inline SPIR-V binding for OpCooperativeMatrixStoreKHR (opcode 4458) without
// a scope operand. `pointer` is marked vk::ext_reference and `memory_operand`
// is marked vk::ext_literal.
template <typename ObjectType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]]
void __builtin_spv_CooperativeMatrixStoreKHR(
    [[vk::ext_reference]] PointerType pointer, ObjectType object,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand);
| 83 | |||
// Inline SPIR-V binding for OpCooperativeMatrixStoreKHR (opcode 4458) that
// takes its destination as a vk::WorkgroupSpirvPointer (passed without
// vk::ext_reference), followed by the object, layout, stride, a literal
// memory operand, and a scope.
template <typename ObjectType, typename PointerType>
[[vk::ext_instruction(/* OpCooperativeMatrixStoreKHR */ 4458)]]
void __builtin_spv_CooperativeMatrixWorkgroupStoreKHR(
    vk::WorkgroupSpirvPointer<PointerType> pointer, ObjectType object,
    vk::CooperativeMatrixLayout memory_layout, uint stride,
    [[vk::ext_literal]] uint32_t memory_operand, vk::Scope scope);
| 90 | |||
// `OpCooperativeMatrixLengthKHR` takes a type id as an operand, so it cannot
// be expressed with ext_instruction. This builtin is instead expanded by
// dedicated code in the compiler.
template <typename MatrixType>
uint __builtin_spv_CooperativeMatrixLengthKHR();
| 95 | |||
// Arithmetic instruction bindings.

// Inline SPIR-V binding for OpCooperativeMatrixMulAddKHR (opcode 4459). Takes
// the three matrix operands a, b, and c plus `operands`, which is marked
// vk::ext_literal.
template <typename ResultType, typename MatrixTypeA, typename MatrixTypeB,
          typename MatrixTypeC>
[[vk::ext_instruction(/* OpCooperativeMatrixMulAddKHR */ 4459)]]
ResultType __builtin_spv_CooperativeMatrixMulAddKHR(
    MatrixTypeA a, MatrixTypeB b, MatrixTypeC c,
    [[vk::ext_literal]] int operands);
| 103 | namespace vk { | ||
| 104 | namespace khr { | ||
| 105 | |||
// Converts this matrix to one with component type NewComponentType, keeping
// scope, rows, columns, and use. The conversion is the one
// util::ConversionSelector provides for the (ComponentType, NewComponentType)
// pair.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <typename NewComponentType>
CooperativeMatrix<NewComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::cast() {
  using TargetMatrix =
      CooperativeMatrix<NewComponentType, scope, rows, columns, use>;
  TargetMatrix converted;
  converted._matrix =
      util::ConversionSelector<ComponentType, NewComponentType>::
          template Convert<typename TargetMatrix::SpirvMatrixType>(_matrix);
  return converted;
}
| 118 | |||
// Returns a matrix holding the result of applying the Negate operation that
// util::ArithmeticSelector provides for ComponentType to this matrix.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::negate() {
  CooperativeMatrix negated;
  negated._matrix = util::ArithmeticSelector<ComponentType>::Negate(_matrix);
  return negated;
}
| 127 | |||
// Returns this matrix plus `other`, using the Add operation that
// util::ArithmeticSelector provides for ComponentType.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator+(
    CooperativeMatrix other) {
  CooperativeMatrix sum;
  sum._matrix =
      util::ArithmeticSelector<ComponentType>::Add(_matrix, other._matrix);
  return sum;
}
| 138 | |||
// Returns this matrix minus `other`, using the Sub operation that
// util::ArithmeticSelector provides for ComponentType.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator-(
    CooperativeMatrix other) {
  CooperativeMatrix difference;
  difference._matrix =
      util::ArithmeticSelector<ComponentType>::Sub(_matrix, other._matrix);
  return difference;
}
| 149 | |||
// Returns the result of applying the Mul operation that
// util::ArithmeticSelector provides for ComponentType to this matrix and
// `other`.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator*(
    CooperativeMatrix other) {
  CooperativeMatrix product;
  product._matrix =
      util::ArithmeticSelector<ComponentType>::Mul(_matrix, other._matrix);
  return product;
}
| 160 | |||
// Returns the result of applying the Div operation that
// util::ArithmeticSelector provides for ComponentType to this matrix and
// `other`.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator/(
    CooperativeMatrix other) {
  CooperativeMatrix quotient;
  quotient._matrix =
      util::ArithmeticSelector<ComponentType>::Div(_matrix, other._matrix);
  return quotient;
}
| 171 | |||
// Returns this matrix multiplied by `scalar`, computed with the
// OpMatrixTimesScalar builtin.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::operator*(
    ComponentType scalar) {
  CooperativeMatrix scaled;
  scaled._matrix = __builtin_spv_MatrixTimesScalar(_matrix, scalar);
  return scaled;
}
| 181 | |||
// Stores this matrix through a workgroup SPIR-V pointer with
// OpCooperativeMatrixStoreKHR. NonPrivatePointer and MakePointerAvailable are
// OR-ed into the caller's memory access operands, and the scope operand is
// ScopeWorkgroup.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Store(
    WorkgroupSpirvPointer<Type> data, uint32_t stride) {
  // The operand expression is written inline because the builtin takes it as
  // a vk::ext_literal parameter.
  __builtin_spv_CooperativeMatrixWorkgroupStoreKHR(
      data, _matrix, layout, stride,
      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
          MemoryAccessMakePointerAvailableMask,
      ScopeWorkgroup);
}
| 194 | |||
// Stores this matrix to data[index] with OpCooperativeMatrixStoreKHR, passing
// the caller's memory access operands through unchanged and no scope operand.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Store(
    RWStructuredBuffer<Type> data, uint32_t index, uint32_t stride) {
  __builtin_spv_CooperativeMatrixStoreKHR(
      data[index], _matrix, layout, stride, memoryAccessOperands);
}
| 204 | |||
// Stores this matrix to data[index] in a globallycoherent buffer with
// OpCooperativeMatrixStoreKHR. NonPrivatePointer and MakePointerAvailable are
// OR-ed into the caller's memory access operands, and the scope operand is
// ScopeQueueFamily.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
void CooperativeMatrix<ComponentType, scope, rows, columns, use>::CoherentStore(
    globallycoherent RWStructuredBuffer<Type> data, uint32_t index,
    uint32_t stride) {
  // The operand expression is written inline because the builtin takes it as
  // a vk::ext_literal parameter.
  __builtin_spv_CooperativeMatrixStoreKHR(
      data[index], _matrix, layout, stride,
      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
          MemoryAccessMakePointerAvailableMask,
      ScopeQueueFamily);
}
| 218 | |||
// Loads a matrix through a workgroup SPIR-V pointer with
// OpCooperativeMatrixLoadKHR. NonPrivatePointer and MakePointerVisible are
// OR-ed into the caller's memory access operands, and the scope operand is
// ScopeWorkgroup.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
    vk::WorkgroupSpirvPointer<Type> buffer, uint32_t stride) {
  CooperativeMatrix loaded;
  // The operand expression is written inline because the builtin takes it as
  // a vk::ext_literal parameter.
  loaded._matrix =
      __builtin_spv_CooperativeMatrixWorkgroupLoadKHR<SpirvMatrixType>(
          buffer, layout, stride,
          memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
              MemoryAccessMakePointerVisibleMask,
          ScopeWorkgroup);
  return loaded;
}
| 235 | |||
// Loads a matrix from buffer[index] with OpCooperativeMatrixLoadKHR, passing
// the caller's memory access operands through unchanged and no scope operand.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          typename Type>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
    RWStructuredBuffer<Type> buffer, uint32_t index, uint32_t stride) {
  CooperativeMatrix loaded;
  loaded._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
      buffer[index], layout, stride, memoryAccessOperands);
  return loaded;
}
| 248 | |||
// Loads a matrix from buffer[index] in a globallycoherent buffer with
// OpCooperativeMatrixLoadKHR. NonPrivatePointer and MakePointerVisible are
// OR-ed into the caller's memory access operands, and the scope operand is
// ScopeQueueFamily.
//
// The buffer parameter carries `globallycoherent` so that this definition
// agrees with the in-class declaration of CoherentLoad and with the
// CoherentStore definition, both of which spell the qualifier out.
template <class ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          class Type>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::CoherentLoad(
    globallycoherent RWStructuredBuffer<Type> buffer, uint32_t index,
    uint32_t stride) {
  CooperativeMatrix result;
  // The operand expression is written inline because the builtin takes it as
  // a vk::ext_literal parameter.
  result._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
      buffer[index], layout, stride,
      memoryAccessOperands | MemoryAccessNonPrivatePointerMask |
          MemoryAccessMakePointerVisibleMask,
      ScopeQueueFamily);
  return result;
}
| 264 | |||
// Loads a matrix from buffer[index] in a read-only StructuredBuffer with
// OpCooperativeMatrixLoadKHR. The caller's memory access operands are passed
// through as given and no extra bits are added, matching the documented
// contract on the declaration. (Previously the template argument was ignored
// and MemoryAccessMaskNone was always used, so explicitly requested operands
// were silently dropped.)
template <class ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
template <uint32_t memoryAccessOperands, CooperativeMatrixLayout layout,
          class Type>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::Load(
    StructuredBuffer<Type> buffer, uint32_t index, uint32_t stride) {
  CooperativeMatrix result;
  result._matrix = __builtin_spv_CooperativeMatrixLoadKHR<SpirvMatrixType>(
      buffer[index], layout, stride, memoryAccessOperands);
  return result;
}
| 277 | |||
// Builds a matrix from the single value v using the OpCompositeConstruct
// builtin.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>
CooperativeMatrix<ComponentType, scope, rows, columns, use>::Splat(
    ComponentType v) {
  CooperativeMatrix filled;
  filled._matrix =
      __builtin_spv_ConstructCooperativeMatrix<SpirvMatrixType>(v);
  return filled;
}
| 287 | |||
// Returns the value of the OpCooperativeMatrixLengthKHR builtin for this
// matrix's SpirvMatrixType.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
uint CooperativeMatrix<ComponentType, scope, rows, columns, use>::GetLength() {
  const uint length =
      __builtin_spv_CooperativeMatrixLengthKHR<SpirvMatrixType>();
  return length;
}
| 293 | |||
// Reads the element at `index`: forms a pointer to it with OpAccessChain on
// _matrix, then reads through that pointer with OpLoad.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
ComponentType CooperativeMatrix<ComponentType, scope, rows, columns, use>::Get(
    uint32_t index) {
  // clang-format off
  // OpTypePointer (opcode 32) to ComponentType with storage class literal 7
  // (function storage class).
  using ElementPointer = vk::SpirvOpaqueType<
      /* OpTypePointer */ 32,
      /* function storage class */ vk::Literal<vk::integral_constant<uint, 7> >,
      ComponentType>;
  // clang-format on
  ElementPointer element =
      __builtin_spv_AccessChain<ElementPointer>(_matrix, index);
  return __builtin_spv_LoadPointer<ComponentType>(element);
}
| 307 | |||
// Writes `value` to the element at `index`: forms a pointer to it with
// OpAccessChain on _matrix, then writes through that pointer with the
// StorePointer builtin.
template <typename ComponentType, Scope scope, uint rows, uint columns,
          CooperativeMatrixUse use>
void CooperativeMatrix<ComponentType, scope, rows, columns, use>::Set(
    ComponentType value, uint32_t index) {
  // clang-format off
  // OpTypePointer (opcode 32) to ComponentType with storage class literal 7
  // (function storage class).
  using ElementPointer = vk::SpirvOpaqueType<
      /* OpTypePointer */ 32,
      /* function storage class */ vk::Literal<vk::integral_constant<uint, 7> >,
      ComponentType>;
  // clang-format on
  ElementPointer element =
      __builtin_spv_AccessChain<ElementPointer>(_matrix, index);
  __builtin_spv_StorePointer(element, value);
}
| 321 | |||
// Computes OpCooperativeMatrixMulAddKHR on a, b, and c without saturating
// accumulation. When the component type reports
// hasSignedIntegerComponentType, the A, B, C, and Result signed-components
// bits are all set in the operands word; otherwise the operands word is
// CooperativeMatrixOperandsMaskNone.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c) {
  using Accumulator =
      CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>;

  const vk::CooperativeMatrixOperandsMask signedOperandBits =
      vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask;

  // Kept as a const initialized from a single expression: the builtin takes
  // this value as a vk::ext_literal parameter.
  const vk::CooperativeMatrixOperandsMask operands =
      (vk::CooperativeMatrixOperandsMask)(
          a.hasSignedIntegerComponentType
              ? signedOperandBits
              : vk::CooperativeMatrixOperandsMaskNone);

  Accumulator accumulated;
  accumulated._matrix = __builtin_spv_CooperativeMatrixMulAddKHR<
      typename Accumulator::SpirvMatrixType>(a._matrix, b._matrix, c._matrix,
                                             operands);
  return accumulated;
}
| 348 | |||
// Computes OpCooperativeMatrixMulAddKHR on a, b, and c with the
// SaturatingAccumulationKHR bit always set. When the component type reports
// hasSignedIntegerComponentType, the A, B, C, and Result signed-components
// bits are set as well.
template <typename ComponentType, Scope scope, uint rows, uint columns, uint K>
CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>
cooperativeMatrixSaturatingMultiplyAdd(
    CooperativeMatrixA<ComponentType, scope, rows, K> a,
    CooperativeMatrixB<ComponentType, scope, K, columns> b,
    CooperativeMatrixAccumulator<ComponentType, scope, rows, columns> c) {
  using Accumulator =
      CooperativeMatrixAccumulator<ComponentType, scope, rows, columns>;

  // All four signed-components bits together with the saturating bit.
  const vk::CooperativeMatrixOperandsMask signedSaturatingBits =
      vk::CooperativeMatrixOperandsMatrixASignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask |
      vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask;

  // Kept as a const initialized from a single expression: the builtin takes
  // this value as a vk::ext_literal parameter.
  const vk::CooperativeMatrixOperandsMask operands =
      (vk::CooperativeMatrixOperandsMask)(
          a.hasSignedIntegerComponentType
              ? signedSaturatingBits
              : vk::CooperativeMatrixOperandsSaturatingAccumulationKHRMask);

  Accumulator accumulated;
  accumulated._matrix = __builtin_spv_CooperativeMatrixMulAddKHR<
      typename Accumulator::SpirvMatrixType>(a._matrix, b._matrix, c._matrix,
                                             operands);
  return accumulated;
}
| 375 | |||
| 376 | } // namespace khr | ||
| 377 | } // namespace vk | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h new file mode 100644 index 0000000..bc8672c --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/opcode_selector.h | |||
| @@ -0,0 +1,227 @@ | |||
| 1 | // Copyright (c) 2024 Google LLC | ||
| 2 | // | ||
| 3 | // This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | |||
| 7 | #ifndef _HLSL_VK_KHR_OPCODE_SELECTOR_H_ | ||
| 8 | #define _HLSL_VK_KHR_OPCODE_SELECTOR_H_ | ||
| 9 | |||
// Declares __builtin_spv_<op>: a one-operand inline SPIR-V instruction whose
// result type equals its operand type.
#define DECLARE_UNARY_OP(op, spirv_opcode)                                     \
  template <typename ResultType>                                               \
  [[vk::ext_instruction(spirv_opcode)]] ResultType __builtin_spv_##op(         \
      ResultType a)

DECLARE_UNARY_OP(CopyObject, 83);
DECLARE_UNARY_OP(SNegate, 126);
DECLARE_UNARY_OP(FNegate, 127);
| 18 | |||
// Declares __builtin_spv_<op>: a one-operand inline SPIR-V instruction whose
// result type (first template argument) may differ from its operand type.
#define DECLARE_CONVERSION_OP(op, spirv_opcode)                                \
  template <typename ResultType, typename OperandType>                         \
  [[vk::ext_instruction(spirv_opcode)]] ResultType __builtin_spv_##op(         \
      OperandType a)

DECLARE_CONVERSION_OP(ConvertFtoU, 109);
DECLARE_CONVERSION_OP(ConvertFtoS, 110);
DECLARE_CONVERSION_OP(ConvertSToF, 111);
DECLARE_CONVERSION_OP(ConvertUToF, 112);
DECLARE_CONVERSION_OP(UConvert, 113);
DECLARE_CONVERSION_OP(SConvert, 114);
DECLARE_CONVERSION_OP(FConvert, 115);
DECLARE_CONVERSION_OP(Bitcast, 124);
| 32 | |||
// Retire the declaration helpers so they do not leak into code that includes
// this header. The original line misspelled the macro name
// (DECLARY_UNARY_OP), so nothing was actually undefined.
// NOTE(review): assumes no later part of this header uses either macro --
// confirm against the rest of the file.
#undef DECLARE_UNARY_OP
#undef DECLARE_CONVERSION_OP
| 34 | |||
// Declares __builtin_spv_<op>: a two-operand inline SPIR-V instruction whose
// operands and result all share one type.
#define DECLARE_BINOP(op, spirv_opcode)                                        \
  template <typename ResultType>                                               \
  [[vk::ext_instruction(spirv_opcode)]] ResultType __builtin_spv_##op(         \
      ResultType a, ResultType b)

// Integer / floating-point pairs.
DECLARE_BINOP(IAdd, 128);
DECLARE_BINOP(FAdd, 129);
DECLARE_BINOP(ISub, 130);
DECLARE_BINOP(FSub, 131);
DECLARE_BINOP(IMul, 132);
DECLARE_BINOP(FMul, 133);
// Unsigned, signed, and floating-point division.
DECLARE_BINOP(UDiv, 134);
DECLARE_BINOP(SDiv, 135);
DECLARE_BINOP(FDiv, 136);

#undef DECLARE_BINOP
| 51 | namespace vk { | ||
| 52 | namespace util { | ||
| 53 | |||
// Maps a scalar component type to the SPIR-V arithmetic builtins used for it.
// The primary template is only declared; specializations are generated with
// ARITHMETIC_SELECTOR.
template <typename ComponentType> class ArithmeticSelector;

// Generates the ArithmeticSelector specialization for BaseType. Each static
// member forwards its operands to the builtin given for that operation.
// SIGNED_INTEGER_TYPE is accepted but not referenced by the expansion.
#define ARITHMETIC_SELECTOR(BaseType, OpNegate, OpAdd, OpSub, OpMul, OpDiv,    \
                            SIGNED_INTEGER_TYPE)                               \
  template <> class ArithmeticSelector<BaseType> {                             \
    template <typename M> static M Negate(M x) { return OpNegate(x); }         \
    template <typename M> static M Add(M x, M y) { return OpAdd(x, y); }       \
    template <typename M> static M Sub(M x, M y) { return OpSub(x, y); }       \
    template <typename M> static M Mul(M x, M y) { return OpMul(x, y); }       \
    template <typename M> static M Div(M x, M y) { return OpDiv(x, y); }       \
  };
| 65 | |||
// Floating-point component types use the F* builtins.
ARITHMETIC_SELECTOR(half, __builtin_spv_FNegate, __builtin_spv_FAdd,
                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
                    false);
ARITHMETIC_SELECTOR(float, __builtin_spv_FNegate, __builtin_spv_FAdd,
                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
                    false);
ARITHMETIC_SELECTOR(double, __builtin_spv_FNegate, __builtin_spv_FAdd,
                    __builtin_spv_FSub, __builtin_spv_FMul, __builtin_spv_FDiv,
                    false);

// Integer component types share SNegate/IAdd/ISub/IMul; signed types divide
// with SDiv and unsigned types with UDiv.
ARITHMETIC_SELECTOR(int32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
                    true);
ARITHMETIC_SELECTOR(int64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
                    true);
ARITHMETIC_SELECTOR(uint32_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
                    false);
ARITHMETIC_SELECTOR(uint64_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
                    false);

// 16-bit integer component types are only specialized when
// __HLSL_ENABLE_16_BIT is set.
#if __HLSL_ENABLE_16_BIT
ARITHMETIC_SELECTOR(int16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_SDiv,
                    true);
ARITHMETIC_SELECTOR(uint16_t, __builtin_spv_SNegate, __builtin_spv_IAdd,
                    __builtin_spv_ISub, __builtin_spv_IMul, __builtin_spv_UDiv,
                    false);
#endif // __HLSL_ENABLE_16_BIT
| 97 | |||
| 98 | // The conversion selector will be used to convert one type to another | ||
| 99 | // using the SPIR-V conversion instructions. See | ||
| 100 | // https://registry.khronos.org/SPIR-V/specs/unified1/SPIRV.html#_conversion_instructions. | ||
| 101 | // SourceType and TargetType must be integer or floating point scalar type. | ||
| 102 | |||
| 103 | // ConversionSelector::Convert converts an object of type S to an object of type | ||
| 104 | // T. S must be SourceType, a vector of SourceType, or a cooperative matrix of | ||
| 105 | // SourceType. T must be TargetType, a vector of TargetType, or a cooperative | ||
| 106 | // matrix of TargetType. T must have the same number of components as S. T is a | ||
| 107 | // cooperative matrix if and only if S is a cooperative matrix. | ||
| 108 | template <class SourceType, class TargetType> class ConversionSelector; /* only declared here; the specializations are generated by CONVERSION_SELECTOR */ | ||
| 109 | |||
| 110 | #define CONVERSION_SELECTOR(SourceType, TargetType, OpConvert) /* defines the full specialization ConversionSelector<SourceType, TargetType> */ \ | ||
| 111 | template <> class ConversionSelector<SourceType, TargetType> { \ | ||
| 112 | template <class T, class S> static T Convert(S a) { /* T is the result type handed on to OpConvert; S is the type of the argument a */ \ | ||
| 113 | return OpConvert<T>(a); \ | ||
| 114 | } \ | ||
| 115 | }; | ||
| 116 | |||
| 117 | #if __HLSL_ENABLE_16_BIT /* every conversion with a uint16_t or int16_t source or target lives in this conditional block */ | ||
| 118 | CONVERSION_SELECTOR(uint16_t, uint16_t, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 119 | CONVERSION_SELECTOR(uint16_t, int16_t, __builtin_spv_Bitcast); /* same width, other signedness: Bitcast */ | ||
| 120 | CONVERSION_SELECTOR(uint16_t, uint32_t, __builtin_spv_UConvert); /* width changes: UConvert for unsigned targets, SConvert for signed targets */ | ||
| 121 | CONVERSION_SELECTOR(uint16_t, int32_t, __builtin_spv_SConvert); | ||
| 122 | CONVERSION_SELECTOR(uint16_t, uint64_t, __builtin_spv_UConvert); | ||
| 123 | CONVERSION_SELECTOR(uint16_t, int64_t, __builtin_spv_SConvert); | ||
| 124 | CONVERSION_SELECTOR(uint16_t, half, __builtin_spv_ConvertUToF); /* unsigned source to float: ConvertUToF */ | ||
| 125 | CONVERSION_SELECTOR(uint16_t, float, __builtin_spv_ConvertUToF); | ||
| 126 | CONVERSION_SELECTOR(uint16_t, double, __builtin_spv_ConvertUToF); | ||
| 127 | |||
| 128 | CONVERSION_SELECTOR(int16_t, uint16_t, __builtin_spv_Bitcast); | ||
| 129 | CONVERSION_SELECTOR(int16_t, int16_t, __builtin_spv_CopyObject); | ||
| 130 | CONVERSION_SELECTOR(int16_t, uint32_t, __builtin_spv_UConvert); | ||
| 131 | CONVERSION_SELECTOR(int16_t, int32_t, __builtin_spv_SConvert); | ||
| 132 | CONVERSION_SELECTOR(int16_t, uint64_t, __builtin_spv_UConvert); | ||
| 133 | CONVERSION_SELECTOR(int16_t, int64_t, __builtin_spv_SConvert); | ||
| 134 | CONVERSION_SELECTOR(int16_t, half, __builtin_spv_ConvertSToF); /* signed source to float: ConvertSToF */ | ||
| 135 | CONVERSION_SELECTOR(int16_t, float, __builtin_spv_ConvertSToF); | ||
| 136 | CONVERSION_SELECTOR(int16_t, double, __builtin_spv_ConvertSToF); | ||
| 137 | |||
| 138 | CONVERSION_SELECTOR(uint32_t, uint16_t, __builtin_spv_UConvert); /* wider integer sources narrowing to 16 bits follow the same target-signedness rule */ | ||
| 139 | CONVERSION_SELECTOR(uint32_t, int16_t, __builtin_spv_SConvert); | ||
| 140 | |||
| 141 | CONVERSION_SELECTOR(int32_t, uint16_t, __builtin_spv_UConvert); | ||
| 142 | CONVERSION_SELECTOR(int32_t, int16_t, __builtin_spv_SConvert); | ||
| 143 | |||
| 144 | CONVERSION_SELECTOR(uint64_t, uint16_t, __builtin_spv_UConvert); | ||
| 145 | CONVERSION_SELECTOR(uint64_t, int16_t, __builtin_spv_SConvert); | ||
| 146 | |||
| 147 | CONVERSION_SELECTOR(int64_t, uint16_t, __builtin_spv_UConvert); | ||
| 148 | CONVERSION_SELECTOR(int64_t, int16_t, __builtin_spv_SConvert); | ||
| 149 | |||
| 150 | CONVERSION_SELECTOR(half, uint16_t, __builtin_spv_ConvertFtoU); /* float source to 16-bit integer: ConvertFtoU for unsigned, ConvertFtoS for signed targets */ | ||
| 151 | CONVERSION_SELECTOR(half, int16_t, __builtin_spv_ConvertFtoS); | ||
| 152 | |||
| 153 | CONVERSION_SELECTOR(float, uint16_t, __builtin_spv_ConvertFtoU); | ||
| 154 | CONVERSION_SELECTOR(float, int16_t, __builtin_spv_ConvertFtoS); | ||
| 155 | |||
| 156 | CONVERSION_SELECTOR(double, uint16_t, __builtin_spv_ConvertFtoU); | ||
| 157 | CONVERSION_SELECTOR(double, int16_t, __builtin_spv_ConvertFtoS); | ||
| 158 | #endif /* __HLSL_ENABLE_16_BIT */ | ||
| 159 | |||
| 160 | CONVERSION_SELECTOR(uint32_t, uint32_t, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 161 | CONVERSION_SELECTOR(uint32_t, int32_t, __builtin_spv_Bitcast); /* same width, other signedness: Bitcast */ | ||
| 162 | CONVERSION_SELECTOR(uint32_t, uint64_t, __builtin_spv_UConvert); /* widening: UConvert for the unsigned target, SConvert for the signed target */ | ||
| 163 | CONVERSION_SELECTOR(uint32_t, int64_t, __builtin_spv_SConvert); | ||
| 164 | CONVERSION_SELECTOR(uint32_t, half, __builtin_spv_ConvertUToF); /* unsigned source to float: ConvertUToF */ | ||
| 165 | CONVERSION_SELECTOR(uint32_t, float, __builtin_spv_ConvertUToF); | ||
| 166 | CONVERSION_SELECTOR(uint32_t, double, __builtin_spv_ConvertUToF); | ||
| 167 | |||
| 168 | CONVERSION_SELECTOR(int32_t, uint32_t, __builtin_spv_Bitcast); | ||
| 169 | CONVERSION_SELECTOR(int32_t, int32_t, __builtin_spv_CopyObject); | ||
| 170 | CONVERSION_SELECTOR(int32_t, uint64_t, __builtin_spv_UConvert); | ||
| 171 | CONVERSION_SELECTOR(int32_t, int64_t, __builtin_spv_SConvert); | ||
| 172 | CONVERSION_SELECTOR(int32_t, half, __builtin_spv_ConvertSToF); /* signed source to float: ConvertSToF */ | ||
| 173 | CONVERSION_SELECTOR(int32_t, float, __builtin_spv_ConvertSToF); | ||
| 174 | CONVERSION_SELECTOR(int32_t, double, __builtin_spv_ConvertSToF); | ||
| 175 | |||
| 176 | CONVERSION_SELECTOR(uint64_t, uint32_t, __builtin_spv_UConvert); /* narrowing: UConvert for the unsigned target, SConvert for the signed target */ | ||
| 177 | CONVERSION_SELECTOR(uint64_t, int32_t, __builtin_spv_SConvert); | ||
| 178 | CONVERSION_SELECTOR(uint64_t, uint64_t, __builtin_spv_CopyObject); /* identical types: plain copy, matching every other same-type entry in this table */ | ||
| 179 | CONVERSION_SELECTOR(uint64_t, int64_t, __builtin_spv_Bitcast); /* same width, other signedness: reinterpret the bits (a bitcast needs differing types) */ | ||
| 180 | CONVERSION_SELECTOR(uint64_t, half, __builtin_spv_ConvertUToF); /* unsigned source to float: ConvertUToF */ | ||
| 181 | CONVERSION_SELECTOR(uint64_t, float, __builtin_spv_ConvertUToF); | ||
| 182 | CONVERSION_SELECTOR(uint64_t, double, __builtin_spv_ConvertUToF); | ||
| 183 | |||
| 184 | CONVERSION_SELECTOR(int64_t, uint32_t, __builtin_spv_UConvert); /* narrowing: UConvert for the unsigned target, SConvert for the signed target */ | ||
| 185 | CONVERSION_SELECTOR(int64_t, int32_t, __builtin_spv_SConvert); | ||
| 186 | CONVERSION_SELECTOR(int64_t, uint64_t, __builtin_spv_Bitcast); /* same width, other signedness: Bitcast */ | ||
| 187 | CONVERSION_SELECTOR(int64_t, int64_t, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 188 | CONVERSION_SELECTOR(int64_t, half, __builtin_spv_ConvertSToF); /* signed source to float: ConvertSToF */ | ||
| 189 | CONVERSION_SELECTOR(int64_t, float, __builtin_spv_ConvertSToF); | ||
| 190 | CONVERSION_SELECTOR(int64_t, double, __builtin_spv_ConvertSToF); | ||
| 191 | |||
| 192 | CONVERSION_SELECTOR(half, uint32_t, __builtin_spv_ConvertFtoU); /* float source to integer: ConvertFtoU for unsigned, ConvertFtoS for signed targets */ | ||
| 193 | CONVERSION_SELECTOR(half, int32_t, __builtin_spv_ConvertFtoS); | ||
| 194 | CONVERSION_SELECTOR(half, uint64_t, __builtin_spv_ConvertFtoU); | ||
| 195 | CONVERSION_SELECTOR(half, int64_t, __builtin_spv_ConvertFtoS); | ||
| 196 | CONVERSION_SELECTOR(half, half, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 197 | #if __HLSL_ENABLE_16_BIT /* half -> float is a real FConvert only when 16-bit types are enabled */ | ||
| 198 | CONVERSION_SELECTOR(half, float, __builtin_spv_FConvert); | ||
| 199 | #else /* otherwise a plain copy; presumably half then has the same representation as float - TODO confirm */ | ||
| 200 | CONVERSION_SELECTOR(half, float, __builtin_spv_CopyObject); | ||
| 201 | #endif /* __HLSL_ENABLE_16_BIT */ | ||
| 202 | |||
| 203 | CONVERSION_SELECTOR(half, double, __builtin_spv_FConvert); /* FConvert in both configurations */ | ||
| 204 | |||
| 205 | CONVERSION_SELECTOR(float, uint32_t, __builtin_spv_ConvertFtoU); /* float source to integer: ConvertFtoU for unsigned, ConvertFtoS for signed targets */ | ||
| 206 | CONVERSION_SELECTOR(float, int32_t, __builtin_spv_ConvertFtoS); | ||
| 207 | CONVERSION_SELECTOR(float, uint64_t, __builtin_spv_ConvertFtoU); | ||
| 208 | CONVERSION_SELECTOR(float, int64_t, __builtin_spv_ConvertFtoS); | ||
| 209 | #if __HLSL_ENABLE_16_BIT /* float -> half is a real FConvert only when 16-bit types are enabled */ | ||
| 210 | CONVERSION_SELECTOR(float, half, __builtin_spv_FConvert); | ||
| 211 | #else /* otherwise a plain copy; presumably half then has the same representation as float - TODO confirm */ | ||
| 212 | CONVERSION_SELECTOR(float, half, __builtin_spv_CopyObject); | ||
| 213 | #endif /* __HLSL_ENABLE_16_BIT */ | ||
| 214 | CONVERSION_SELECTOR(float, float, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 215 | CONVERSION_SELECTOR(float, double, __builtin_spv_FConvert); | ||
| 216 | |||
| 217 | CONVERSION_SELECTOR(double, uint32_t, __builtin_spv_ConvertFtoU); /* float source to integer: ConvertFtoU for unsigned, ConvertFtoS for signed targets */ | ||
| 218 | CONVERSION_SELECTOR(double, int32_t, __builtin_spv_ConvertFtoS); | ||
| 219 | CONVERSION_SELECTOR(double, uint64_t, __builtin_spv_ConvertFtoU); | ||
| 220 | CONVERSION_SELECTOR(double, int64_t, __builtin_spv_ConvertFtoS); | ||
| 221 | CONVERSION_SELECTOR(double, half, __builtin_spv_FConvert); /* float-to-float of another type: FConvert, with no 16-bit conditional for double */ | ||
| 222 | CONVERSION_SELECTOR(double, float, __builtin_spv_FConvert); | ||
| 223 | CONVERSION_SELECTOR(double, double, __builtin_spv_CopyObject); /* identical types: CopyObject */ | ||
| 224 | }; // namespace util | ||
| 225 | } // namespace vk | ||
| 226 | |||
| 227 | #endif // _HLSL_VK_KHR_OPCODE_SELECTOR_H_ | ||
diff --git a/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h new file mode 100644 index 0000000..69bb53b --- /dev/null +++ b/contrib/dxc_2025_07_14/inc/hlsl/vk/spirv.h | |||
| @@ -0,0 +1,85 @@ | |||
| 1 | // Copyright (c) 2024 Google LLC | ||
| 2 | // | ||
| 3 | // This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
| 4 | // See https://llvm.org/LICENSE.txt for license information. | ||
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
| 6 | |||
| 7 | #ifndef _HLSL_VK_SPIRV_H_ | ||
| 8 | #define _HLSL_VK_SPIRV_H_ | ||
| 9 | |||
| 10 | namespace vk { | ||
| 11 | |||
| 12 | enum CooperativeMatrixUse { /* which operand role a cooperative matrix plays: A, B or Accumulator */ | ||
| 13 | CooperativeMatrixUseMatrixAKHR = 0, | ||
| 14 | CooperativeMatrixUseMatrixBKHR = 1, | ||
| 15 | CooperativeMatrixUseMatrixAccumulatorKHR = 2, | ||
| 16 | CooperativeMatrixUseMax = 0x7fffffff, /* upper-bound sentinel, not a real use; presumably mirrors the SPIR-V headers - TODO confirm */ | ||
| 17 | }; | ||
| 18 | |||
| 19 | enum CooperativeMatrixLayout { /* memory layout selector for cooperative matrix data */ | ||
| 20 | CooperativeMatrixLayoutRowMajorKHR = 0, | ||
| 21 | CooperativeMatrixLayoutColumnMajorKHR = 1, | ||
| 22 | CooperativeMatrixLayoutRowBlockedInterleavedARM = 4202, /* the two ARM-suffixed layouts use non-contiguous values 4202 and 4203 */ | ||
| 23 | CooperativeMatrixLayoutColumnBlockedInterleavedARM = 4203, | ||
| 24 | CooperativeMatrixLayoutMax = 0x7fffffff, /* upper-bound sentinel, not a real layout; presumably mirrors the SPIR-V headers - TODO confirm */ | ||
| 25 | }; | ||
| 26 | |||
| 27 | enum CooperativeMatrixOperandsMask { /* bit mask: every non-zero enumerator is a distinct single bit (0x1 through 0x10) */ | ||
| 28 | CooperativeMatrixOperandsMaskNone = 0, | ||
| 29 | CooperativeMatrixOperandsMatrixASignedComponentsKHRMask = 0x00000001, /* one signed-components bit each for matrix A, B, C and the result */ | ||
| 30 | CooperativeMatrixOperandsMatrixBSignedComponentsKHRMask = 0x00000002, | ||
| 31 | CooperativeMatrixOperandsMatrixCSignedComponentsKHRMask = 0x00000004, | ||
| 32 | CooperativeMatrixOperandsMatrixResultSignedComponentsKHRMask = 0x00000008, | ||
| 33 | CooperativeMatrixOperandsSaturatingAccumulationKHRMask = 0x00000010, | ||
| 34 | }; | ||
| 35 | |||
| 36 | enum MemoryAccessMask { /* bit mask of memory-access flags; each distinct non-zero value is a single bit */ | ||
| 37 | MemoryAccessMaskNone = 0, | ||
| 38 | MemoryAccessVolatileMask = 0x00000001, | ||
| 39 | MemoryAccessAlignedMask = 0x00000002, | ||
| 40 | MemoryAccessNontemporalMask = 0x00000004, | ||
| 41 | MemoryAccessMakePointerAvailableMask = 0x00000008, | ||
| 42 | MemoryAccessMakePointerAvailableKHRMask = 0x00000008, /* KHR-suffixed names are aliases: same value as the unsuffixed enumerator above */ | ||
| 43 | MemoryAccessMakePointerVisibleMask = 0x00000010, | ||
| 44 | MemoryAccessMakePointerVisibleKHRMask = 0x00000010, | ||
| 45 | MemoryAccessNonPrivatePointerMask = 0x00000020, | ||
| 46 | MemoryAccessNonPrivatePointerKHRMask = 0x00000020, | ||
| 47 | MemoryAccessAliasScopeINTELMaskMask = 0x00010000, /* INTEL flags sit in the upper half-word (bits 16 and 17) */ | ||
| 48 | MemoryAccessNoAliasINTELMaskMask = 0x00020000, | ||
| 49 | }; | ||
| 50 | |||
| 51 | enum Scope { /* scope identifiers, numbered 0 (CrossDevice) through 6 (ShaderCallKHR) */ | ||
| 52 | ScopeCrossDevice = 0, | ||
| 53 | ScopeDevice = 1, | ||
| 54 | ScopeWorkgroup = 2, | ||
| 55 | ScopeSubgroup = 3, | ||
| 56 | ScopeInvocation = 4, | ||
| 57 | ScopeQueueFamily = 5, | ||
| 58 | ScopeQueueFamilyKHR = 5, /* alias: same value as ScopeQueueFamily */ | ||
| 59 | ScopeShaderCallKHR = 6, | ||
| 60 | ScopeMax = 0x7fffffff, /* upper-bound sentinel, not a real scope; presumably mirrors the SPIR-V headers - TODO confirm */ | ||
| 61 | }; | ||
| 62 | |||
| 63 | enum StorageClass { /* only the Workgroup storage class is declared in this header */ | ||
| 64 | StorageClassWorkgroup = 4, /* passed as the storage-class literal of WorkgroupSpirvPointer */ | ||
| 65 | }; | ||
| 66 | |||
| 67 | // An opaque type to represent a Spir-V pointer to the workgroup storage class. It is a const vk::SpirvOpaqueType built from opcode 32 (OpTypePointer), the StorageClassWorkgroup literal and PointeeType. | ||
| 68 | // clang-format off | ||
| 69 | template <typename PointeeType> | ||
| 70 | using WorkgroupSpirvPointer = const vk::SpirvOpaqueType< | ||
| 71 | /* OpTypePointer */ 32, | ||
| 72 | vk::Literal<vk::integral_constant<uint, StorageClassWorkgroup> >, | ||
| 73 | PointeeType>; | ||
| 74 | // clang-format on | ||
| 75 | |||
| 76 | // Returns an opaque Spir-V pointer to v. The memory object v's storage class | ||
| 77 | // modifier must be groupshared. If the incorrect storage class is used, then | ||
| 78 | // there will be a validation error, and it will not show the correct details. NOTE(review): the original sentence was cut off after "correct"; confirm the intended wording. | ||
| 79 | template <typename T> | ||
| 80 | [[vk::ext_instruction(/* OpCopyObject */ 83)]] WorkgroupSpirvPointer<T> | ||
| 81 | GetGroupSharedAddress([[vk::ext_reference]] T v); | ||
| 82 | |||
| 83 | } // namespace vk | ||
| 84 | |||
| 85 | #endif // _HLSL_VK_SPIRV_H_ | ||
