simdutf 9.0.0
Unicode at GB/s.
Loading...
Searching...
No Matches
isadetection.h
1/* From
2https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
3Highly modified.
4
5Copyright (c) 2016- Facebook, Inc (Adam Paszke)
6Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
7Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
8Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
9Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
10Copyright (c) 2011-2013 NYU (Clement Farabet)
11Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
12Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
13(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
14Samy Bengio, Johnny Mariethoz)
15
16All rights reserved.
17
18Redistribution and use in source and binary forms, with or without
19modification, are permitted provided that the following conditions are met:
20
211. Redistributions of source code must retain the above copyright
22 notice, this list of conditions and the following disclaimer.
23
242. Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
283. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
29America and IDIAP Research Institute nor the names of its contributors may be
30 used to endorse or promote products derived from this software without
31 specific prior written permission.
32
33THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43POSSIBILITY OF SUCH DAMAGE.
44*/
45
46#ifndef SIMDutf_INTERNAL_ISADETECTION_H
47#define SIMDutf_INTERNAL_ISADETECTION_H
48
49#include <cstdint>
50#include <cstdlib>
51#if defined(_MSC_VER)
52 #include <intrin.h>
53#elif (defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)) || \
54 defined(__FILC__)
55 #include <cpuid.h>
56#endif
57
58#ifdef __FILC__
59 #include <stdfil.h>
60#endif
61
62#include "simdutf/portability.h"
63
64// RISC-V ISA detection utilities
65#if SIMDUTF_IS_RISCV64 && defined(__linux__)
66 #include <unistd.h> // for syscall
// Local copies of the Linux riscv_hwprobe interface. They are declared here,
// rather than taken from system headers, for backwards compatibility.

// One key/value probe record exchanged with the hwprobe system call.
struct simdutf_riscv_hwprobe {
  int64_t key;    // identifier of the property being queried
  uint64_t value; // answer for that key (read back after the call)
};

  // Key whose value is a bitmask of supported extensions.
  #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
  // Bits tested in the value returned for SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0.
  #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
  #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)

  // Function-like macro sharing its name with the struct above: it only
  // expands when followed by '(' and forwards its arguments to raw system
  // call number 258.
  #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
76#endif // SIMDUTF_IS_RISCV64 && defined(__linux__)
77
78#if defined(__loongarch__) && defined(__linux__)
79 #include <sys/auxv.h>
80// bits/hwcap.h
81// #define HWCAP_LOONGARCH_LSX (1 << 4)
82// #define HWCAP_LOONGARCH_LASX (1 << 5)
83#endif
84
85namespace simdutf {
86namespace internal {
87
// Bit flags naming the instruction-set extensions this library can detect.
// detect_supported_architectures() ORs these together into a uint32_t mask,
// so every non-DEFAULT enumerator must occupy its own bit: two flags sharing
// a value cannot be told apart once they are combined.
enum instruction_set {
  DEFAULT = 0x0, // no extension detected / required
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  // Previously 0x2000, which aliased AVX512CD.
  AVX512VPOPCNTDQ = 0x20000,
  LSX = 0x40000,
  LASX = 0x80000,
  // Previously 0x4000 / 0x8000, which aliased AVX512BW / AVX512VL.
  RVV = 0x100000,
  ZVBB = 0x200000,
};
112
113#if defined(__PPC64__)
114
115static inline uint32_t detect_supported_architectures() {
116 return instruction_set::ALTIVEC;
117}
118
119#elif SIMDUTF_IS_RISCV64
120
121static inline uint32_t detect_supported_architectures() {
122 uint32_t host_isa = instruction_set::DEFAULT;
123 #if SIMDUTF_IS_RVV
124 host_isa |= instruction_set::RVV;
125 #endif
126 #if SIMDUTF_IS_ZVBB
127 host_isa |= instruction_set::ZVBB;
128 #endif
129 #if defined(__linux__)
130 simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
131 long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0,
132 nullptr, 0);
133 if (ret == 0) {
134 uint64_t extensions = probes[0].value;
135 if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
136 host_isa |= instruction_set::RVV;
137 if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
138 host_isa |= instruction_set::ZVBB;
139 }
140 #endif
141 #if defined(RUN_IN_SPIKE_SIMULATOR)
142 // Proxy Kernel does not implement yet hwprobe syscall
143 host_isa |= instruction_set::RVV;
144 #endif
145 return host_isa;
146}
147
148#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
149
150static inline uint32_t detect_supported_architectures() {
151 return instruction_set::NEON;
152}
153
154#elif defined(__x86_64__) || defined(_M_AMD64) // x64
155
namespace {
namespace cpuid_bit {
// Masks for the registers filled in by the CPUID instruction. Bit positions
// can be found in the Intel ISA Reference entry for CPUID.

// Leaf EAX = 0x01, register ECX.
constexpr uint32_t pclmulqdq = uint32_t{1} << 1;
///< @private bit 1 of ECX for EAX=0x1
constexpr uint32_t sse42 = uint32_t{1} << 20;
///< @private bit 20 of ECX for EAX=0x1
constexpr uint32_t osxsave = uint32_t{3} << 26;
///< @private bits 26+27 of ECX for EAX=0x1 (both must be set)

// Leaf EAX = 0x07 (Structured Extended Feature Flags), sub-leaf ECX = 0x00.
// See: "Table 3-8. Information Returned by CPUID Instruction"

// Register EBX of leaf 0x07.
namespace ebx {
constexpr uint32_t bmi1 = uint32_t{1} << 3;
constexpr uint32_t avx2 = uint32_t{1} << 5;
constexpr uint32_t bmi2 = uint32_t{1} << 8;
constexpr uint32_t avx512f = uint32_t{1} << 16;
constexpr uint32_t avx512dq = uint32_t{1} << 17;
constexpr uint32_t avx512ifma = uint32_t{1} << 21;
constexpr uint32_t avx512cd = uint32_t{1} << 28;
constexpr uint32_t avx512bw = uint32_t{1} << 30;
constexpr uint32_t avx512vl = uint32_t{1} << 31;
} // namespace ebx

// Register ECX of leaf 0x07.
namespace ecx {
constexpr uint32_t avx512vbmi = uint32_t{1} << 1;
constexpr uint32_t avx512vbmi2 = uint32_t{1} << 6;
constexpr uint32_t avx512vnni = uint32_t{1} << 11;
constexpr uint32_t avx512bitalg = uint32_t{1} << 12;
constexpr uint32_t avx512vpopcnt = uint32_t{1} << 14;
} // namespace ecx

// Register EDX of leaf 0x07.
namespace edx {
constexpr uint32_t avx512vp2intersect = uint32_t{1} << 8;
} // namespace edx

// Masks for the XCR0 value returned by xgetbv().
namespace xcr0_bit {
constexpr uint64_t avx256_saved = uint64_t{1} << 2; ///< @private bit 2 = AVX
constexpr uint64_t avx512_saved =
    (uint64_t{1} << 5) | (uint64_t{1} << 6) | (uint64_t{1} << 7);
///< @private bits 5,6,7 = opmask, ZMM_hi256, hi16_ZMM
} // namespace xcr0_bit
} // namespace cpuid_bit
} // namespace
200
// Executes the CPUID instruction.
//
// On entry *eax holds the leaf and *ecx the sub-leaf to query (*ecx must be
// initialized by the caller even for leaves without sub-leaves). On return
// the four pointed-to values hold the EAX, EBX, ECX and EDX results.
//
// In the <cpuid.h> branch, a leaf that the processor does not support yields
// all-zero results instead of leaving the outputs untouched.
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
  #if defined(_MSC_VER)
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
  #elif (defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)) ||         \
      defined(__FILC__)
  const uint32_t leaf = *eax;
  const uint32_t subleaf = *ecx;
  // __get_cpuid() does not forward a sub-leaf in ECX, which leaf 0x7 needs;
  // __get_cpuid_count() does. It returns 0 without writing the outputs when
  // the leaf is unsupported, so clear them here: otherwise register values
  // left over from an earlier query would be read as feature flags.
  if (!__get_cpuid_count(leaf, subleaf, eax, ebx, ecx, edx)) {
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
  }
  #else
  uint32_t a = *eax, b, c = *ecx, d;
  // EAX and ECX are both inputs and outputs; EBX and EDX are outputs only.
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
  #endif
}
223
// Reads extended control register 0 (XCR0) and returns it as a 64-bit value.
// The caller is expected to have verified OS XSAVE support first, as
// detect_supported_architectures() does before calling this.
static inline uint64_t xgetbv() {
  #if defined(_MSC_VER)
  return _xgetbv(0);
  #elif defined(__FILC__)
  return zxgetbv();
  #else
  uint32_t low_half, high_half;
  // ECX = 0 selects XCR0; the result comes back split across EDX:EAX.
  asm volatile("xgetbv\n\t" : "=a"(low_half), "=d"(high_half) : "c"(0));
  return (static_cast<uint64_t>(high_half) << 32) | low_half;
  #endif
}
235
236static inline uint32_t detect_supported_architectures() {
237 uint32_t eax;
238 uint32_t ebx = 0;
239 uint32_t ecx = 0;
240 uint32_t edx = 0;
241 uint32_t host_isa = 0x0;
242
243 // EBX for EAX=0x1
244 eax = 0x1;
245 cpuid(&eax, &ebx, &ecx, &edx);
246
247 if (ecx & cpuid_bit::sse42) {
248 host_isa |= instruction_set::SSE42;
249 }
250
251 if (ecx & cpuid_bit::pclmulqdq) {
252 host_isa |= instruction_set::PCLMULQDQ;
253 }
254
255 if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
256 return host_isa;
257 }
258
259 // xgetbv for checking if the OS saves registers
260 uint64_t xcr0 = xgetbv();
261
262 if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
263 return host_isa;
264 }
265 // ECX for EAX=0x7
266 eax = 0x7;
267 ecx = 0x0; // Sub-leaf = 0
268 cpuid(&eax, &ebx, &ecx, &edx);
269 if (ebx & cpuid_bit::ebx::avx2) {
270 host_isa |= instruction_set::AVX2;
271 }
272 if (ebx & cpuid_bit::ebx::bmi1) {
273 host_isa |= instruction_set::BMI1;
274 }
275 if (ebx & cpuid_bit::ebx::bmi2) {
276 host_isa |= instruction_set::BMI2;
277 }
278 if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
279 cpuid_bit::xcr0_bit::avx512_saved)) {
280 return host_isa;
281 }
282 if (ebx & cpuid_bit::ebx::avx512f) {
283 host_isa |= instruction_set::AVX512F;
284 }
285 if (ebx & cpuid_bit::ebx::avx512bw) {
286 host_isa |= instruction_set::AVX512BW;
287 }
288 if (ebx & cpuid_bit::ebx::avx512cd) {
289 host_isa |= instruction_set::AVX512CD;
290 }
291 if (ebx & cpuid_bit::ebx::avx512dq) {
292 host_isa |= instruction_set::AVX512DQ;
293 }
294 if (ebx & cpuid_bit::ebx::avx512vl) {
295 host_isa |= instruction_set::AVX512VL;
296 }
297 if (ecx & cpuid_bit::ecx::avx512vbmi2) {
298 host_isa |= instruction_set::AVX512VBMI2;
299 }
300 if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
301 host_isa |= instruction_set::AVX512VPOPCNTDQ;
302 }
303 return host_isa;
304}
305#elif defined(__loongarch__)
306
307static inline uint32_t detect_supported_architectures() {
308 uint32_t host_isa = instruction_set::DEFAULT;
309 #if defined(__linux__)
310 uint64_t hwcap = 0;
311 hwcap = getauxval(AT_HWCAP);
312 if (hwcap & HWCAP_LOONGARCH_LSX) {
313 host_isa |= instruction_set::LSX;
314 }
315 if (hwcap & HWCAP_LOONGARCH_LASX) {
316 host_isa |= instruction_set::LASX;
317 }
318 #endif
319 return host_isa;
320}
321#else // fallback
322
323// includes 32-bit ARM.
324static inline uint32_t detect_supported_architectures() {
325 return instruction_set::DEFAULT;
326}
327
328#endif // end SIMD extension detection code
329
330} // namespace internal
331} // namespace simdutf
332
333#endif // SIMDutf_INTERNAL_ISADETECTION_H