simdutf 6.1.1
Unicode at GB/s.
Loading...
Searching...
No Matches
isadetection.h
1/* From
2https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
3Highly modified.
4
5Copyright (c) 2016- Facebook, Inc (Adam Paszke)
6Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
7Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
8Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
9Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
10Copyright (c) 2011-2013 NYU (Clement Farabet)
11Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
12Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
13(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
14Samy Bengio, Johnny Mariethoz)
15
16All rights reserved.
17
18Redistribution and use in source and binary forms, with or without
19modification, are permitted provided that the following conditions are met:
20
211. Redistributions of source code must retain the above copyright
22 notice, this list of conditions and the following disclaimer.
23
242. Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
283. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
29America and IDIAP Research Institute nor the names of its contributors may be
30 used to endorse or promote products derived from this software without
31 specific prior written permission.
32
33THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43POSSIBILITY OF SUCH DAMAGE.
44*/
45
46#ifndef SIMDutf_INTERNAL_ISADETECTION_H
47#define SIMDutf_INTERNAL_ISADETECTION_H
48
49#include <cstdint>
50#include <cstdlib>
51#if defined(_MSC_VER)
52 #include <intrin.h>
53#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
54 #include <cpuid.h>
55#endif
56
57#include "simdutf/portability.h"
58
59// RISC-V ISA detection utilities
60#if SIMDUTF_IS_RISCV64 && defined(__linux__)
61 #include <unistd.h> // for syscall
62// We define these ourselves, for backwards compatibility
/// One key/value pair exchanged with the kernel through the hwprobe syscall:
/// the caller fills in `key`, and `value` is read back after the call.
struct simdutf_riscv_hwprobe {
  int64_t key;    ///< Which property is being queried.
  uint64_t value; ///< Result slot; zero-initialized by the caller.
};
  // Function-like macro sharing the struct's name: it only expands when the
  // name is followed by '(', so the type name stays usable in declarations.
  // 258 is the raw syscall number used for the hwprobe query.
  #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
  // Key selecting the bitmask of supported extensions.
  #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
  // Bits tested in the value returned for the key above.
  #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
  #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
71#endif // SIMDUTF_IS_RISCV64 && defined(__linux__)
72
73namespace simdutf {
74namespace internal {
75
/**
 * Bit flags naming the instruction-set extensions a host may support.
 *
 * detect_supported_architectures() returns a bitwise OR of these values, so
 * any two flags that the same detection routine can report must occupy
 * different bits.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  // Needs a bit of its own: AVX512CD (0x2000) is reported by the same x86-64
  // detection routine, and sharing a bit would make one flag imply the other.
  AVX512VPOPCNTDQ = 0x20000,
  // RVV and ZVBB reuse the numeric values of AVX512BW and AVX512VL. They are
  // only set by the RISC-V detection routine, which lives in a different
  // preprocessor branch than the x86-64 one, so the two never meet in one
  // returned mask.
  RVV = 0x4000,
  ZVBB = 0x8000,
  LSX = 0x40000,
  LASX = 0x80000,
};
100
101#if defined(__PPC64__)
102
103static inline uint32_t detect_supported_architectures() {
104 return instruction_set::ALTIVEC;
105}
106
107#elif SIMDUTF_IS_RISCV64
108
109static inline uint32_t detect_supported_architectures() {
110 uint32_t host_isa = instruction_set::DEFAULT;
111 #if SIMDUTF_IS_RVV
112 host_isa |= instruction_set::RVV;
113 #endif
114 #if SIMDUTF_IS_ZVBB
115 host_isa |= instruction_set::ZVBB;
116 #endif
117 #if defined(__linux__)
118 simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
119 long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0,
120 nullptr, 0);
121 if (ret == 0) {
122 uint64_t extensions = probes[0].value;
123 if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
124 host_isa |= instruction_set::RVV;
125 if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
126 host_isa |= instruction_set::ZVBB;
127 }
128 #endif
129 #if defined(RUN_IN_SPIKE_SIMULATOR)
130 // Proxy Kernel does not implement yet hwprobe syscall
131 host_isa |= instruction_set::RVV;
132 #endif
133 return host_isa;
134}
135
136#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
137
138static inline uint32_t detect_supported_architectures() {
139 return instruction_set::NEON;
140}
141
142#elif defined(__x86_64__) || defined(_M_AMD64) // x64
143
namespace {
namespace cpuid_bit {
// Masks for the registers returned by CPUID and for XCR0. The bit positions
// can be found in the Intel ISA reference entry for CPUID.

// Leaf EAX = 0x1: these masks are applied to ECX.
constexpr uint32_t pclmulqdq = uint32_t{1} << 1;
constexpr uint32_t sse42 = uint32_t{1} << 20;
// Bits 26 and 27 together; the detection code requires both to be set.
constexpr uint32_t osxsave = (uint32_t{1} << 26) | (uint32_t{1} << 27);

// Leaf EAX = 0x7 (Structured Extended Feature Flags), sub-leaf ECX = 0x0.
// See: "Table 3-8. Information Returned by CPUID Instruction"

// Masks applied to EBX of leaf 0x7.
namespace ebx {
constexpr uint32_t bmi1 = uint32_t{1} << 3;
constexpr uint32_t avx2 = uint32_t{1} << 5;
constexpr uint32_t bmi2 = uint32_t{1} << 8;
constexpr uint32_t avx512f = uint32_t{1} << 16;
constexpr uint32_t avx512dq = uint32_t{1} << 17;
constexpr uint32_t avx512ifma = uint32_t{1} << 21;
constexpr uint32_t avx512cd = uint32_t{1} << 28;
constexpr uint32_t avx512bw = uint32_t{1} << 30;
constexpr uint32_t avx512vl = uint32_t{1} << 31;
} // namespace ebx

// Masks applied to ECX of leaf 0x7.
namespace ecx {
constexpr uint32_t avx512vbmi = uint32_t{1} << 1;
constexpr uint32_t avx512vbmi2 = uint32_t{1} << 6;
constexpr uint32_t avx512vnni = uint32_t{1} << 11;
constexpr uint32_t avx512bitalg = uint32_t{1} << 12;
constexpr uint32_t avx512vpopcnt = uint32_t{1} << 14;
} // namespace ecx

// Masks for EDX of leaf 0x7.
namespace edx {
constexpr uint32_t avx512vp2intersect = uint32_t{1} << 8;
} // namespace edx

// Masks applied to the 64-bit value returned by xgetbv().
namespace xcr0_bit {
constexpr uint64_t avx256_saved = uint64_t{1} << 2;
// Three adjacent bits (5, 6 and 7); all must be set for AVX-512 use.
constexpr uint64_t avx512_saved = uint64_t{7} << 5;
} // namespace xcr0_bit
} // namespace cpuid_bit
} // namespace
188
/**
 * Runs the CPUID instruction.
 *
 * @param eax in: leaf to query; out: EAX after the instruction.
 * @param ebx out: EBX after the instruction.
 * @param ecx in: sub-leaf to query; out: ECX after the instruction.
 * @param edx out: EDX after the instruction.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
  #if defined(_MSC_VER)
  int regs[4];
  __cpuidex(regs, *eax, *ecx);
  *eax = static_cast<uint32_t>(regs[0]);
  *ebx = static_cast<uint32_t>(regs[1]);
  *ecx = static_cast<uint32_t>(regs[2]);
  *edx = static_cast<uint32_t>(regs[3]);
  #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  // NOTE(review): only the leaf is handed to __get_cpuid; the sub-leaf held in
  // *ecx is not forwarded on this path. Confirm whether a sub-leaf aware
  // helper is needed here.
  const uint32_t leaf = *eax;
  __get_cpuid(leaf, eax, ebx, ecx, edx);
  #else
  // EAX and ECX are read-write operands (leaf / sub-leaf in, results out);
  // EBX and EDX are write-only.
  uint32_t reg_a = *eax;
  uint32_t reg_b;
  uint32_t reg_c = *ecx;
  uint32_t reg_d;
  asm volatile("cpuid\n\t"
               : "+a"(reg_a), "=b"(reg_b), "+c"(reg_c), "=d"(reg_d));
  *eax = reg_a;
  *ebx = reg_b;
  *ecx = reg_c;
  *edx = reg_d;
  #endif
}
210
/**
 * Reads extended control register 0 with the XGETBV instruction.
 *
 * @return the 64-bit register value (EDX supplies the upper 32 bits, EAX the
 *         lower 32 bits).
 */
static inline uint64_t xgetbv() {
  #if defined(_MSC_VER)
  return _xgetbv(0);
  #else
  uint32_t low_half, high_half;
  // ECX = 0 selects the register to read.
  asm volatile("xgetbv\n\t" : "=a"(low_half), "=d"(high_half) : "c"(0));
  return (static_cast<uint64_t>(high_half) << 32) | low_half;
  #endif
}
220
221static inline uint32_t detect_supported_architectures() {
222 uint32_t eax;
223 uint32_t ebx = 0;
224 uint32_t ecx = 0;
225 uint32_t edx = 0;
226 uint32_t host_isa = 0x0;
227
228 // EBX for EAX=0x1
229 eax = 0x1;
230 cpuid(&eax, &ebx, &ecx, &edx);
231
232 if (ecx & cpuid_bit::sse42) {
233 host_isa |= instruction_set::SSE42;
234 }
235
236 if (ecx & cpuid_bit::pclmulqdq) {
237 host_isa |= instruction_set::PCLMULQDQ;
238 }
239
240 if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
241 return host_isa;
242 }
243
244 // xgetbv for checking if the OS saves registers
245 uint64_t xcr0 = xgetbv();
246
247 if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
248 return host_isa;
249 }
250 // ECX for EAX=0x7
251 eax = 0x7;
252 ecx = 0x0; // Sub-leaf = 0
253 cpuid(&eax, &ebx, &ecx, &edx);
254 if (ebx & cpuid_bit::ebx::avx2) {
255 host_isa |= instruction_set::AVX2;
256 }
257 if (ebx & cpuid_bit::ebx::bmi1) {
258 host_isa |= instruction_set::BMI1;
259 }
260 if (ebx & cpuid_bit::ebx::bmi2) {
261 host_isa |= instruction_set::BMI2;
262 }
263 if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
264 cpuid_bit::xcr0_bit::avx512_saved)) {
265 return host_isa;
266 }
267 if (ebx & cpuid_bit::ebx::avx512f) {
268 host_isa |= instruction_set::AVX512F;
269 }
270 if (ebx & cpuid_bit::ebx::avx512bw) {
271 host_isa |= instruction_set::AVX512BW;
272 }
273 if (ebx & cpuid_bit::ebx::avx512cd) {
274 host_isa |= instruction_set::AVX512CD;
275 }
276 if (ebx & cpuid_bit::ebx::avx512dq) {
277 host_isa |= instruction_set::AVX512DQ;
278 }
279 if (ebx & cpuid_bit::ebx::avx512vl) {
280 host_isa |= instruction_set::AVX512VL;
281 }
282 if (ecx & cpuid_bit::ecx::avx512vbmi2) {
283 host_isa |= instruction_set::AVX512VBMI2;
284 }
285 if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
286 host_isa |= instruction_set::AVX512VPOPCNTDQ;
287 }
288 return host_isa;
289}
290#elif defined(__loongarch__)
291 #if defined(__linux__)
292 #include <sys/auxv.h>
293 // bits/hwcap.h
294 // #define HWCAP_LOONGARCH_LSX (1 << 4)
295 // #define HWCAP_LOONGARCH_LASX (1 << 5)
296 #endif
297
298static inline uint32_t detect_supported_architectures() {
299 uint32_t host_isa = instruction_set::DEFAULT;
300 #if defined(__linux__)
301 uint64_t hwcap = 0;
302 hwcap = getauxval(AT_HWCAP);
303 if (hwcap & HWCAP_LOONGARCH_LSX) {
304 host_isa |= instruction_set::LSX;
305 }
306 if (hwcap & HWCAP_LOONGARCH_LASX) {
307 host_isa |= instruction_set::LASX;
308 }
309 #endif
310 return host_isa;
311}
312#else // fallback
313
314// includes 32-bit ARM.
315static inline uint32_t detect_supported_architectures() {
316 return instruction_set::DEFAULT;
317}
318
319#endif // end SIMD extension detection code
320
321} // namespace internal
322} // namespace simdutf
323
324#endif // SIMDutf_INTERNAL_ISADETECTION_H