simdutf 6.4.0
Unicode at GB/s.
Loading...
Searching...
No Matches
isadetection.h
1/* From
2https://github.com/endorno/pytorch/blob/master/torch/lib/TH/generic/simd/simd.h
3Highly modified.
4
5Copyright (c) 2016- Facebook, Inc (Adam Paszke)
6Copyright (c) 2014- Facebook, Inc (Soumith Chintala)
7Copyright (c) 2011-2014 Idiap Research Institute (Ronan Collobert)
8Copyright (c) 2012-2014 Deepmind Technologies (Koray Kavukcuoglu)
9Copyright (c) 2011-2012 NEC Laboratories America (Koray Kavukcuoglu)
10Copyright (c) 2011-2013 NYU (Clement Farabet)
11Copyright (c) 2006-2010 NEC Laboratories America (Ronan Collobert, Leon Bottou,
12Iain Melvin, Jason Weston) Copyright (c) 2006 Idiap Research Institute
13(Samy Bengio) Copyright (c) 2001-2004 Idiap Research Institute (Ronan Collobert,
14Samy Bengio, Johnny Mariethoz)
15
16All rights reserved.
17
18Redistribution and use in source and binary forms, with or without
19modification, are permitted provided that the following conditions are met:
20
211. Redistributions of source code must retain the above copyright
22 notice, this list of conditions and the following disclaimer.
23
242. Redistributions in binary form must reproduce the above copyright
25 notice, this list of conditions and the following disclaimer in the
26 documentation and/or other materials provided with the distribution.
27
283. Neither the names of Facebook, Deepmind Technologies, NYU, NEC Laboratories
29America and IDIAP Research Institute nor the names of its contributors may be
30 used to endorse or promote products derived from this software without
31 specific prior written permission.
32
33THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
34AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
35IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
36ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
37LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
38CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
39SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
40INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
41CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
42ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
43POSSIBILITY OF SUCH DAMAGE.
44*/
45
46#ifndef SIMDutf_INTERNAL_ISADETECTION_H
47#define SIMDutf_INTERNAL_ISADETECTION_H
48
49#include <cstdint>
50#include <cstdlib>
51#if defined(_MSC_VER)
52 #include <intrin.h>
53#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
54 #include <cpuid.h>
55#endif
56
57#include "simdutf/portability.h"
58
59// RISC-V ISA detection utilities
60#if SIMDUTF_IS_RISCV64 && defined(__linux__)
61 #include <unistd.h> // for syscall
// The probe record and the constants below are declared locally, for
// backwards compatibility, instead of being taken from a system header.

/**
 * One key/value record handed to the hwprobe system call: `key` selects what
 * is being queried and `value` is read back once the call has succeeded.
 */
struct simdutf_riscv_hwprobe {
  int64_t key;
  uint64_t value;
};
  // Function-like macro sharing the struct's name: forwards its arguments to
  // system call number 258. It only expands when followed by '('.
  #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
  // Probe key used to query the extension bitmask tested below.
  #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
  // Bits of the returned value mapped to the RVV and ZVBB instruction sets.
  #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
  #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
71#endif // SIMDUTF_IS_RISCV64 && defined(__linux__)
72
73#if defined(__loongarch__) && defined(__linux__)
74 #include <sys/auxv.h>
75// bits/hwcap.h
76// #define HWCAP_LOONGARCH_LSX (1 << 4)
77// #define HWCAP_LOONGARCH_LASX (1 << 5)
78#endif
79
80namespace simdutf {
81namespace internal {
82
/**
 * Bit flags describing the instruction-set extensions a host may support.
 *
 * The detection routines OR these values together into a single uint32_t, so
 * every flag must occupy its own bit. AVX512VPOPCNTDQ, RVV and ZVBB used to
 * reuse the bits of AVX512CD, AVX512BW and AVX512VL respectively; in
 * particular, detecting AVX512CD also reported AVX512VPOPCNTDQ. They now have
 * dedicated bits. All other values are unchanged.
 */
enum instruction_set {
  DEFAULT = 0x0,
  NEON = 0x1,
  AVX2 = 0x4,
  SSE42 = 0x8,
  PCLMULQDQ = 0x10,
  BMI1 = 0x20,
  BMI2 = 0x40,
  ALTIVEC = 0x80,
  AVX512F = 0x100,
  AVX512DQ = 0x200,
  AVX512IFMA = 0x400,
  AVX512PF = 0x800,
  AVX512ER = 0x1000,
  AVX512CD = 0x2000,
  AVX512BW = 0x4000,
  AVX512VL = 0x8000,
  AVX512VBMI2 = 0x10000,
  AVX512VPOPCNTDQ = 0x20000, // was 0x2000, which aliased AVX512CD
  LSX = 0x40000,
  LASX = 0x80000,
  RVV = 0x100000,  // was 0x4000, which aliased AVX512BW
  ZVBB = 0x200000, // was 0x8000, which aliased AVX512VL
};
107
108#if defined(__PPC64__)
109
110static inline uint32_t detect_supported_architectures() {
111 return instruction_set::ALTIVEC;
112}
113
114#elif SIMDUTF_IS_RISCV64
115
116static inline uint32_t detect_supported_architectures() {
117 uint32_t host_isa = instruction_set::DEFAULT;
118 #if SIMDUTF_IS_RVV
119 host_isa |= instruction_set::RVV;
120 #endif
121 #if SIMDUTF_IS_ZVBB
122 host_isa |= instruction_set::ZVBB;
123 #endif
124 #if defined(__linux__)
125 simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
126 long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0,
127 nullptr, 0);
128 if (ret == 0) {
129 uint64_t extensions = probes[0].value;
130 if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
131 host_isa |= instruction_set::RVV;
132 if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
133 host_isa |= instruction_set::ZVBB;
134 }
135 #endif
136 #if defined(RUN_IN_SPIKE_SIMULATOR)
137 // Proxy Kernel does not implement yet hwprobe syscall
138 host_isa |= instruction_set::RVV;
139 #endif
140 return host_isa;
141}
142
143#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
144
145static inline uint32_t detect_supported_architectures() {
146 return instruction_set::NEON;
147}
148
149#elif defined(__x86_64__) || defined(_M_AMD64) // x64
150
namespace {
namespace cpuid_bit {
// Masks for the CPUID feature bits tested during detection. The bit positions
// can be found in the CPUID entry of the Intel ISA reference.

// Leaf EAX = 0x01: masks applied to the ECX output.
constexpr uint32_t pclmulqdq = uint32_t{1} << 1;
constexpr uint32_t sse42 = uint32_t{1} << 20;
// Combined mask of bits 26 and 27; the detection code requires both.
constexpr uint32_t osxsave = (uint32_t{1} << 26) | (uint32_t{1} << 27);

// Leaf EAX = 0x07 (Structured Extended Feature Flags), ECX = 0x00 (sub-leaf).
// See: "Table 3-8. Information Returned by CPUID Instruction"

// Masks applied to the EBX output of leaf 0x07.
namespace ebx {
constexpr uint32_t bmi1 = uint32_t{1} << 3;
constexpr uint32_t avx2 = uint32_t{1} << 5;
constexpr uint32_t bmi2 = uint32_t{1} << 8;
constexpr uint32_t avx512f = uint32_t{1} << 16;
constexpr uint32_t avx512dq = uint32_t{1} << 17;
constexpr uint32_t avx512ifma = uint32_t{1} << 21;
constexpr uint32_t avx512cd = uint32_t{1} << 28;
constexpr uint32_t avx512bw = uint32_t{1} << 30;
constexpr uint32_t avx512vl = uint32_t{1} << 31;
} // namespace ebx

// Masks applied to the ECX output of leaf 0x07.
namespace ecx {
constexpr uint32_t avx512vbmi = uint32_t{1} << 1;
constexpr uint32_t avx512vbmi2 = uint32_t{1} << 6;
constexpr uint32_t avx512vnni = uint32_t{1} << 11;
constexpr uint32_t avx512bitalg = uint32_t{1} << 12;
constexpr uint32_t avx512vpopcnt = uint32_t{1} << 14;
} // namespace ecx

// Masks for the EDX output of leaf 0x07.
namespace edx {
constexpr uint32_t avx512vp2intersect = uint32_t{1} << 8;
} // namespace edx

// Masks applied to the 64-bit value returned by xgetbv().
namespace xcr0_bit {
constexpr uint64_t avx256_saved = uint64_t{1} << 2;
// Three adjacent bits (5, 6 and 7); the detection code requires all of them.
constexpr uint64_t avx512_saved = uint64_t{7} << 5;
} // namespace xcr0_bit
} // namespace cpuid_bit
} // namespace
195
/**
 * Executes CPUID and stores the four result registers through the pointers.
 *
 * @param eax in: leaf to query; out: resulting EAX.
 * @param ebx out: resulting EBX.
 * @param ecx in: sub-leaf to query; out: resulting ECX.
 * @param edx out: resulting EDX.
 *
 * Three back ends are selected at compile time: the MSVC intrinsic, the
 * __get_cpuid helper from <cpuid.h>, or inline assembly.
 */
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
  #if defined(_MSC_VER)
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
  #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  const uint32_t level = *eax;
  // __get_cpuid returns 0 for an unsupported leaf and then leaves the outputs
  // untouched. Clear them so that stale values from a previous query are never
  // interpreted as feature bits.
  // NOTE(review): the sub-leaf in *ecx is not passed as an input on this
  // path; confirm the helper selects sub-leaf 0 for leaf 0x7.
  if (__get_cpuid(level, eax, ebx, ecx, edx) == 0) {
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;
  }
  #else
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
  #endif
}
217
/**
 * Reads extended control register 0 with the XGETBV instruction.
 *
 * @return the 64-bit register value; the instruction delivers it as two
 *         32-bit halves, which are recombined here.
 */
static inline uint64_t xgetbv() {
  #if defined(_MSC_VER)
  return _xgetbv(0);
  #else
  uint32_t low_half, high_half;
  // ECX = 0 selects the register; the halves come back in EAX and EDX.
  asm volatile("xgetbv\n\t" : "=a"(low_half), "=d"(high_half) : "c"(0));
  return (static_cast<uint64_t>(high_half) << 32) | low_half;
  #endif
}
227
228static inline uint32_t detect_supported_architectures() {
229 uint32_t eax;
230 uint32_t ebx = 0;
231 uint32_t ecx = 0;
232 uint32_t edx = 0;
233 uint32_t host_isa = 0x0;
234
235 // EBX for EAX=0x1
236 eax = 0x1;
237 cpuid(&eax, &ebx, &ecx, &edx);
238
239 if (ecx & cpuid_bit::sse42) {
240 host_isa |= instruction_set::SSE42;
241 }
242
243 if (ecx & cpuid_bit::pclmulqdq) {
244 host_isa |= instruction_set::PCLMULQDQ;
245 }
246
247 if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
248 return host_isa;
249 }
250
251 // xgetbv for checking if the OS saves registers
252 uint64_t xcr0 = xgetbv();
253
254 if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
255 return host_isa;
256 }
257 // ECX for EAX=0x7
258 eax = 0x7;
259 ecx = 0x0; // Sub-leaf = 0
260 cpuid(&eax, &ebx, &ecx, &edx);
261 if (ebx & cpuid_bit::ebx::avx2) {
262 host_isa |= instruction_set::AVX2;
263 }
264 if (ebx & cpuid_bit::ebx::bmi1) {
265 host_isa |= instruction_set::BMI1;
266 }
267 if (ebx & cpuid_bit::ebx::bmi2) {
268 host_isa |= instruction_set::BMI2;
269 }
270 if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
271 cpuid_bit::xcr0_bit::avx512_saved)) {
272 return host_isa;
273 }
274 if (ebx & cpuid_bit::ebx::avx512f) {
275 host_isa |= instruction_set::AVX512F;
276 }
277 if (ebx & cpuid_bit::ebx::avx512bw) {
278 host_isa |= instruction_set::AVX512BW;
279 }
280 if (ebx & cpuid_bit::ebx::avx512cd) {
281 host_isa |= instruction_set::AVX512CD;
282 }
283 if (ebx & cpuid_bit::ebx::avx512dq) {
284 host_isa |= instruction_set::AVX512DQ;
285 }
286 if (ebx & cpuid_bit::ebx::avx512vl) {
287 host_isa |= instruction_set::AVX512VL;
288 }
289 if (ecx & cpuid_bit::ecx::avx512vbmi2) {
290 host_isa |= instruction_set::AVX512VBMI2;
291 }
292 if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
293 host_isa |= instruction_set::AVX512VPOPCNTDQ;
294 }
295 return host_isa;
296}
297#elif defined(__loongarch__)
298
299static inline uint32_t detect_supported_architectures() {
300 uint32_t host_isa = instruction_set::DEFAULT;
301 #if defined(__linux__)
302 uint64_t hwcap = 0;
303 hwcap = getauxval(AT_HWCAP);
304 if (hwcap & HWCAP_LOONGARCH_LSX) {
305 host_isa |= instruction_set::LSX;
306 }
307 if (hwcap & HWCAP_LOONGARCH_LASX) {
308 host_isa |= instruction_set::LASX;
309 }
310 #endif
311 return host_isa;
312}
313#else // fallback
314
315// includes 32-bit ARM.
316static inline uint32_t detect_supported_architectures() {
317 return instruction_set::DEFAULT;
318}
319
320#endif // end SIMD extension detection code
321
322} // namespace internal
323} // namespace simdutf
324
325#endif // SIMDutf_INTERNAL_ISADETECTION_H