46#ifndef SIMDutf_INTERNAL_ISADETECTION_H
47#define SIMDutf_INTERNAL_ISADETECTION_H
53#elif (defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)) || \
62#include "simdutf/portability.h"
65#if SIMDUTF_IS_RISCV64 && defined(__linux__)
// Probe entry type handed to the hwprobe syscall wrapper defined below.
// NOTE(review): the member list is not visible here; the RISC-V detection
// code initialises each entry with two values and reads back `.value` —
// presumably a (key, value) pair, confirm against the full definition.
68struct simdutf_riscv_hwprobe {
// Function-like macro sharing the struct's name: it only expands when the
// name is followed by '(', so `simdutf_riscv_hwprobe probes[]` still names
// the struct. Forwards all arguments to syscall number 258.
72 #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
// Key placed in the probe entry by detect_supported_architectures().
73 #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
// Bit masks tested against the value returned for the key above; they map
// to the RVV and ZVBB instruction-set flags respectively.
74 #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
75 #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
78#if defined(__loongarch__) && defined(__linux__)
// Flag values that detect_supported_architectures() ORs into its result.
// NOTE(review): 0x2000 is smaller than the preceding 0x10000, so it is not
// simply the next bit in sequence — confirm it does not share a value with
// another flag that can be set on the same architecture.
105 AVX512VBMI2 = 0x10000,
106 AVX512VPOPCNTDQ = 0x2000,
113#if defined(__PPC64__)
// PPC64 variant: performs no runtime probing and reports ALTIVEC directly.
// Returns a bit mask of instruction_set flags.
115static inline uint32_t detect_supported_architectures() {
116 return instruction_set::ALTIVEC;
119#elif SIMDUTF_IS_RISCV64
// RISC-V 64 variant: builds a bit mask of instruction_set flags, starting
// from DEFAULT and OR-ing in RVV and/or ZVBB from several sources.
121static inline uint32_t detect_supported_architectures() {
122 uint32_t host_isa = instruction_set::DEFAULT;
// NOTE(review): the conditions guarding the next two assignments are not
// visible here — presumably compile-time feature checks; confirm.
124 host_isa |= instruction_set::RVV;
127 host_isa |= instruction_set::ZVBB;
// Linux only: ask the kernel through the hwprobe syscall wrapper. A single
// probe entry with key IMA_EXT_0 is submitted; the entry count is derived
// from the array size.
129 #if defined(__linux__)
130 simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
131 long ret = simdutf_riscv_hwprobe(&probes,
sizeof probes /
sizeof *probes, 0,
// The value filled in for the probe is treated as a bit set of extensions.
// NOTE(review): the check of `ret` is not visible here — confirm the value
// is only consulted when the syscall succeeded.
134 uint64_t extensions = probes[0].value;
135 if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
136 host_isa |= instruction_set::RVV;
137 if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
138 host_isa |= instruction_set::ZVBB;
// Build-time override: when RUN_IN_SPIKE_SIMULATOR is defined, RVV is
// reported regardless of what the probe returned.
141 #if defined(RUN_IN_SPIKE_SIMULATOR)
143 host_isa |= instruction_set::RVV;
148#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
// AArch64 variant (GCC/Clang __aarch64__ or MSVC _M_ARM64/_M_ARM64EC):
// performs no runtime probing and reports NEON directly.
150static inline uint32_t detect_supported_architectures() {
151 return instruction_set::NEON;
154#elif defined(__x86_64__) || defined(_M_AMD64)
// Bit masks for decoding CPUID/XGETBV results. The enclosing namespace
// declarations are not visible here; the names below are referenced from
// detect_supported_architectures() as cpuid_bit::*, cpuid_bit::ebx::*,
// cpuid_bit::ecx::* and cpuid_bit::xcr0_bit::*.
//
// Masks tested against ECX after the first cpuid() call
// (presumably leaf 1 — the leaf value is not visible; confirm).
161constexpr uint32_t pclmulqdq = uint32_t(1)
163constexpr uint32_t sse42 = uint32_t(1)
165constexpr uint32_t osxsave =
166 (uint32_t(1) << 26) |
// Masks tested against EBX after the second cpuid() call
// (presumably leaf 7 — confirm).
172constexpr uint32_t bmi1 = uint32_t(1) << 3;
173constexpr uint32_t avx2 = uint32_t(1) << 5;
174constexpr uint32_t bmi2 = uint32_t(1) << 8;
175constexpr uint32_t avx512f = uint32_t(1) << 16;
176constexpr uint32_t avx512dq = uint32_t(1) << 17;
177constexpr uint32_t avx512ifma = uint32_t(1) << 21;
178constexpr uint32_t avx512cd = uint32_t(1) << 28;
179constexpr uint32_t avx512bw = uint32_t(1) << 30;
180constexpr uint32_t avx512vl = uint32_t(1) << 31;
// Masks tested against ECX after the second cpuid() call; only avx512vbmi2
// and avx512vpopcnt are consulted by the visible detection code.
184constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
185constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
186constexpr uint32_t avx512vnni = uint32_t(1) << 11;
187constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
188constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
// NOTE(review): presumably an EDX bit of the same leaf; it is not used by
// the visible detection code — confirm which register it belongs to.
191constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
// 64-bit masks compared against the value returned by xgetbv().
194constexpr uint64_t avx256_saved = uint64_t(1) << 2;
195constexpr uint64_t avx512_saved =
// Executes the CPUID instruction through whichever mechanism the compiler
// offers. On entry *eax holds the leaf; on the MSVC and inline-asm paths
// *ecx additionally supplies the sub-leaf.
// NOTE(review): the remainder of the parameter list and the stores of the
// results back through the pointers are not visible here.
201static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
// MSVC: the intrinsic receives both leaf (*eax) and sub-leaf (*ecx).
203 #if defined(_MSC_VER)
205 __cpuidex(cpu_info, *eax, *ecx);
// GCC <cpuid.h> helper path.
// NOTE(review): only the leaf is passed to __get_cpuid; the incoming *ecx
// (sub-leaf) is not forwarded on this path. Confirm that sub-leaf-sensitive
// queries behave as intended here (__get_cpuid_count accepts a sub-leaf).
210 #elif (defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)) || \
212 uint32_t level = *eax;
213 __get_cpuid(level, eax, ebx, ecx, edx);
// Fallback: raw inline assembly. EAX and ECX are in/out operands seeded
// from the caller's values; EBX and EDX are outputs only.
215 uint32_t a = *eax, b, c = *ecx, d;
216 asm volatile(
"cpuid\n\t" :
"+a"(a),
"=b"(b),
"+c"(c),
"=d"(d));
// Reads an extended control register with XGETBV and returns it as a
// 64-bit value. The MSVC and __FILC__ branches are not visible here.
224static inline uint64_t xgetbv() {
225 #if defined(_MSC_VER)
227 #elif defined(__FILC__)
// Generic path: inline assembly with ECX = 0 selecting the register to
// read; the instruction returns the low half in EAX and the high half in
// EDX.
230 uint32_t xcr0_lo, xcr0_hi;
231 asm volatile(
"xgetbv\n\t" :
"=a"(xcr0_lo),
"=d"(xcr0_hi) :
"c"(0));
// Widen the high half before shifting so the shift happens in 64 bits.
232 return xcr0_lo | ((uint64_t)xcr0_hi << 32);
// x86-64 variant: builds a bit mask of instruction_set flags from two
// cpuid() queries and one xgetbv() read.
// NOTE(review): the register set-up before each cpuid() call and the bodies
// of the guard conditions are not visible here; the guards presumably
// return the flags gathered so far — confirm.
236static inline uint32_t detect_supported_architectures() {
241 uint32_t host_isa = 0x0;
// First query: SSE4.2 and PCLMULQDQ are reported straight from ECX.
245 cpuid(&eax, &ebx, &ecx, &edx);
247 if (ecx & cpuid_bit::sse42) {
248 host_isa |= instruction_set::SSE42;
251 if (ecx & cpuid_bit::pclmulqdq) {
252 host_isa |= instruction_set::PCLMULQDQ;
// Guard: every bit of the osxsave mask must be set before xgetbv() is
// consulted.
255 if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
260 uint64_t xcr0 = xgetbv();
// Guard: taken when the avx256_saved bit is clear in the xgetbv() value.
262 if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
// Second query: AVX2, BMI1 and BMI2 come from EBX.
268 cpuid(&eax, &ebx, &ecx, &edx);
269 if (ebx & cpuid_bit::ebx::avx2) {
270 host_isa |= instruction_set::AVX2;
272 if (ebx & cpuid_bit::ebx::bmi1) {
273 host_isa |= instruction_set::BMI1;
275 if (ebx & cpuid_bit::ebx::bmi2) {
276 host_isa |= instruction_set::BMI2;
// Guard: taken unless every bit of avx512_saved is set in the xgetbv()
// value; the AVX-512 flags below follow this check.
278 if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
279 cpuid_bit::xcr0_bit::avx512_saved)) {
// AVX-512 subsets reported from EBX of the second query.
282 if (ebx & cpuid_bit::ebx::avx512f) {
283 host_isa |= instruction_set::AVX512F;
285 if (ebx & cpuid_bit::ebx::avx512bw) {
286 host_isa |= instruction_set::AVX512BW;
288 if (ebx & cpuid_bit::ebx::avx512cd) {
289 host_isa |= instruction_set::AVX512CD;
291 if (ebx & cpuid_bit::ebx::avx512dq) {
292 host_isa |= instruction_set::AVX512DQ;
294 if (ebx & cpuid_bit::ebx::avx512vl) {
295 host_isa |= instruction_set::AVX512VL;
// AVX-512 subsets reported from ECX of the second query.
297 if (ecx & cpuid_bit::ecx::avx512vbmi2) {
298 host_isa |= instruction_set::AVX512VBMI2;
300 if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
301 host_isa |= instruction_set::AVX512VPOPCNTDQ;
305#elif defined(__loongarch__)
// LoongArch variant: starts from DEFAULT and, on Linux, adds LSX and LASX
// according to the hardware-capability word from the auxiliary vector.
// Returns a bit mask of instruction_set flags.
307static inline uint32_t detect_supported_architectures() {
308 uint32_t host_isa = instruction_set::DEFAULT;
309 #if defined(__linux__)
// NOTE(review): the declaration of `hwcap` is not visible here.
311 hwcap = getauxval(AT_HWCAP);
312 if (hwcap & HWCAP_LOONGARCH_LSX) {
313 host_isa |= instruction_set::LSX;
315 if (hwcap & HWCAP_LOONGARCH_LASX) {
316 host_isa |= instruction_set::LASX;
// Fallback variant, selected when none of the architecture-specific
// branches applies: reports DEFAULT only.
324static inline uint32_t detect_supported_architectures() {
325 return instruction_set::DEFAULT;