#ifndef SIMDUTF_INTERNAL_ISADETECTION_H
#define SIMDUTF_INTERNAL_ISADETECTION_H
#if defined(_MSC_VER)
  #include <intrin.h> // for __cpuidex and _xgetbv
#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  #include <cpuid.h>
#endif

#include "simdutf/portability.h"
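
// On 64-bit RISC-V Linux, the vector (V) and Zvbb extensions are detected at
// runtime through the riscv_hwprobe system call, set up below.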
#if SIMDUTF_IS_RISCV64 && defined(__linux__)
  #include <unistd.h> // for syscall()

struct simdutf_riscv_hwprobe {
  int64_t key;
  uint64_t value;
};
  #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
  #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
  #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
  #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
#endif // SIMDUTF_IS_RISCV64 && defined(__linux__)
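
// Flags for enum instruction_set: detect_supported_architectures() below
// returns a bitmask of these values describing the running CPU.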
  AVX512VBMI2 = 0x10000,
  AVX512VPOPCNTDQ = 0x20000,
#if defined(__PPC64__)

static inline uint32_t detect_supported_architectures() {
  return instruction_set::ALTIVEC;
}

#elif SIMDUTF_IS_RISCV64

static inline uint32_t detect_supported_architectures() {
  uint32_t host_isa = instruction_set::DEFAULT;
  host_isa |= instruction_set::RVV;
  host_isa |= instruction_set::ZVBB;
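  // On Linux, ask the kernel (via the riscv_hwprobe syscall defined above)
  // which extensions the running CPUs actually implement.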
  #if defined(__linux__)
  simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
  long ret = simdutf_riscv_hwprobe(&probes, sizeof probes / sizeof *probes, 0,
                                   nullptr, 0);
  if (ret == 0) {
    uint64_t extensions = probes[0].value;
    if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
      host_isa |= instruction_set::RVV;
    if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
      host_isa |= instruction_set::ZVBB;
  }
  #endif
  #if defined(RUN_IN_SPIKE_SIMULATOR)
  // The Spike proxy kernel does not implement the hwprobe syscall, so assume
  // the vector extension is available when running under the simulator.
  host_isa |= instruction_set::RVV;
  #endif
  return host_isa;
}

#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)

static inline uint32_t detect_supported_architectures() {
  return instruction_set::NEON;
}

#elif defined(__x86_64__) || defined(_M_AMD64)
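
// On x86-64, detection combines CPUID (which instructions the CPU implements)
// with XGETBV (which register state the OS saves on context switches). The
// constants below name the relevant CPUID feature bits and XCR0 state bits.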
// CPUID leaf EAX=0x1, ECX register
constexpr uint32_t pclmulqdq = uint32_t(1) << 1;  // bit 1 of ECX for EAX=0x1
constexpr uint32_t sse42 = uint32_t(1) << 20;     // bit 20 of ECX for EAX=0x1
constexpr uint32_t osxsave =
    (uint32_t(1) << 26) | (uint32_t(1) << 27);    // bits 26+27 of ECX for EAX=0x1
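
// CPUID leaf EAX=0x7 (subleaf 0), EBX register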
constexpr uint32_t bmi1 = uint32_t(1) << 3;
constexpr uint32_t avx2 = uint32_t(1) << 5;
constexpr uint32_t bmi2 = uint32_t(1) << 8;
constexpr uint32_t avx512f = uint32_t(1) << 16;
constexpr uint32_t avx512dq = uint32_t(1) << 17;
constexpr uint32_t avx512ifma = uint32_t(1) << 21;
constexpr uint32_t avx512cd = uint32_t(1) << 28;
constexpr uint32_t avx512bw = uint32_t(1) << 30;
constexpr uint32_t avx512vl = uint32_t(1) << 31;
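
// CPUID leaf EAX=0x7 (subleaf 0), ECX register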
constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
constexpr uint32_t avx512vnni = uint32_t(1) << 11;
constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
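
// CPUID leaf EAX=0x7 (subleaf 0), EDX register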
constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;

// XCR0 bits, as returned by xgetbv(): which register states does the OS save?
constexpr uint64_t avx256_saved = uint64_t(1) << 2; // bit 2 = YMM (AVX)
constexpr uint64_t avx512_saved =
    uint64_t(7) << 5; // bits 5-7 = opmask, ZMM_Hi256, Hi16_ZMM (AVX-512)
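
// Runs the CPUID instruction for the leaf selected by *eax (and subleaf *ecx),
// writing the resulting EAX/EBX/ECX/EDX values back through the pointers.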
static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
                         uint32_t *edx) {
  #if defined(_MSC_VER)
  int cpu_info[4];
  __cpuidex(cpu_info, *eax, *ecx);
  *eax = cpu_info[0];
  *ebx = cpu_info[1];
  *ecx = cpu_info[2];
  *edx = cpu_info[3];
  #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
  uint32_t level = *eax;
  __get_cpuid(level, eax, ebx, ecx, edx);
  #else
  uint32_t a = *eax, b, c = *ecx, d;
  asm volatile("cpuid\n\t" : "+a"(a), "=b"(b), "+c"(c), "=d"(d));
  *eax = a;
  *ebx = b;
  *ecx = c;
  *edx = d;
  #endif
}

// Reads the XCR0 register (XGETBV with ECX = 0), which tells us whether the
// operating system saves the YMM and ZMM register state on context switches.
static inline uint64_t xgetbv() {
  #if defined(_MSC_VER)
  return _xgetbv(0);
  #else
  uint32_t xcr0_lo, xcr0_hi;
  asm volatile("xgetbv\n\t" : "=a"(xcr0_lo), "=d"(xcr0_hi) : "c"(0));
  return xcr0_lo | ((uint64_t)xcr0_hi << 32);
  #endif
}
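
// CPUID leaf 1 gives SSE4.2, PCLMULQDQ and the OSXSAVE bits; only if the OS
// also saves the corresponding register state (per XCR0) do we trust the AVX
// and AVX-512 feature bits reported by CPUID leaf 7.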
static inline uint32_t detect_supported_architectures() {
  uint32_t eax;
  uint32_t ebx = 0;
  uint32_t ecx = 0;
  uint32_t edx = 0;
  uint32_t host_isa = 0x0;

  // CPUID leaf 1: basic feature flags.
  eax = 0x1;
  cpuid(&eax, &ebx, &ecx, &edx);

  if (ecx & cpuid_bit::sse42) {
    host_isa |= instruction_set::SSE42;
  }

  if (ecx & cpuid_bit::pclmulqdq) {
    host_isa |= instruction_set::PCLMULQDQ;
  }

  if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
    // The OS does not use XSAVE/XRSTOR: AVX state would not be preserved
    // across context switches, so stop here.
    return host_isa;
  }

  uint64_t xcr0 = xgetbv();

  if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
    // The OS does not save YMM state, so AVX2 and AVX-512 cannot be used.
    return host_isa;
  }

  // CPUID leaf 7, subleaf 0: extended feature flags in EBX/ECX/EDX.
  eax = 0x7;
  ecx = 0x0;
  cpuid(&eax, &ebx, &ecx, &edx);
  if (ebx & cpuid_bit::ebx::avx2) {
    host_isa |= instruction_set::AVX2;
  }
  if (ebx & cpuid_bit::ebx::bmi1) {
    host_isa |= instruction_set::BMI1;
  }
  if (ebx & cpuid_bit::ebx::bmi2) {
    host_isa |= instruction_set::BMI2;
  }
  if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
        cpuid_bit::xcr0_bit::avx512_saved)) {
    // Opmask and ZMM state are not saved by the OS: skip the AVX-512 flags.
    return host_isa;
  }
  if (ebx & cpuid_bit::ebx::avx512f) {
    host_isa |= instruction_set::AVX512F;
  }
  if (ebx & cpuid_bit::ebx::avx512bw) {
    host_isa |= instruction_set::AVX512BW;
  }
  if (ebx & cpuid_bit::ebx::avx512cd) {
    host_isa |= instruction_set::AVX512CD;
  }
  if (ebx & cpuid_bit::ebx::avx512dq) {
    host_isa |= instruction_set::AVX512DQ;
  }
  if (ebx & cpuid_bit::ebx::avx512vl) {
    host_isa |= instruction_set::AVX512VL;
  }
  if (ecx & cpuid_bit::ecx::avx512vbmi2) {
    host_isa |= instruction_set::AVX512VBMI2;
  }
  if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
    host_isa |= instruction_set::AVX512VPOPCNTDQ;
  }
  return host_isa;
}

#elif defined(__loongarch__)
  #if defined(__linux__)
    #include <sys/auxv.h> // for getauxval(AT_HWCAP)
  #endif
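
// On LoongArch Linux, SIMD support is reported through the ELF auxiliary
// vector: the HWCAP_LOONGARCH_LSX and HWCAP_LOONGARCH_LASX bits of AT_HWCAP.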
static inline uint32_t detect_supported_architectures() {
  uint32_t host_isa = instruction_set::DEFAULT;
  #if defined(__linux__)
  uint64_t hwcap = 0;
  hwcap = getauxval(AT_HWCAP);
  if (hwcap & HWCAP_LOONGARCH_LSX) {
    host_isa |= instruction_set::LSX;
  }
  if (hwcap & HWCAP_LOONGARCH_LASX) {
    host_isa |= instruction_set::LASX;
  }
  #endif
  return host_isa;
}

#else // fallback: no runtime detection for this architecture

static inline uint32_t detect_supported_architectures() {
  return instruction_set::DEFAULT;
}

#endif // end of architecture-specific detection