46#ifndef SIMDutf_INTERNAL_ISADETECTION_H
47#define SIMDutf_INTERNAL_ISADETECTION_H
53#elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
57#include "simdutf/portability.h"
60#if SIMDUTF_IS_RISCV64 && defined(__linux__)
// Local declarations for the Linux RISC-V hardware-probe interface, compiled
// only when SIMDUTF_IS_RISCV64 && __linux__.
// NOTE(review): the struct's members are not visible in this excerpt; the
// RISC-V detector initializes it as a {key, value} pair and reads `.value`.
63struct simdutf_riscv_hwprobe {
// Raw syscall wrapper that forwards all of its arguments.
// NOTE(review): 258 is presumably the riscv_hwprobe syscall number - confirm
// against the kernel's unistd.h.
67 #define simdutf_riscv_hwprobe(...) syscall(258, __VA_ARGS__)
// Key placed in the probe request by the RISC-V detector.
68 #define SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0 4
// Bits tested in the probe's returned value: the first maps to
// instruction_set::RVV, the second to instruction_set::ZVBB.
69 #define SIMDUTF_RISCV_HWPROBE_IMA_V (1 << 2)
70 #define SIMDUTF_RISCV_HWPROBE_EXT_ZVBB (1 << 17)
73#if defined(__loongarch__) && defined(__linux__)
100 AVX512VBMI2 = 0x10000,
// NOTE(review): 0x2000 is out of sequence with the 0x10000 above. Confirm it
// does not share a bit with another x86 flag of this enum (the remaining
// enumerators are not visible in this excerpt); two flags on the same bit
// cannot be told apart in the mask returned by
// detect_supported_architectures().
101 AVX512VPOPCNTDQ = 0x2000,
108#if defined(__PPC64__)
// PPC64 variant: performs no runtime probing and always reports ALTIVEC.
110static inline uint32_t detect_supported_architectures() {
111 return instruction_set::ALTIVEC;
114#elif SIMDUTF_IS_RISCV64
// RISC-V 64 variant: starts from DEFAULT and ORs in RVV / ZVBB.
// NOTE(review): this excerpt omits several lines (preprocessor guards, the
// end of the hwprobe call, closing braces); comments describe only what is
// visible.
116static inline uint32_t detect_supported_architectures() {
117 uint32_t host_isa = instruction_set::DEFAULT;
// NOTE(review): the conditions guarding the next two statements are not
// visible here - presumably compile-time feature checks; confirm.
119 host_isa |= instruction_set::RVV;
122 host_isa |= instruction_set::ZVBB;
// Linux only: ask the kernel which extensions are available.
124 #if defined(__linux__)
// Single-entry request keyed on IMA_EXT_0, with its value zero-initialized.
125 simdutf_riscv_hwprobe probes[] = {{SIMDUTF_RISCV_HWPROBE_KEY_IMA_EXT_0, 0}};
// The second argument is the number of entries in `probes`.
// NOTE(review): the remaining arguments and any check of `ret` fall on lines
// not visible here.
126 long ret = simdutf_riscv_hwprobe(&probes,
sizeof probes /
sizeof *probes, 0,
// The value of the first probe entry is used as the extension bitmask.
129 uint64_t extensions = probes[0].value;
130 if (extensions & SIMDUTF_RISCV_HWPROBE_IMA_V)
131 host_isa |= instruction_set::RVV;
132 if (extensions & SIMDUTF_RISCV_HWPROBE_EXT_ZVBB)
133 host_isa |= instruction_set::ZVBB;
// When built with RUN_IN_SPIKE_SIMULATOR, RVV is reported without consulting
// the probe result.
136 #if defined(RUN_IN_SPIKE_SIMULATOR)
138 host_isa |= instruction_set::RVV;
143#elif defined(__aarch64__) || defined(_M_ARM64) || defined(_M_ARM64EC)
// AArch64 variant (__aarch64__, _M_ARM64 or _M_ARM64EC): performs no runtime
// probing and always reports NEON.
145static inline uint32_t detect_supported_architectures() {
146 return instruction_set::NEON;
149#elif defined(__x86_64__) || defined(_M_AMD64)
// Masks tested against ECX after the first cpuid() call in the x86-64
// detect_supported_architectures(), where they are referenced as
// cpuid_bit::pclmulqdq, cpuid_bit::sse42 and cpuid_bit::osxsave.
// NOTE(review): the shift amounts of pclmulqdq and sse42, and the second
// operand of osxsave, fall on lines not visible in this excerpt.
156constexpr uint32_t pclmulqdq = uint32_t(1)
158constexpr uint32_t sse42 = uint32_t(1)
// osxsave is a multi-bit mask (bit 26 OR'ed with a further term); the detector
// compares (ecx & osxsave) against osxsave, i.e. it needs every bit set.
160constexpr uint32_t osxsave =
161 (uint32_t(1) << 26) |
// Single-bit masks tested against EBX after the second cpuid() call in the
// x86-64 detector, where they are referenced as cpuid_bit::ebx::<name>.
// NOTE(review): presumably the CPUID structured extended feature leaf; the
// leaf selection is not visible in this excerpt - confirm.
// avx512ifma is defined here but not consulted by the visible detector code.
167constexpr uint32_t bmi1 = uint32_t(1) << 3;
168constexpr uint32_t avx2 = uint32_t(1) << 5;
169constexpr uint32_t bmi2 = uint32_t(1) << 8;
170constexpr uint32_t avx512f = uint32_t(1) << 16;
171constexpr uint32_t avx512dq = uint32_t(1) << 17;
172constexpr uint32_t avx512ifma = uint32_t(1) << 21;
173constexpr uint32_t avx512cd = uint32_t(1) << 28;
174constexpr uint32_t avx512bw = uint32_t(1) << 30;
175constexpr uint32_t avx512vl = uint32_t(1) << 31;
// Single-bit masks tested against ECX after the second cpuid() call in the
// x86-64 detector, where they are referenced as cpuid_bit::ecx::<name>.
// Only avx512vbmi2 and avx512vpopcnt are consulted by the visible detector
// code; the other three are defined but not used there.
179constexpr uint32_t avx512vbmi = uint32_t(1) << 1;
180constexpr uint32_t avx512vbmi2 = uint32_t(1) << 6;
181constexpr uint32_t avx512vnni = uint32_t(1) << 11;
182constexpr uint32_t avx512bitalg = uint32_t(1) << 12;
183constexpr uint32_t avx512vpopcnt = uint32_t(1) << 14;
// Bit 8 mask for AVX512-VP2INTERSECT.
// NOTE(review): neither the enclosing namespace nor the register this applies
// to is visible in this excerpt, and the visible detector code never tests it.
186constexpr uint32_t avx512vp2intersect = uint32_t(1) << 8;
// Masks tested against the 64-bit value returned by xgetbv(); the x86-64
// detector references them as cpuid_bit::xcr0_bit::<name>.
// avx256_saved is bit 2; the detector checks whether it is clear.
189constexpr uint64_t avx256_saved = uint64_t(1) << 2;
// The detector requires every bit of avx512_saved to be set.
// NOTE(review): the mask's value falls on a line not visible in this excerpt.
190constexpr uint64_t avx512_saved =
// Runs the CPUID instruction through one of three compiler-specific paths.
// On entry *eax (and, on two of the paths, *ecx) supply the query inputs.
// NOTE(review): the end of the parameter list and the statements that copy
// results back to the caller are not visible in this excerpt.
196static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx,
// MSVC: intrinsic taking both *eax and *ecx as inputs.
198 #if defined(_MSC_VER)
200 __cpuidex(cpu_info, *eax, *ecx);
// Opt-in GCC helper path.
// NOTE(review): unlike the other two paths, *ecx is not passed as an input
// here (ecx is only an output pointer), and the return value of __get_cpuid
// is ignored. Verify this is adequate for queries that need a sub-leaf;
// __get_cpuid_count may be the appropriate call.
205 #elif defined(HAVE_GCC_GET_CPUID) && defined(USE_GCC_GET_CPUID)
206 uint32_t level = *eax;
207 __get_cpuid(level, eax, ebx, ecx, edx);
// Inline-assembly path: a and c are read-write operands seeded from *eax and
// *ecx; b and d are write-only outputs.
209 uint32_t a = *eax, b, c = *ecx, d;
210 asm volatile(
"cpuid\n\t" :
"+a"(a),
"=b"(b),
"+c"(c),
"=d"(d));
// Returns the 64-bit extended control register value read with XGETBV.
// NOTE(review): the MSVC branch body is not visible in this excerpt.
218static inline uint64_t xgetbv() {
219 #if defined(_MSC_VER)
// Inline-assembly path: ECX is loaded with 0 as the register index, and the
// result arrives split across EAX (low half) and EDX (high half).
222 uint32_t xcr0_lo, xcr0_hi;
223 asm volatile(
"xgetbv\n\t" :
"=a"(xcr0_lo),
"=d"(xcr0_hi) :
"c"(0));
// Recombine the halves; the cast widens before the shift so no bits are lost.
224 return xcr0_lo | ((uint64_t)xcr0_hi << 32);
// x86-64 variant: starts from an empty mask and ORs in one instruction_set
// flag for each CPUID / XCR0 bit that tests positive.
// NOTE(review): this excerpt omits lines (register declarations, leaf
// selection, branch bodies, closing braces and the return), so the comments
// below describe only the statements that are visible.
228static inline uint32_t detect_supported_architectures() {
233 uint32_t host_isa = 0x0;
// First CPUID query: SSE4.2, PCLMULQDQ and OSXSAVE are all read from ECX.
237 cpuid(&eax, &ebx, &ecx, &edx);
239 if (ecx & cpuid_bit::sse42) {
240 host_isa |= instruction_set::SSE42;
243 if (ecx & cpuid_bit::pclmulqdq) {
244 host_isa |= instruction_set::PCLMULQDQ;
// Every bit of the osxsave mask must be set; the action taken when that is
// not the case lies outside this excerpt.
247 if ((ecx & cpuid_bit::osxsave) != cpuid_bit::osxsave) {
// XCR0 is read once and tested twice below (avx256_saved, then avx512_saved).
252 uint64_t xcr0 = xgetbv();
254 if ((xcr0 & cpuid_bit::xcr0_bit::avx256_saved) == 0) {
// Second CPUID query: AVX2, BMI1, BMI2 and most AVX-512 subsets are read from
// EBX; AVX512VBMI2 and AVX512VPOPCNTDQ are read from ECX.
260 cpuid(&eax, &ebx, &ecx, &edx);
261 if (ebx & cpuid_bit::ebx::avx2) {
262 host_isa |= instruction_set::AVX2;
264 if (ebx & cpuid_bit::ebx::bmi1) {
265 host_isa |= instruction_set::BMI1;
267 if (ebx & cpuid_bit::ebx::bmi2) {
268 host_isa |= instruction_set::BMI2;
// Taken when any bit of avx512_saved is missing from XCR0; the branch body
// lies outside this excerpt.
270 if (!((xcr0 & cpuid_bit::xcr0_bit::avx512_saved) ==
271 cpuid_bit::xcr0_bit::avx512_saved)) {
274 if (ebx & cpuid_bit::ebx::avx512f) {
275 host_isa |= instruction_set::AVX512F;
277 if (ebx & cpuid_bit::ebx::avx512bw) {
278 host_isa |= instruction_set::AVX512BW;
280 if (ebx & cpuid_bit::ebx::avx512cd) {
281 host_isa |= instruction_set::AVX512CD;
283 if (ebx & cpuid_bit::ebx::avx512dq) {
284 host_isa |= instruction_set::AVX512DQ;
286 if (ebx & cpuid_bit::ebx::avx512vl) {
287 host_isa |= instruction_set::AVX512VL;
289 if (ecx & cpuid_bit::ecx::avx512vbmi2) {
290 host_isa |= instruction_set::AVX512VBMI2;
292 if (ecx & cpuid_bit::ecx::avx512vpopcnt) {
293 host_isa |= instruction_set::AVX512VPOPCNTDQ;
297#elif defined(__loongarch__)
// LoongArch variant: starts from DEFAULT and, on Linux, ORs in LSX / LASX
// according to the AT_HWCAP auxiliary-vector word.
// NOTE(review): the declaration of `hwcap`, the closing braces and the return
// are not visible in this excerpt.
299static inline uint32_t detect_supported_architectures() {
300 uint32_t host_isa = instruction_set::DEFAULT;
301 #if defined(__linux__)
303 hwcap = getauxval(AT_HWCAP);
304 if (hwcap & HWCAP_LOONGARCH_LSX) {
305 host_isa |= instruction_set::LSX;
307 if (hwcap & HWCAP_LOONGARCH_LASX) {
308 host_isa |= instruction_set::LASX;
// Fallback variant: performs no probing and reports only DEFAULT.
// NOTE(review): the preprocessor branch selecting this definition is not
// visible in this excerpt - presumably the final #else of the chain.
316static inline uint32_t detect_supported_architectures() {
317 return instruction_set::DEFAULT;