如何检查CPU是否支持SSE3指令集?

发布于 2024-11-09 11:12:48 字数 675 浏览 3 评论 0 原文

以下代码是否有效用于检查 CPU 是否支持 SSE3 指令集?

使用 IsProcessorFeaturePresent( ) 功能显然在 Windows XP 上不起作用。

bool CheckSSE3()
{
    int CPUInfo[4] = {-1};

    //-- Get number of valid info ids
    __cpuid(CPUInfo, 0);
    int nIds = CPUInfo[0];

    //-- Get info for id "1"
    if (nIds >= 1)
    {
        __cpuid(CPUInfo, 1);
        bool bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
        return bSSE3NewInstructions;     
    }

    return false;      
}

Is the following code valid to check if a CPU supports the SSE3 instruction set?

Using the IsProcessorFeaturePresent() function apparently does not work on Windows XP.

bool CheckSSE3()
{
    int CPUInfo[4] = {-1};

    //-- Get number of valid info ids
    __cpuid(CPUInfo, 0);
    int nIds = CPUInfo[0];

    //-- Get info for id "1"
    if (nIds >= 1)
    {
        __cpuid(CPUInfo, 1);
        bool bSSE3NewInstructions = (CPUInfo[2] & 0x1) || false;
        return bSSE3NewInstructions;     
    }

    return false;      
}

如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

扫码二维码加入Web技术交流群

发布评论

需要 登录 才能够评论, 你可以免费 注册 一个本站的账号。

评论(7

哭了丶谁疼 2024-11-16 11:12:48

我创建了一个 GitHub 重现,它将检测所有主要 x86 ISA 扩展的 CPU 和操作系统支持:https://github .com/Mysticial/FeatureDetector

这是一个较短的版本:


首先您需要访问CPUID指令:

#ifdef _WIN32

//  Windows
#define cpuid(info, x)    __cpuidex(info, x, 0)

#else

//  GCC Intrinsics
#include <cpuid.h>
void cpuid(int info[4], int InfoType){
    __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}

#endif

然后您可以运行以下代码:

//  Misc.
bool HW_MMX;
bool HW_x64;
bool HW_ABM;      // Advanced Bit Manipulation
bool HW_RDRAND;
bool HW_BMI1;
bool HW_BMI2;
bool HW_ADX;
bool HW_PREFETCHWT1;

//  SIMD: 128-bit
bool HW_SSE;
bool HW_SSE2;
bool HW_SSE3;
bool HW_SSSE3;
bool HW_SSE41;
bool HW_SSE42;
bool HW_SSE4a;
bool HW_AES;
bool HW_SHA;

//  SIMD: 256-bit
bool HW_AVX;
bool HW_XOP;
bool HW_FMA3;
bool HW_FMA4;
bool HW_AVX2;

//  SIMD: 512-bit
bool HW_AVX512F;    //  AVX512 Foundation
bool HW_AVX512CD;   //  AVX512 Conflict Detection
bool HW_AVX512PF;   //  AVX512 Prefetch
bool HW_AVX512ER;   //  AVX512 Exponential + Reciprocal
bool HW_AVX512VL;   //  AVX512 Vector Length Extensions
bool HW_AVX512BW;   //  AVX512 Byte + Word
bool HW_AVX512DQ;   //  AVX512 Doubleword + Quadword
bool HW_AVX512IFMA; //  AVX512 Integer 52-bit Fused Multiply-Add
bool HW_AVX512VBMI; //  AVX512 Vector Byte Manipulation Instructions

int info[4];
cpuid(info, 0);
int nIds = info[0];

cpuid(info, 0x80000000);
unsigned nExIds = info[0];

//  Detect Features
if (nIds >= 0x00000001){
    cpuid(info,0x00000001);
    HW_MMX    = (info[3] & ((int)1 << 23)) != 0;
    HW_SSE    = (info[3] & ((int)1 << 25)) != 0;
    HW_SSE2   = (info[3] & ((int)1 << 26)) != 0;
    HW_SSE3   = (info[2] & ((int)1 <<  0)) != 0;

    HW_SSSE3  = (info[2] & ((int)1 <<  9)) != 0;
    HW_SSE41  = (info[2] & ((int)1 << 19)) != 0;
    HW_SSE42  = (info[2] & ((int)1 << 20)) != 0;
    HW_AES    = (info[2] & ((int)1 << 25)) != 0;

    HW_AVX    = (info[2] & ((int)1 << 28)) != 0;
    HW_FMA3   = (info[2] & ((int)1 << 12)) != 0;

    HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
}
if (nIds >= 0x00000007){
    cpuid(info,0x00000007);
    HW_AVX2   = (info[1] & ((int)1 <<  5)) != 0;

    HW_BMI1        = (info[1] & ((int)1 <<  3)) != 0;
    HW_BMI2        = (info[1] & ((int)1 <<  8)) != 0;
    HW_ADX         = (info[1] & ((int)1 << 19)) != 0;
    HW_SHA         = (info[1] & ((int)1 << 29)) != 0;
    HW_PREFETCHWT1 = (info[2] & ((int)1 <<  0)) != 0;

    HW_AVX512F     = (info[1] & ((int)1 << 16)) != 0;
    HW_AVX512CD    = (info[1] & ((int)1 << 28)) != 0;
    HW_AVX512PF    = (info[1] & ((int)1 << 26)) != 0;
    HW_AVX512ER    = (info[1] & ((int)1 << 27)) != 0;
    HW_AVX512VL    = (info[1] & ((int)1 << 31)) != 0;
    HW_AVX512BW    = (info[1] & ((int)1 << 30)) != 0;
    HW_AVX512DQ    = (info[1] & ((int)1 << 17)) != 0;
    HW_AVX512IFMA  = (info[1] & ((int)1 << 21)) != 0;
    HW_AVX512VBMI  = (info[2] & ((int)1 <<  1)) != 0;
}
if (nExIds >= 0x80000001){
    cpuid(info,0x80000001);
    HW_x64   = (info[3] & ((int)1 << 29)) != 0;
    HW_ABM   = (info[2] & ((int)1 <<  5)) != 0;
    HW_SSE4a = (info[2] & ((int)1 <<  6)) != 0;
    HW_FMA4  = (info[2] & ((int)1 << 16)) != 0;
    HW_XOP   = (info[2] & ((int)1 << 11)) != 0;
}

注意,这仅检测CPU是否支持该指令。要实际运行它们,您还需要操作系统支持。

具体来说,需要操作系统支持:

  • x64 指令。 (您需要 64 位操作系统。)
  • 使用 (AVX) 256 位 ymm 寄存器的指令。有关如何检测此问题的信息,请参阅 Andy Lutomirski 的回答
  • 使用 (AVX512) 512 位 zmm 和掩码寄存器的指令。检测操作系统对 AVX512 的支持与 AVX 相同,但使用标志 0xe6 而不是 0x6

I've created a GitHub repro that will detect CPU and OS support for all the major x86 ISA extensions: https://github.com/Mysticial/FeatureDetector

Here's a shorter version:


First you need to access the CPUID instruction:

#ifdef _WIN32

//  Windows
#define cpuid(info, x)    __cpuidex(info, x, 0)

#else

//  GCC Intrinsics
#include <cpuid.h>
void cpuid(int info[4], int InfoType){
    __cpuid_count(InfoType, 0, info[0], info[1], info[2], info[3]);
}

#endif

Then you can run the following code:

//  Misc.
bool HW_MMX;
bool HW_x64;
bool HW_ABM;      // Advanced Bit Manipulation
bool HW_RDRAND;
bool HW_BMI1;
bool HW_BMI2;
bool HW_ADX;
bool HW_PREFETCHWT1;

//  SIMD: 128-bit
bool HW_SSE;
bool HW_SSE2;
bool HW_SSE3;
bool HW_SSSE3;
bool HW_SSE41;
bool HW_SSE42;
bool HW_SSE4a;
bool HW_AES;
bool HW_SHA;

//  SIMD: 256-bit
bool HW_AVX;
bool HW_XOP;
bool HW_FMA3;
bool HW_FMA4;
bool HW_AVX2;

//  SIMD: 512-bit
bool HW_AVX512F;    //  AVX512 Foundation
bool HW_AVX512CD;   //  AVX512 Conflict Detection
bool HW_AVX512PF;   //  AVX512 Prefetch
bool HW_AVX512ER;   //  AVX512 Exponential + Reciprocal
bool HW_AVX512VL;   //  AVX512 Vector Length Extensions
bool HW_AVX512BW;   //  AVX512 Byte + Word
bool HW_AVX512DQ;   //  AVX512 Doubleword + Quadword
bool HW_AVX512IFMA; //  AVX512 Integer 52-bit Fused Multiply-Add
bool HW_AVX512VBMI; //  AVX512 Vector Byte Manipulation Instructions

int info[4];
cpuid(info, 0);
int nIds = info[0];

cpuid(info, 0x80000000);
unsigned nExIds = info[0];

//  Detect Features
if (nIds >= 0x00000001){
    cpuid(info,0x00000001);
    HW_MMX    = (info[3] & ((int)1 << 23)) != 0;
    HW_SSE    = (info[3] & ((int)1 << 25)) != 0;
    HW_SSE2   = (info[3] & ((int)1 << 26)) != 0;
    HW_SSE3   = (info[2] & ((int)1 <<  0)) != 0;

    HW_SSSE3  = (info[2] & ((int)1 <<  9)) != 0;
    HW_SSE41  = (info[2] & ((int)1 << 19)) != 0;
    HW_SSE42  = (info[2] & ((int)1 << 20)) != 0;
    HW_AES    = (info[2] & ((int)1 << 25)) != 0;

    HW_AVX    = (info[2] & ((int)1 << 28)) != 0;
    HW_FMA3   = (info[2] & ((int)1 << 12)) != 0;

    HW_RDRAND = (info[2] & ((int)1 << 30)) != 0;
}
if (nIds >= 0x00000007){
    cpuid(info,0x00000007);
    HW_AVX2   = (info[1] & ((int)1 <<  5)) != 0;

    HW_BMI1        = (info[1] & ((int)1 <<  3)) != 0;
    HW_BMI2        = (info[1] & ((int)1 <<  8)) != 0;
    HW_ADX         = (info[1] & ((int)1 << 19)) != 0;
    HW_SHA         = (info[1] & ((int)1 << 29)) != 0;
    HW_PREFETCHWT1 = (info[2] & ((int)1 <<  0)) != 0;

    HW_AVX512F     = (info[1] & ((int)1 << 16)) != 0;
    HW_AVX512CD    = (info[1] & ((int)1 << 28)) != 0;
    HW_AVX512PF    = (info[1] & ((int)1 << 26)) != 0;
    HW_AVX512ER    = (info[1] & ((int)1 << 27)) != 0;
    HW_AVX512VL    = (info[1] & ((int)1 << 31)) != 0;
    HW_AVX512BW    = (info[1] & ((int)1 << 30)) != 0;
    HW_AVX512DQ    = (info[1] & ((int)1 << 17)) != 0;
    HW_AVX512IFMA  = (info[1] & ((int)1 << 21)) != 0;
    HW_AVX512VBMI  = (info[2] & ((int)1 <<  1)) != 0;
}
if (nExIds >= 0x80000001){
    cpuid(info,0x80000001);
    HW_x64   = (info[3] & ((int)1 << 29)) != 0;
    HW_ABM   = (info[2] & ((int)1 <<  5)) != 0;
    HW_SSE4a = (info[2] & ((int)1 <<  6)) != 0;
    HW_FMA4  = (info[2] & ((int)1 << 16)) != 0;
    HW_XOP   = (info[2] & ((int)1 << 11)) != 0;
}

Note that this only detects whether the CPU supports the instructions. To actually run them, you also need to have operating system support.

Specifically, operating system support is required for:

  • x64 instructions. (You need a 64-bit OS.)
  • Instructions that use the (AVX) 256-bit ymm registers. See Andy Lutomirski's answer for how to detect this.
  • Instructions that use the (AVX512) 512-bit zmm and mask registers. Detecting OS support for AVX512 is the same as with AVX, but using the flag 0xe6 instead of 0x6.
旧夏天 2024-11-16 11:12:48

Mysticial 的答案有点危险——它解释了如何检测 CPU 支持而不是操作系统支持。您需要使用_xgetbv来检查操作系统是否启用了所需的CPU扩展状态。请参阅此处了解其他来源。 甚至 gcc 也犯了同样的错误。 代码的主要内容是:

bool avxSupported = false;

int cpuInfo[4];
__cpuid(cpuInfo, 1);

bool osUsesXSAVE_XRSTORE = cpuInfo[2] & (1 << 27) || false;
bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false;

if (osUsesXSAVE_XRSTORE && cpuAVXSuport)
{
    unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    avxSupported = (xcrFeatureMask & 0x6) == 0x6;
}

Mysticial's answer is a bit dangerous -- it explains how to detect CPU support but not OS support. You need to use _xgetbv to check whether the OS has enabled the required CPU extended state. See here for another source. Even gcc has made the same mistake. The meat of the code is:

bool avxSupported = false;

int cpuInfo[4];
__cpuid(cpuInfo, 1);

bool osUsesXSAVE_XRSTORE = cpuInfo[2] & (1 << 27) || false;
bool cpuAVXSuport = cpuInfo[2] & (1 << 28) || false;

if (osUsesXSAVE_XRSTORE && cpuAVXSuport)
{
    unsigned long long xcrFeatureMask = _xgetbv(_XCR_XFEATURE_ENABLED_MASK);
    avxSupported = (xcrFeatureMask & 0x6) == 0x6;
}
一枫情书 2024-11-16 11:12:48

经过一番谷歌搜索后,我还找到了英特尔的解决方案:

链接:https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th- Generation-intel-core-processor-family< /a>

    void cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) {
#if defined(_MSC_VER)
            __cpuidex((int*)abcd, eax, ecx);
#else
            uint32_t ebx, edx;
# if defined( __i386__ ) && defined ( __PIC__ )
            /* in case of PIC under 32-bit EBX cannot be clobbered */
            __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
# else
            __asm__("cpuid" : "+b" (ebx),
# endif
            "+a" (eax), "+c" (ecx), "=d" (edx));
            abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
#endif
    }

    int check_xcr0_ymm()
    {
        uint32_t xcr0;
#if defined(_MSC_VER)
        xcr0 = (uint32_t)_xgetbv(0);  /* min VS2010 SP1 compiler is required */
#else
        __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
        return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
    }

另请注意,GCC 有一些您可以使用的特殊内在函数(请参阅:https://gcc.gnu.org/onlinedocs /gcc-4.9.2/gcc/X86-Built-in-Functions.html ):

    if (__builtin_cpu_supports("avx2"))
    // ...

如果将其与上面的信息放在一起,一切都会解决 美好的。

After quite a bit of googling, I also found the solutions from Intel:

Link: https://software.intel.com/en-us/articles/how-to-detect-new-instruction-support-in-the-4th-generation-intel-core-processor-family

    void cpuid(uint32_t eax, uint32_t ecx, uint32_t* abcd) {
#if defined(_MSC_VER)
            __cpuidex((int*)abcd, eax, ecx);
#else
            uint32_t ebx, edx;
# if defined( __i386__ ) && defined ( __PIC__ )
            /* in case of PIC under 32-bit EBX cannot be clobbered */
            __asm__("movl %%ebx, %%edi \n\t cpuid \n\t xchgl %%ebx, %%edi" : "=D" (ebx),
# else
            __asm__("cpuid" : "+b" (ebx),
# endif
            "+a" (eax), "+c" (ecx), "=d" (edx));
            abcd[0] = eax; abcd[1] = ebx; abcd[2] = ecx; abcd[3] = edx;
#endif
    }

    int check_xcr0_ymm()
    {
        uint32_t xcr0;
#if defined(_MSC_VER)
        xcr0 = (uint32_t)_xgetbv(0);  /* min VS2010 SP1 compiler is required */
#else
        __asm__("xgetbv" : "=a" (xcr0) : "c" (0) : "%edx");
#endif
        return ((xcr0 & 6) == 6); /* checking if xmm and ymm state are enabled in XCR0 */
    }

Also note that GCC has some special intrinsics that you can use (see: https://gcc.gnu.org/onlinedocs/gcc-4.9.2/gcc/X86-Built-in-Functions.html ):

    if (__builtin_cpu_supports("avx2"))
    // ...

If you put this together with the information above, it'll all work out fine.

若沐 2024-11-16 11:12:48

添加到 Abhiroop 的答案:
在 Linux 上,您可以运行此 shell 命令来查找您的 CPU 支持的功能

cat /proc/cpuinfo | grep 标志 | uniq

在我的机器上打印

标志:fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx pdpe1gb rdtscp lm Constant_tsc rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand 管理程序 lahf_lm abm 3dnowprefetch invpcid_single retpoline kaiser fsgsbase bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx xsaveopt

To add to Abhiroop's answer:
On linux, you can run this shell command to find out the features supported by your CPU

cat /proc/cpuinfo | grep flags | uniq

On my machine this prints

flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx pdpe1gb rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc aperfmperf eagerfpu pni pclmulqdq ssse3 fma cx16 pcid sse4_1 sse4_2 x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand hypervisor lahf_lm abm 3dnowprefetch invpcid_single retpoline kaiser fsgsbase bmi1 hle avx2 smep bmi2 erms invpcid rtm rdseed adx xsaveopt

对你的占有欲 2024-11-16 11:12:48

在 Mac OS 上,这是有效的:

sysctl -a | grep machdep.cpu.features

在我的机器上,它输出以下内容:

machdep.cpu.features:FPU VME DE PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV PAT PSE36 CLFSH DS ACPI MMX FXSR SSE SSE2 SS HTT TM PBE SSE3SSE4.1 SSE4.2 x2APIC MOVBE POPCNT AES PCID XSAVE OSXSAVE SEGLIM64 TSCTMR AVX1.0< /strong> RDRAND F16C


正如您所看到的以粗体书写的指令,支持 SSE3 和许多其他 SIMD 指令。

On a Mac OS this works:

sysctl -a | grep machdep.cpu.features

In my machine it outputs this:

machdep.cpu.features: FPU VME DE PSE TSC MSR PAE MCE CX8 APIC SEP MTRR PGE MCA CMOV PAT PSE36 CLFSH DS ACPI MMX FXSR SSE SSE2 SS HTT TM PBE SSE3 PCLMULQDQ DTES64 MON DSCPL VMX EST TM2 SSSE3 FMA CX16 TPR PDCM SSE4.1 SSE4.2 x2APIC MOVBE POPCNT AES PCID XSAVE OSXSAVE SEGLIM64 TSCTMR AVX1.0 RDRAND F16C

As you can see with the instructions written in bold, SSE3 and bunch of other SIMD instructions are supported.

东风软 2024-11-16 11:12:48

在 linux 或 wsl2 上,来自 util-linux 存储库的 lscpu 命令也可以完成这项工作。

例如:

lscpu | grep sse3

Alternativley on linux or wsl2 the lscpucommand from the util-linux repository will do the job.

E.g:

lscpu | grep sse3
第七度阳光i 2024-11-16 11:12:48

或“POSIX 方式”:

cat /proc/cpuinfo |grep -i sse3 >/dev/null 2>&1 && echo "ESS3=TRUE" || echo "ESS3=FALSE"

结果:

ESS3=TRUE

Or the "POSIX way":

cat /proc/cpuinfo |grep -i sse3 >/dev/null 2>&1 && echo "ESS3=TRUE" || echo "ESS3=FALSE"

Results:

ESS3=TRUE
~没有更多了~
我们使用 Cookies 和其他技术来定制您的体验包括您的登录状态等。通过阅读我们的 隐私政策 了解更多相关信息。 单击 接受 或继续使用网站,即表示您同意使用 Cookies 和您的相关数据。
原文