LCOV - code coverage report
Current view: top level - asmjit - cpuinfo.cpp (source / functions) Hit Total Coverage
Test: plumed test coverage (other modules) Lines: 137 149 91.9 %
Date: 2024-10-11 08:09:49 Functions: 3 3 100.0 %

          Line data    Source code
       1             : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
       2             : Copyright (c) 2008-2017, Petr Kobalicek
       3             : 
       4             : This software is provided 'as-is', without any express or implied
       5             : warranty. In no event will the authors be held liable for any damages
       6             : arising from the use of this software.
       7             : 
       8             : Permission is granted to anyone to use this software for any purpose,
       9             : including commercial applications, and to alter it and redistribute it
      10             : freely, subject to the following restrictions:
      11             : 
      12             : 1. The origin of this software must not be misrepresented; you must not
      13             :    claim that you wrote the original software. If you use this software
      14             :    in a product, an acknowledgment in the product documentation would be
      15             :    appreciated but is not required.
      16             : 2. Altered source versions must be plainly marked as such, and must not be
      17             :    misrepresented as being the original software.
      18             : 3. This notice may not be removed or altered from any source distribution.
      19             : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
      20             : #ifdef __PLUMED_HAS_ASMJIT
      21             : #pragma GCC diagnostic push
      22             : #pragma GCC diagnostic ignored "-Wpedantic"
      23             : // [AsmJit]
      24             : // Complete x86/x64 JIT and Remote Assembler for C++.
      25             : //
      26             : // [License]
      27             : // Zlib - See LICENSE.md file in the package.
      28             : 
      29             : // [Export]
      30             : #define ASMJIT_EXPORTS
      31             : 
      32             : // [Dependencies]
      33             : #include "./cpuinfo.h"
      34             : #include "./utils.h"
      35             : 
      36             : #if ASMJIT_OS_POSIX
      37             : # include <errno.h>
      38             : # include <sys/utsname.h>
      39             : # include <unistd.h>
      40             : #endif // ASMJIT_OS_POSIX
      41             : 
      42             : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
      43             : # if ASMJIT_CC_MSC_GE(14, 0, 0)
      44             :  # include <intrin.h>         // Required by `__cpuid()` and `_xgetbv()`.
      45             : # endif // _MSC_VER >= 1400
      46             : #endif
      47             : 
      48             : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
      49             : # if ASMJIT_OS_LINUX
      50             : #  include <sys/auxv.h>       // Required by `getauxval()`.
      51             : # endif
      52             : #endif
      53             : 
      54             : // [Api-Begin]
      55             : #include "./asmjit_apibegin.h"
      56             : 
      57             : namespace PLMD {
      58             : namespace asmjit {
      59             : 
      60             : // ============================================================================
      61             : // [asmjit::CpuInfo - Detect ARM]
      62             : // ============================================================================
      63             : 
      64             : // ARM information has to be retrieved by the OS (this is how ARM was designed).
      65             : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
      66             : 
      67             : #if ASMJIT_ARCH_ARM32
      68             : static ASMJIT_INLINE void armPopulateBaselineA32Features(CpuInfo* cpuInfo) noexcept {
      69             :   cpuInfo->_archInfo.init(ArchInfo::kTypeA32);  // A32 baseline: only the architecture type is set, no features are added here.
      70             : }
      71             : #endif // ASMJIT_ARCH_ARM32
      72             : 
      73             : #if ASMJIT_ARCH_ARM64
      74             : static ASMJIT_INLINE void armPopulateBaselineA64Features(CpuInfo* cpuInfo) noexcept {
      75             :   cpuInfo->_archInfo.init(ArchInfo::kTypeA64);  // Set the architecture type first, then add every feature implied by A64.
      76             : 
      77             :   // Thumb (including all variations) is reported for A64 CPUs, although it is not accessible from A64 code.
      78             :   cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB);
      79             :   cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2);
      80             : 
      81             :   // A64 is based on ARMv8 and newer, so all architecture levels up to V8 are flagged.
      82             :   cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
      83             :   cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
      84             :   cpuInfo->addFeature(CpuInfo::kArmFeatureV8);
      85             : 
      86             :   // A64 comes with these features by default (no runtime probing is done for them).
      87             :   cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
      88             :   cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv3);
      89             :   cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv4);
      90             :   cpuInfo->addFeature(CpuInfo::kArmFeatureEDSP);
      91             :   cpuInfo->addFeature(CpuInfo::kArmFeatureASIMD);
      92             :   cpuInfo->addFeature(CpuInfo::kArmFeatureIDIVA);
      93             :   cpuInfo->addFeature(CpuInfo::kArmFeatureIDIVT);
      94             : }
      95             : #endif // ASMJIT_ARCH_ARM64
      96             : 
      97             : #if ASMJIT_OS_WINDOWS
      98             : //! \internal
      99             : //!
     100             : //! Detect ARM CPU features on Windows and record them in `cpuInfo`.
     101             : //!
     102             : //! Features the comments below describe as required by Windows are added unconditionally; the rest are probed with `IsProcessorFeaturePresent()`.
     103             : static ASMJIT_INLINE void armDetectCpuInfoOnWindows(CpuInfo* cpuInfo) noexcept {
     104             : #if ASMJIT_ARCH_ARM32
     105             :   armPopulateBaselineA32Features(cpuInfo);
     106             : 
     107             :   // Windows for ARM requires at least ARMv7 with DSP extensions.
     108             :   cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
     109             :   cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
     110             :   cpuInfo->addFeature(CpuInfo::kArmFeatureEDSP);
     111             : 
     112             :   // Windows for ARM requires VFPv3 (VFPv2 is added alongside it).
     113             :   cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
     114             :   cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv3);
     115             : 
     116             :   // Windows for ARM requires and uses THUMB2.
     117             :   cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB);
     118             :   cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2);
     119             : #else
     120             :   armPopulateBaselineA64Features(cpuInfo);
     121             : #endif
     122             : 
     123             :   // Windows for ARM requires ASIMD (added for both the A32 and the A64 build).
     124             :   cpuInfo->addFeature(CpuInfo::kArmFeatureASIMD);
     125             : 
     126             :   // Detect additional CPU features by calling `IsProcessorFeaturePresent()` for each table entry.
     127             :   struct WinPFPMapping {
     128             :     uint32_t pfpId;      // Identifier passed to `IsProcessorFeaturePresent()`.
     129             :     uint32_t featureId;  // Feature added to `cpuInfo` when the call returns non-zero.
     130             :   };
     131             : 
     132             :   static const WinPFPMapping mapping[] = {
     133             :     { PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE , CpuInfo::kArmFeatureVFPv4     },
     134             :     { PF_ARM_VFP_32_REGISTERS_AVAILABLE  , CpuInfo::kArmFeatureVFP_D32   },
     135             :     { PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE, CpuInfo::kArmFeatureIDIVT     },
     136             :     { PF_ARM_64BIT_LOADSTORE_ATOMIC      , CpuInfo::kArmFeatureAtomics64 }
     137             :   };
     138             : 
     139             :   for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(mapping); i++)
     140             :     if (::IsProcessorFeaturePresent(mapping[i].pfpId))
     141             :       cpuInfo->addFeature(mapping[i].featureId);
     142             : }
     143             : #endif // ASMJIT_OS_WINDOWS
     144             : 
     145             : #if ASMJIT_OS_LINUX
     146             : struct LinuxHWCapMapping {
     147             :   uint32_t hwcapMask;  // Bit mask tested against the value returned by `getauxval()`.
     148             :   uint32_t featureId;  // Feature added to `CpuInfo` when every bit of `hwcapMask` is set.
     149             : };
     150             : 
     151             : static void armDetectHWCaps(CpuInfo* cpuInfo, unsigned long type, const LinuxHWCapMapping* mapping, size_t length) noexcept {
     152             :   unsigned long mask = getauxval(type);  // `type` is the auxiliary-vector key (callers pass AT_HWCAP or AT_HWCAP2).
     153             : 
     154             :   for (size_t i = 0; i < length; i++)  // `length` is the number of entries in `mapping`.
     155             :     if ((mask & mapping[i].hwcapMask) == mapping[i].hwcapMask)  // All bits of the entry's mask must be present.
     156             :       cpuInfo->addFeature(mapping[i].featureId);
     157             : }
     158             : 
     159             : //! \internal
     160             : //!
     161             : //! Detect ARM CPU features on Linux and record them in `cpuInfo`.
     162             : //!
     163             : //! The detection is based on `getauxval()`; the tables below hard-code the HWCAP bit positions named in their comments.
     164             : ASMJIT_FAVOR_SIZE static void armDetectCpuInfoOnLinux(CpuInfo* cpuInfo) noexcept {
     165             : #if ASMJIT_ARCH_ARM32
     166             :   armPopulateBaselineA32Features(cpuInfo);
     167             : 
     168             :   // `AT_HWCAP` provides flags related to ARMv7 and older.
     169             :   static const LinuxHWCapMapping hwCapMapping[] = {
     170             :     { /* HWCAP_VFP     */ (1 <<  6), CpuInfo::kArmFeatureVFPv2     },
     171             :     { /* HWCAP_EDSP    */ (1 <<  7), CpuInfo::kArmFeatureEDSP      },
     172             :     { /* HWCAP_NEON    */ (1 << 12), CpuInfo::kArmFeatureASIMD     },
     173             :     { /* HWCAP_VFPv3   */ (1 << 13), CpuInfo::kArmFeatureVFPv3     },
     174             :     { /* HWCAP_VFPv4   */ (1 << 16), CpuInfo::kArmFeatureVFPv4     },
     175             :     { /* HWCAP_IDIVA   */ (1 << 17), CpuInfo::kArmFeatureIDIVA     },
     176             :     { /* HWCAP_IDIVT   */ (1 << 18), CpuInfo::kArmFeatureIDIVT     },
     177             :     { /* HWCAP_VFPD32  */ (1 << 19), CpuInfo::kArmFeatureVFP_D32   }
     178             :   };
     179             :   armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
     180             : 
     181             :   // VFPv3 implies VFPv2.
     182             :   if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv3)) {
     183             :     cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
     184             :   }
     185             : 
     186             :   // VFPv2 implies ARMv6 (checked after the VFPv3 rule so VFPv3-only reports are covered too).
     187             :   if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv2)) {
     188             :     cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
     189             :   }
     190             : 
     191             :   // VFPv3 or ASIMD implies ARMv7.
     192             :   if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv3) ||
     193             :       cpuInfo->hasFeature(CpuInfo::kArmFeatureASIMD)) {
     194             :     cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
     195             :   }
     196             : 
     197             :   // `AT_HWCAP2` provides ARMv8+ related flags.
     198             :   static const LinuxHWCapMapping hwCap2Mapping[] = {
     199             :     { /* HWCAP2_AES    */ (1 <<  0), CpuInfo::kArmFeatureAES       },
     200             :     { /* HWCAP2_PMULL  */ (1 <<  1), CpuInfo::kArmFeaturePMULL     },
     201             :     { /* HWCAP2_SHA1   */ (1 <<  2), CpuInfo::kArmFeatureSHA1      },
     202             :     { /* HWCAP2_SHA2   */ (1 <<  3), CpuInfo::kArmFeatureSHA256    },
     203             :     { /* HWCAP2_CRC32  */ (1 <<  4), CpuInfo::kArmFeatureCRC32     }
     204             :   };
     205             :   armDetectHWCaps(cpuInfo, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
     206             : 
     207             :   if (cpuInfo->hasFeature(CpuInfo::kArmFeatureAES   ) ||
     208             :       cpuInfo->hasFeature(CpuInfo::kArmFeatureCRC32 ) ||
     209             :       cpuInfo->hasFeature(CpuInfo::kArmFeaturePMULL ) ||
     210             :       cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA1  ) ||
     211             :       cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA256)) {
     212             :     cpuInfo->addFeature(CpuInfo::kArmFeatureV8);  // Any feature reported through AT_HWCAP2 is treated as proof of ARMv8.
     213             :   }
     214             : #else
     215             :   armPopulateBaselineA64Features(cpuInfo);
     216             : 
     217             :   // `AT_HWCAP` provides ARMv8+ related flags (the A64 bit layout differs from the A32 table above).
     218             :   static const LinuxHWCapMapping hwCapMapping[] = {
     219             :     { /* HWCAP_ASIMD   */ (1 <<  1), CpuInfo::kArmFeatureASIMD     },
     220             :     { /* HWCAP_AES     */ (1 <<  3), CpuInfo::kArmFeatureAES       },
     221             :     { /* HWCAP_CRC32   */ (1 <<  7), CpuInfo::kArmFeatureCRC32     },
     222             :     { /* HWCAP_PMULL   */ (1 <<  4), CpuInfo::kArmFeaturePMULL     },
     223             :     { /* HWCAP_SHA1    */ (1 <<  5), CpuInfo::kArmFeatureSHA1      },
     224             :     { /* HWCAP_SHA2    */ (1 <<  6), CpuInfo::kArmFeatureSHA256    },
     225             :     { /* HWCAP_ATOMICS */ (1 <<  8), CpuInfo::kArmFeatureAtomics64 }
     226             :   };
     227             :   armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
     228             : 
     229             :   // `AT_HWCAP2` is not queried for A64 at the moment.
     230             : #endif
     231             : }
     232             : #endif // ASMJIT_OS_LINUX
     233             : 
     234             : ASMJIT_FAVOR_SIZE static void armDetectCpuInfo(CpuInfo* cpuInfo) noexcept {  // Dispatches to the OS-specific ARM detector; any other OS is a compile-time error.
     235             : #if ASMJIT_OS_WINDOWS
     236             :   armDetectCpuInfoOnWindows(cpuInfo);  // Based on `IsProcessorFeaturePresent()`.
     237             : #elif ASMJIT_OS_LINUX
     238             :   armDetectCpuInfoOnLinux(cpuInfo);  // Based on `getauxval()`.
     239             : #else
     240             : # error "[asmjit] armDetectCpuInfo() - Unsupported OS."
     241             : #endif
     242             : }
     243             : #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
     244             : 
     245             : // ============================================================================
     246             : // [asmjit::CpuInfo - Detect X86]
     247             : // ============================================================================
     248             : 
     249             : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
     250             : 
     251             : //! \internal
     252             : //!
     253             : //! X86 CPUID result - the four registers produced by one `cpuid` invocation.
     254             : struct CpuIdResult {
     255             :   uint32_t eax, ebx, ecx, edx;  // Order matters: the MSVC paths of x86CallCpuId() fill this struct as four consecutive 32-bit values.
     256             : };
     257             : 
     258             : //! \internal
     259             : //!
     260             : //! Content of an XCR register, i.e. the result of the XGETBV instruction split into two 32-bit halves.
     261             : struct XGetBVResult {
     262             :   uint32_t eax, edx;  // `eax` holds the low 32 bits and `edx` the high 32 bits of the 64-bit value.
     263             : };
     264             : 
     265             : #if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64
     266             : //! \internal
     267             : //!
     268             : //! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! `inEcx` is
     269             : //! deliberately the first (and otherwise unused) parameter: the 64-bit calling
     270             : //! convention passes it in ECX, so `__cpuid` sees the requested sub-leaf only if
     271             : //! the compiler doesn't move the register, otherwise the result would be wrong.
     272             : static ASMJIT_NOINLINE void x86CallCpuIdWorkaround(uint32_t inEcx, uint32_t inEax, CpuIdResult* result) noexcept {
     273             :   __cpuid(reinterpret_cast<int*>(result), inEax);  // Writes EAX, EBX, ECX, EDX into `result` as four consecutive ints.
     274             : }
     275             : #endif
     276             : 
     277             : //! \internal
     278             : //! Wrapper to call `cpuid` instruction with leaf `inEax` and sub-leaf `inEcx` (defaults to 0).
     279             : //! The four output registers are stored into `result`; the implementation is selected per compiler and architecture.
     280             : static void ASMJIT_INLINE x86CallCpuId(CpuIdResult* result, uint32_t inEax, uint32_t inEcx = 0) noexcept {
     281             : #if ASMJIT_CC_MSC && ASMJIT_CC_MSC_GE(15, 0, 30729)
     282             :   __cpuidex(reinterpret_cast<int*>(result), inEax, inEcx);  // `result` is treated as an array of four ints.
     283             : #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X64
     284             :   x86CallCpuIdWorkaround(inEcx, inEax, result);  // Note the argument order: `inEcx` goes first.
     285             : #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X86
     286             :   uint32_t paramEax = inEax;
     287             :   uint32_t paramEcx = inEcx;
     288             :   uint32_t* out = reinterpret_cast<uint32_t*>(result);
     289             : 
     290             :   __asm {
     291             :     mov     eax, paramEax
     292             :     mov     ecx, paramEcx
     293             :     mov     edi, out
     294             :     cpuid
     295             :     mov     dword ptr[edi +  0], eax
     296             :     mov     dword ptr[edi +  4], ebx
     297             :     mov     dword ptr[edi +  8], ecx
     298             :     mov     dword ptr[edi + 12], edx
     299             :   }
     300             : #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X86
     301             :   __asm__ __volatile__(
     302             :     "mov %%ebx, %%edi\n"   // Save the caller's EBX in EDI.
     303             :     "cpuid\n"
     304             :     "xchg %%edi, %%ebx\n"  // EDI now holds cpuid's EBX, and EBX is restored.
     305             :       : "=a"(result->eax),
     306             :         "=D"(result->ebx),  // Read from EDI because of the exchange above.
     307             :         "=c"(result->ecx),
     308             :         "=d"(result->edx)
     309             :       : "a"(inEax),
     310             :         "c"(inEcx));
     311             : #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG || ASMJIT_CC_INTEL) && ASMJIT_ARCH_X64
     312         759 :   __asm__ __volatile__(
     313             :     "mov %%rbx, %%rdi\n"   // Save the caller's RBX in RDI.
     314             :     "cpuid\n"
     315             :     "xchg %%rdi, %%rbx\n"  // RDI now holds cpuid's EBX, and RBX is restored.
     316             :       : "=a"(result->eax),
     317             :         "=D"(result->ebx),  // Read from (R/E)DI because of the exchange above.
     318             :         "=c"(result->ecx),
     319             :         "=d"(result->edx)
     320             :       : "a"(inEax),
     321             :         "c"(inEcx));
     322             : #else
     323             : # error "[asmjit] x86CallCpuid() - Unsupported compiler."
     324             : #endif
     325             : }
     326             : 
     327             : //! \internal
     328             : //! Wrapper to call `xgetbv` instruction for the XCR register selected by `inEcx`.
     329             : //! The 64-bit value is stored into `result` as low (`eax`) and high (`edx`) halves; unsupported compilers get zeros.
     330             : static ASMJIT_INLINE void x86CallXGetBV(XGetBVResult* result, uint32_t inEcx) noexcept {
     331             : #if ASMJIT_CC_MSC_GE(16, 0, 40219) // 2010SP1+
     332             :   uint64_t value = _xgetbv(inEcx);
     333             :   result->eax = static_cast<uint32_t>(value & 0xFFFFFFFFU);
     334             :   result->edx = static_cast<uint32_t>(value >> 32);
     335             : #elif ASMJIT_CC_GCC || ASMJIT_CC_CLANG
     336             :   uint32_t outEax;
     337             :   uint32_t outEdx;
     338             : 
     339             :   // The instruction is emitted as raw opcode bytes instead of the mnemonic form shown below (presumably for assemblers that lack `xgetbv`):
     340             :   //   __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
     341          69 :   __asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
     342             : 
     343             :   result->eax = outEax;
     344             :   result->edx = outEdx;
     345             : #else
     346             :   result->eax = 0;  // No XGETBV path for this compiler: both halves are reported as zero.
     347             :   result->edx = 0;
     348             : #endif
     349          69 : }
     350             : 
     351             : //! \internal
     352             : //! Returns `CpuInfo::kVendorNone` when `vendorString` matches no entry of the table below.
     353             : //! Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
     354             : static ASMJIT_INLINE uint32_t x86GetCpuVendorID(const char* vendorString) noexcept {
     355             :   struct VendorData {
     356             :     uint32_t id;    // Vendor ID returned on a match.
     357             :     char text[12];  // Exactly 12 bytes, compared without a terminator.
     358             :   };
     359             : 
     360             :   static const VendorData vendorList[] = {
     361             :     { CpuInfo::kVendorIntel , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },
     362             :     { CpuInfo::kVendorAMD   , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
     363             :     { CpuInfo::kVendorVIA   , { 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0 , 'V', 'I', 'A',  0  } },
     364             :     { CpuInfo::kVendorVIA   , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } }
     365             :   };
     366             : 
     367          69 :   uint32_t dw0 = reinterpret_cast<const uint32_t*>(vendorString)[0];  // NOTE(review): char data is read through uint32_t* - assumes 4-byte alignment and tolerant aliasing; confirm or compare with memcmp.
     368          69 :   uint32_t dw1 = reinterpret_cast<const uint32_t*>(vendorString)[1];
     369          69 :   uint32_t dw2 = reinterpret_cast<const uint32_t*>(vendorString)[2];
     370             : 
     371         138 :   for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(vendorList); i++) {
     372         138 :     if (dw0 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[0] &&
     373          69 :         dw1 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[1] &&
     374          69 :         dw2 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[2])
     375          69 :       return vendorList[i].id;  // All three 32-bit words (12 bytes) matched.
     376             :   }
     377             : 
     378             :   return CpuInfo::kVendorNone;
     379             : }
     380             : 
     381             : static ASMJIT_INLINE void x86SimplifyBrandString(char* s) noexcept {
     382             :   // Compacts the zero-terminated string `s` in place: a space is dropped when it follows a kept '@' or precedes ' ' or '@'.
     383             :   // Every source character is cleared as it is consumed, so the result has no garbage after the new zero terminator.
     384             :   char* d = s;  // Write cursor; `s` itself serves as the read cursor.
     385             : 
     386             :   char prev = 0;  // Last character that was kept (skipped spaces do not update it).
     387          69 :   char curr = s[0];
     388          69 :   s[0] = '\0';
     389             : 
     390             :   for (;;) {
     391        3312 :     if (curr == 0)
     392             :       break;
     393             : 
     394        3243 :     if (curr == ' ') {
     395        1380 :       if (prev == '@' || s[1] == ' ' || s[1] == '@')  // `s[1]` is the not-yet-consumed character after `curr`.
     396        1035 :         goto L_Skip;
     397             :     }
     398             : 
     399        2208 :     d[0] = curr;
     400        2208 :     d++;
     401             :     prev = curr;
     402             : 
     403        3243 : L_Skip:
     404        3243 :     curr = *++s;  // Advance the read cursor, then clear the slot just read.
     405        3243 :     s[0] = '\0';
     406             :   }
     407             : 
     408          69 :   d[0] = '\0';  // Terminate the compacted string.
     409             : }
     410             : 
     411          69 : ASMJIT_FAVOR_SIZE static void x86DetectCpuInfo(CpuInfo* cpuInfo) noexcept {
     412             :   uint32_t i, maxId;
     413             : 
     414             :   CpuIdResult regs;
     415             :   XGetBVResult xcr0 = { 0, 0 };
     416             : 
     417          69 :   cpuInfo->_archInfo.init(ArchInfo::kTypeHost);
     418             :   cpuInfo->addFeature(CpuInfo::kX86FeatureI486);
     419             : 
     420             :   // --------------------------------------------------------------------------
     421             :   // [CPUID EAX=0x0]
     422             :   // --------------------------------------------------------------------------
     423             : 
     424             :   // Get vendor string/id.
     425             :   x86CallCpuId(&regs, 0x0);
     426             : 
     427          69 :   maxId = regs.eax;
     428          69 :   ::memcpy(cpuInfo->_vendorString + 0, &regs.ebx, 4);
     429          69 :   ::memcpy(cpuInfo->_vendorString + 4, &regs.edx, 4);
     430          69 :   ::memcpy(cpuInfo->_vendorString + 8, &regs.ecx, 4);
     431          69 :   cpuInfo->_vendorId = x86GetCpuVendorID(cpuInfo->_vendorString);
     432             : 
     433             :   // --------------------------------------------------------------------------
     434             :   // [CPUID EAX=0x1]
     435             :   // --------------------------------------------------------------------------
     436             : 
     437          69 :   if (maxId >= 0x1) {
     438             :     // Get feature flags in ECX/EDX and family/model in EAX.
     439             :     x86CallCpuId(&regs, 0x1);
     440             : 
     441             :     // Fill family and model fields.
     442          69 :     cpuInfo->_family   = (regs.eax >> 8) & 0x0F;
     443          69 :     cpuInfo->_model    = (regs.eax >> 4) & 0x0F;
     444          69 :     cpuInfo->_stepping = (regs.eax     ) & 0x0F;
     445             : 
     446             :     // Use extended family and model fields.
     447          69 :     if (cpuInfo->_family == 0x0F) {
     448          69 :       cpuInfo->_family += ((regs.eax >> 20) & 0xFF);
     449          69 :       cpuInfo->_model  += ((regs.eax >> 16) & 0x0F) << 4;
     450             :     }
     451             : 
     452          69 :     cpuInfo->_x86Data._processorType        = ((regs.eax >> 12) & 0x03);
     453          69 :     cpuInfo->_x86Data._brandIndex           = ((regs.ebx      ) & 0xFF);
     454          69 :     cpuInfo->_x86Data._flushCacheLineSize   = ((regs.ebx >>  8) & 0xFF) * 8;
     455          69 :     cpuInfo->_x86Data._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
     456             : 
     457          69 :     if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE3);
     458          69 :     if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCLMULQDQ);
     459          69 :     if (regs.ecx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureMONITOR);
     460          69 :     if (regs.ecx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSSE3);
     461          69 :     if (regs.ecx & 0x00002000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG16B);
     462          69 :     if (regs.ecx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_1);
     463          69 :     if (regs.ecx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_2);
     464          69 :     if (regs.ecx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMOVBE);
     465          69 :     if (regs.ecx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePOPCNT);
     466          69 :     if (regs.ecx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAESNI);
     467          69 :     if (regs.ecx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE);
     468          69 :     if (regs.ecx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureOSXSAVE);
     469          69 :     if (regs.ecx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDRAND);
     470          69 :     if (regs.edx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSC);
     471          69 :     if (regs.edx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSR);
     472          69 :     if (regs.edx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG8B);
     473          69 :     if (regs.edx & 0x00008000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMOV);
     474          69 :     if (regs.edx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH);
     475          69 :     if (regs.edx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX);
     476          69 :     if (regs.edx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR);
     477          69 :     if (regs.edx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
     478             :                                         .addFeature(CpuInfo::kX86FeatureMMX2);
     479          69 :     if (regs.edx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
     480             :                                         .addFeature(CpuInfo::kX86FeatureSSE2);
     481          69 :     if (regs.edx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMT);
     482             : 
     483             :     // Get the content of XCR0 if supported by CPU and enabled by OS.
     484          69 :     if ((regs.ecx & 0x0C000000U) == 0x0C000000U) {
     485             :       x86CallXGetBV(&xcr0, 0);
     486             :     }
     487             : 
     488             :     // Detect AVX+.
     489          69 :     if (regs.ecx & 0x10000000U) {
     490             :       // - XCR0[2:1] == 11b
     491             :       //   XMM & YMM states need to be enabled by OS.
     492          69 :       if ((xcr0.eax & 0x00000006U) == 0x00000006U) {
     493             :         cpuInfo->addFeature(CpuInfo::kX86FeatureAVX);
     494             : 
     495          69 :         if (regs.ecx & 0x00001000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA);
     496          69 :         if (regs.ecx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureF16C);
     497             :       }
     498             :     }
     499             :   }
     500             : 
     501             :   // --------------------------------------------------------------------------
     502             :   // [CPUID EAX=0x7]
     503             :   // --------------------------------------------------------------------------
     504             : 
     505             :   // Detect new features if the processor supports CPUID-07.
     506             :   bool maybeMPX = false;
     507             : 
     508          69 :   if (maxId >= 0x7) {
     509             :     x86CallCpuId(&regs, 0x7);
     510             : 
     511          69 :     if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureFSGSBASE);
     512          69 :     if (regs.ebx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI);
     513          69 :     if (regs.ebx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureHLE);
     514          69 :     if (regs.ebx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMEP);
     515          69 :     if (regs.ebx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI2);
     516          69 :     if (regs.ebx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureERMS);
     517          69 :     if (regs.ebx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureRTM);
     518          69 :     if (regs.ebx & 0x00004000U) maybeMPX = true;
     519          69 :     if (regs.ebx & 0x00040000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDSEED);
     520          69 :     if (regs.ebx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureADX);
     521          69 :     if (regs.ebx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMAP);
     522          69 :     if (regs.ebx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCOMMIT);
     523          69 :     if (regs.ebx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSHOPT);
     524          69 :     if (regs.ebx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLWB);
     525          69 :     if (regs.ebx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSHA);
     526          69 :     if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHWT1);
     527             : 
     528             :     // TSX is supported if at least one of `HLE` and `RTM` is supported.
     529          69 :     if (regs.ebx & 0x00000810U) cpuInfo->addFeature(CpuInfo::kX86FeatureTSX);
     530             : 
     531             :     // Detect AVX2.
     532          69 :     if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
     533          69 :       if (regs.ebx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX2);
     534             :     }
     535             : 
     536             :     // Detect AVX-512+.
     537          69 :     if (regs.ebx & 0x00010000U) {
     538             :       // - XCR0[2:1] == 11b
     539             :       //   XMM/YMM states need to be enabled by OS.
     540             :       // - XCR0[7:5] == 111b
     541             :       //   Upper 256 bits of ZMM0-ZMM15 and ZMM16-ZMM31 need to be enabled by the OS.
     542           0 :       if ((xcr0.eax & 0x000000E6U) == 0x000000E6U) {
     543             :         cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_F);
     544             : 
     545           0 :         if (regs.ebx & 0x00020000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_DQ);
     546           0 :         if (regs.ebx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_IFMA);
     547           0 :         if (regs.ebx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_PFI);
     548           0 :         if (regs.ebx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_ERI);
     549           0 :         if (regs.ebx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_CDI);
     550           0 :         if (regs.ebx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_BW);
     551           0 :         if (regs.ebx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VL);
     552           0 :         if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VBMI);
     553           0 :         if (regs.ecx & 0x00004000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VPOPCNTDQ);
     554           0 :         if (regs.edx & 0x00000004U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_4VNNIW);
     555           0 :         if (regs.edx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_4FMAPS);
     556             :       }
     557             :     }
     558             :   }
     559             : 
     560             :   // --------------------------------------------------------------------------
     561             :   // [CPUID EAX=0xD]
     562             :   // --------------------------------------------------------------------------
     563             : 
     564          69 :   if (maxId >= 0xD) {
     565             :     x86CallCpuId(&regs, 0xD, 0);
     566             : 
     567             :     // MPX is supported only if its state bits are set in both the CPUID result and XCR0.
     568          69 :     if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U && maybeMPX)
     569             :       cpuInfo->addFeature(CpuInfo::kX86FeatureMPX);
     570             : 
     571             :     x86CallCpuId(&regs, 0xD, 1);
     572          69 :     if (regs.eax & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVEOPT);
     573          69 :     if (regs.eax & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVEC);
     574          69 :     if (regs.eax & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVES);
     575             :   }
     576             : 
     577             :   // --------------------------------------------------------------------------
     578             :   // [CPUID EAX=0x80000000...maxId]
     579             :   // --------------------------------------------------------------------------
     580             : 
     581             :   // The highest EAX that we understand.
     582          69 :   uint32_t kHighestProcessedEAX = 0x80000008U;
     583             : 
     584             :   // Several CPUID calls are required to get the whole brand string. It's easy
     585             :   // to copy one DWORD at a time instead of performing a byte copy.
     586          69 :   uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
     587             : 
     588             :   i = maxId = 0x80000000U;
     589             :   do {
     590             :     x86CallCpuId(&regs, i);
     591         414 :     switch (i) {
     592             :       case 0x80000000U:
     593          69 :         maxId = std::min<uint32_t>(regs.eax, kHighestProcessedEAX);
     594          69 :         break;
     595             : 
     596          69 :       case 0x80000001U:
     597          69 :         if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureLAHFSAHF);
     598          69 :         if (regs.ecx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureLZCNT);
     599          69 :         if (regs.ecx & 0x00000040U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4A);
     600          69 :         if (regs.ecx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSSE);
     601          69 :         if (regs.ecx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHW);
     602          69 :         if (regs.ecx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureTBM);
     603          69 :         if (regs.edx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureNX);
     604          69 :         if (regs.edx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSROPT);
     605          69 :         if (regs.edx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX2);
     606          69 :         if (regs.edx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSCP);
     607          69 :         if (regs.edx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW2)
     608             :                                             .addFeature(CpuInfo::kX86FeatureMMX2);
     609          69 :         if (regs.edx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW);
     610             : 
     611          69 :         if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
     612          69 :           if (regs.ecx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureXOP);
     613          69 :           if (regs.ecx & 0x00010000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA4);
     614             :         }
     615             : 
     616             :         // These seem to be only supported by AMD.
     617          69 :         if (cpuInfo->getVendorId() == CpuInfo::kVendorAMD) {
     618          69 :           if (regs.ecx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureALTMOVCR8);
     619             :         }
     620             :         break;
     621             : 
     622         207 :       case 0x80000002U:
     623             :       case 0x80000003U:
     624             :       case 0x80000004U:
     625         207 :         *brand++ = regs.eax;
     626         207 :         *brand++ = regs.ebx;
     627         207 :         *brand++ = regs.ecx;
     628         207 :         *brand++ = regs.edx;
     629             : 
     630             :         // Go directly to the last one.
     631         207 :         if (i == 0x80000004U) i = 0x80000008U - 1;
     632             :         break;
     633             : 
     634          69 :       case 0x80000008U:
     635          69 :         if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLZERO);
     636             :         break;
     637             :     }
     638         414 :   } while (++i <= maxId);
     639             : 
     640             :   // Simplify CPU brand string by removing unnecessary spaces.
     641             :   x86SimplifyBrandString(cpuInfo->_brandString);
     642          69 : }
     643             : #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
     644             : 
     645             : // ============================================================================
     646             : // [asmjit::CpuInfo - Detect - HWThreadsCount]
     647             : // ============================================================================
     648             : 
     649             : static ASMJIT_INLINE uint32_t cpuDetectHWThreadsCount() noexcept { // Processor count reported by the OS, or 1 if it cannot be queried.
     650             : #if ASMJIT_OS_WINDOWS
     651             :   SYSTEM_INFO info;
     652             :   ::GetSystemInfo(&info);
     653             :   return info.dwNumberOfProcessors; // Returned as-is; no lower bound is applied on this path.
     654             : #elif ASMJIT_OS_POSIX && defined(_SC_NPROCESSORS_ONLN)
     655          69 :   long res = ::sysconf(_SC_NPROCESSORS_ONLN);
     656             :   if (res <= 0) return 1; // Non-positive result (sysconf() reports errors as -1): fall back to 1.
     657             :   return static_cast<uint32_t>(res);
     658             : #else
     659             :   return 1; // No query mechanism compiled in for this platform.
     660             : #endif
     661             : }
     662             : 
     663             : // ============================================================================
     664             : // [asmjit::CpuInfo - Detect]
     665             : // ============================================================================
     666             : 
     667          69 : ASMJIT_FAVOR_SIZE void CpuInfo::detect() noexcept { // Resets this object, then fills it via the detection routine compiled in for the target architecture.
     668             :   reset(); // Defined elsewhere; presumably clears any previously detected state.
     669             : 
     670             : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
     671             :   armDetectCpuInfo(this);
     672             : #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
     673             : 
     674             : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
     675          69 :   x86DetectCpuInfo(this);
     676             : #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
     677             : 
     678          69 :   _hwThreadsCount = cpuDetectHWThreadsCount(); // Set on every architecture, after the per-arch detection above.
     679          69 : }
     680             : 
     681             : // ============================================================================
     682             : // [asmjit::CpuInfo - GetHost]
     683             : // ============================================================================
     684             : 
     685             : struct HostCpuInfo : public CpuInfo { // CpuInfo that runs detect() in its constructor, so an instance is populated on construction.
     686          69 :   ASMJIT_INLINE HostCpuInfo() noexcept : CpuInfo() { detect(); }
     687             : };
     688             : 
     689        1972 : const CpuInfo& CpuInfo::getHost() noexcept { // Returns a reference to a single function-local static HostCpuInfo.
     690        2041 :   static HostCpuInfo host; // Constructed (and therefore detected) the first time control reaches this line.
     691        1972 :   return host;
     692             : }
     693             : 
     694             : } // asmjit namespace
     695             : } // namespace PLMD
     696             : 
     697             : // [Api-End]
     698             : #include "./asmjit_apiend.h"
     699             : #pragma GCC diagnostic pop
     700             : #endif // __PLUMED_HAS_ASMJIT

Generated by: LCOV version 1.15