Line data Source code
1 : /* +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
2 : Copyright (c) 2008-2017, Petr Kobalicek
3 :
4 : This software is provided 'as-is', without any express or implied
5 : warranty. In no event will the authors be held liable for any damages
6 : arising from the use of this software.
7 :
8 : Permission is granted to anyone to use this software for any purpose,
9 : including commercial applications, and to alter it and redistribute it
10 : freely, subject to the following restrictions:
11 :
12 : 1. The origin of this software must not be misrepresented; you must not
13 : claim that you wrote the original software. If you use this software
14 : in a product, an acknowledgment in the product documentation would be
15 : appreciated but is not required.
16 : 2. Altered source versions must be plainly marked as such, and must not be
17 : misrepresented as being the original software.
18 : 3. This notice may not be removed or altered from any source distribution.
19 : +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ */
20 : #ifdef __PLUMED_HAS_ASMJIT
21 : #pragma GCC diagnostic push
22 : #pragma GCC diagnostic ignored "-Wpedantic"
23 : // [AsmJit]
24 : // Complete x86/x64 JIT and Remote Assembler for C++.
25 : //
26 : // [License]
27 : // Zlib - See LICENSE.md file in the package.
28 :
29 : // [Export]
30 : #define ASMJIT_EXPORTS
31 :
32 : // [Dependencies]
33 : #include "./cpuinfo.h"
34 : #include "./utils.h"
35 :
36 : #if ASMJIT_OS_POSIX
37 : # include <errno.h>
38 : # include <sys/utsname.h>
39 : # include <unistd.h>
40 : #endif // ASMJIT_OS_POSIX
41 :
42 : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
43 : # if ASMJIT_CC_MSC_GE(14, 0, 0)
44 : # include <intrin.h> // Required by `__cpuid()` and `_xgetbv()`.
45 : # endif // _MSC_VER >= 1400
46 : #endif
47 :
48 : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
49 : # if ASMJIT_OS_LINUX
50 : # include <sys/auxv.h> // Required by `getauxval()`.
51 : # endif
52 : #endif
53 :
54 : // [Api-Begin]
55 : #include "./asmjit_apibegin.h"
56 :
57 : namespace PLMD {
58 : namespace asmjit {
59 :
60 : // ============================================================================
61 : // [asmjit::CpuInfo - Detect ARM]
62 : // ============================================================================
63 :
64 : // ARM information has to be retrieved by the OS (this is how ARM was designed).
65 : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
66 :
67 : #if ASMJIT_ARCH_ARM32
68 : static ASMJIT_INLINE void armPopulateBaselineA32Features(CpuInfo* cpuInfo) noexcept {
69 : cpuInfo->_archInfo.init(ArchInfo::kTypeA32);
70 : }
71 : #endif // ASMJIT_ARCH_ARM32
72 :
73 : #if ASMJIT_ARCH_ARM64
74 : static ASMJIT_INLINE void armPopulateBaselineA64Features(CpuInfo* cpuInfo) noexcept {
75 : cpuInfo->_archInfo.init(ArchInfo::kTypeA64);
76 :
77 : // Thumb (including all variations) is supported on A64 (but not accessible from A64).
78 : cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB);
79 : cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2);
80 :
81 : // A64 is based on ARMv8 and newer.
82 : cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
83 : cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
84 : cpuInfo->addFeature(CpuInfo::kArmFeatureV8);
85 :
86 : // A64 comes with these features by default.
87 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
88 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv3);
89 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv4);
90 : cpuInfo->addFeature(CpuInfo::kArmFeatureEDSP);
91 : cpuInfo->addFeature(CpuInfo::kArmFeatureASIMD);
92 : cpuInfo->addFeature(CpuInfo::kArmFeatureIDIVA);
93 : cpuInfo->addFeature(CpuInfo::kArmFeatureIDIVT);
94 : }
95 : #endif // ASMJIT_ARCH_ARM64
96 :
97 : #if ASMJIT_OS_WINDOWS
98 : //! \internal
99 : //!
100 : //! Detect ARM CPU features on Windows.
101 : //!
102 : //! The detection is based on `IsProcessorFeaturePresent()` API call.
103 : static ASMJIT_INLINE void armDetectCpuInfoOnWindows(CpuInfo* cpuInfo) noexcept {
104 : #if ASMJIT_ARCH_ARM32
105 : armPopulateBaselineA32Features(cpuInfo);
106 :
107 : // Windows for ARM requires at least ARMv7 with DSP extensions.
108 : cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
109 : cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
110 : cpuInfo->addFeature(CpuInfo::kArmFeatureEDSP);
111 :
112 : // Windows for ARM requires VFPv3.
113 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
114 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv3);
115 :
116 : // Windows for ARM requires and uses THUMB2.
117 : cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB);
118 : cpuInfo->addFeature(CpuInfo::kArmFeatureTHUMB2);
119 : #else
120 : armPopulateBaselineA64Features(cpuInfo);
121 : #endif
122 :
123 : // Windows for ARM requires ASIMD.
124 : cpuInfo->addFeature(CpuInfo::kArmFeatureASIMD);
125 :
126 : // Detect additional CPU features by calling `IsProcessorFeaturePresent()`.
127 : struct WinPFPMapping {
128 : uint32_t pfpId;
129 : uint32_t featureId;
130 : };
131 :
132 : static const WinPFPMapping mapping[] = {
133 : { PF_ARM_FMAC_INSTRUCTIONS_AVAILABLE , CpuInfo::kArmFeatureVFPv4 },
134 : { PF_ARM_VFP_32_REGISTERS_AVAILABLE , CpuInfo::kArmFeatureVFP_D32 },
135 : { PF_ARM_DIVIDE_INSTRUCTION_AVAILABLE, CpuInfo::kArmFeatureIDIVT },
136 : { PF_ARM_64BIT_LOADSTORE_ATOMIC , CpuInfo::kArmFeatureAtomics64 }
137 : };
138 :
139 : for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(mapping); i++)
140 : if (::IsProcessorFeaturePresent(mapping[i].pfpId))
141 : cpuInfo->addFeature(mapping[i].featureId);
142 : }
143 : #endif // ASMJIT_OS_WINDOWS
144 :
145 : #if ASMJIT_OS_LINUX
//! \internal
//!
//! Associates a bit-mask tested against a `getauxval()` result with the
//! `CpuInfo` feature that is added when all bits of the mask are set.
struct LinuxHWCapMapping {
  //! Bit-mask compared against the value returned by `getauxval()`.
  uint32_t hwcapMask;
  //! Feature passed to `CpuInfo::addFeature()` when the mask matches.
  uint32_t featureId;
};
150 :
151 : static void armDetectHWCaps(CpuInfo* cpuInfo, unsigned long type, const LinuxHWCapMapping* mapping, size_t length) noexcept {
152 : unsigned long mask = getauxval(type);
153 :
154 : for (size_t i = 0; i < length; i++)
155 : if ((mask & mapping[i].hwcapMask) == mapping[i].hwcapMask)
156 : cpuInfo->addFeature(mapping[i].featureId);
157 : }
158 :
159 : //! \internal
160 : //!
161 : //! Detect ARM CPU features on Linux.
162 : //!
163 : //! The detection is based on `getauxval()`.
164 : ASMJIT_FAVOR_SIZE static void armDetectCpuInfoOnLinux(CpuInfo* cpuInfo) noexcept {
165 : #if ASMJIT_ARCH_ARM32
166 : armPopulateBaselineA32Features(cpuInfo);
167 :
168 : // `AT_HWCAP` provides ARMv7 (and less) related flags.
169 : static const LinuxHWCapMapping hwCapMapping[] = {
170 : { /* HWCAP_VFP */ (1 << 6), CpuInfo::kArmFeatureVFPv2 },
171 : { /* HWCAP_EDSP */ (1 << 7), CpuInfo::kArmFeatureEDSP },
172 : { /* HWCAP_NEON */ (1 << 12), CpuInfo::kArmFeatureASIMD },
173 : { /* HWCAP_VFPv3 */ (1 << 13), CpuInfo::kArmFeatureVFPv3 },
174 : { /* HWCAP_VFPv4 */ (1 << 16), CpuInfo::kArmFeatureVFPv4 },
175 : { /* HWCAP_IDIVA */ (1 << 17), CpuInfo::kArmFeatureIDIVA },
176 : { /* HWCAP_IDIVT */ (1 << 18), CpuInfo::kArmFeatureIDIVT },
177 : { /* HWCAP_VFPD32 */ (1 << 19), CpuInfo::kArmFeatureVFP_D32 }
178 : };
179 : armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
180 :
181 : // VFPv3 implies VFPv2.
182 : if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv3)) {
183 : cpuInfo->addFeature(CpuInfo::kArmFeatureVFPv2);
184 : }
185 :
186 : // VFPv2 implies ARMv6.
187 : if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv2)) {
188 : cpuInfo->addFeature(CpuInfo::kArmFeatureV6);
189 : }
190 :
191 : // VFPv3 or ASIMD implies ARMv7.
192 : if (cpuInfo->hasFeature(CpuInfo::kArmFeatureVFPv3) ||
193 : cpuInfo->hasFeature(CpuInfo::kArmFeatureASIMD)) {
194 : cpuInfo->addFeature(CpuInfo::kArmFeatureV7);
195 : }
196 :
197 : // `AT_HWCAP2` provides ARMv8+ related flags.
198 : static const LinuxHWCapMapping hwCap2Mapping[] = {
199 : { /* HWCAP2_AES */ (1 << 0), CpuInfo::kArmFeatureAES },
200 : { /* HWCAP2_PMULL */ (1 << 1), CpuInfo::kArmFeaturePMULL },
201 : { /* HWCAP2_SHA1 */ (1 << 2), CpuInfo::kArmFeatureSHA1 },
202 : { /* HWCAP2_SHA2 */ (1 << 3), CpuInfo::kArmFeatureSHA256 },
203 : { /* HWCAP2_CRC32 */ (1 << 4), CpuInfo::kArmFeatureCRC32 }
204 : };
205 : armDetectHWCaps(cpuInfo, AT_HWCAP2, hwCap2Mapping, ASMJIT_ARRAY_SIZE(hwCap2Mapping));
206 :
207 : if (cpuInfo->hasFeature(CpuInfo::kArmFeatureAES ) ||
208 : cpuInfo->hasFeature(CpuInfo::kArmFeatureCRC32 ) ||
209 : cpuInfo->hasFeature(CpuInfo::kArmFeaturePMULL ) ||
210 : cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA1 ) ||
211 : cpuInfo->hasFeature(CpuInfo::kArmFeatureSHA256)) {
212 : cpuInfo->addFeature(CpuInfo::kArmFeatureV8);
213 : }
214 : #else
215 : armPopulateBaselineA64Features(cpuInfo);
216 :
217 : // `AT_HWCAP` provides ARMv8+ related flags.
218 : static const LinuxHWCapMapping hwCapMapping[] = {
219 : { /* HWCAP_ASIMD */ (1 << 1), CpuInfo::kArmFeatureASIMD },
220 : { /* HWCAP_AES */ (1 << 3), CpuInfo::kArmFeatureAES },
221 : { /* HWCAP_CRC32 */ (1 << 7), CpuInfo::kArmFeatureCRC32 },
222 : { /* HWCAP_PMULL */ (1 << 4), CpuInfo::kArmFeaturePMULL },
223 : { /* HWCAP_SHA1 */ (1 << 5), CpuInfo::kArmFeatureSHA1 },
224 : { /* HWCAP_SHA2 */ (1 << 6), CpuInfo::kArmFeatureSHA256 },
225 : { /* HWCAP_ATOMICS */ (1 << 8), CpuInfo::kArmFeatureAtomics64 }
226 : };
227 : armDetectHWCaps(cpuInfo, AT_HWCAP, hwCapMapping, ASMJIT_ARRAY_SIZE(hwCapMapping));
228 :
229 : // `AT_HWCAP2` is not used at the moment.
230 : #endif
231 : }
232 : #endif // ASMJIT_OS_LINUX
233 :
234 : ASMJIT_FAVOR_SIZE static void armDetectCpuInfo(CpuInfo* cpuInfo) noexcept {
235 : #if ASMJIT_OS_WINDOWS
236 : armDetectCpuInfoOnWindows(cpuInfo);
237 : #elif ASMJIT_OS_LINUX
238 : armDetectCpuInfoOnLinux(cpuInfo);
239 : #else
240 : # error "[asmjit] armDetectCpuInfo() - Unsupported OS."
241 : #endif
242 : }
243 : #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
244 :
245 : // ============================================================================
246 : // [asmjit::CpuInfo - Detect X86]
247 : // ============================================================================
248 :
249 : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
250 :
//! \internal
//!
//! X86 CPUID result.
//!
//! The member order (eax, ebx, ecx, edx) must not change: `x86CallCpuId()`
//! passes this struct to compiler intrinsics as an array of four integers.
struct CpuIdResult {
  uint32_t eax;
  uint32_t ebx;
  uint32_t ecx;
  uint32_t edx;
};
257 :
//! \internal
//!
//! Content of XCR register, result of XGETBV instruction, split into its low
//! (`eax`) and high (`edx`) 32-bit halves.
struct XGetBVResult {
  uint32_t eax;
  uint32_t edx;
};
264 :
265 : #if ASMJIT_CC_MSC && !ASMJIT_CC_MSC_GE(15, 0, 30729) && ASMJIT_ARCH_X64
266 : //! \internal
267 : //!
268 : //! HACK: VS2008 or less, 64-bit mode - `__cpuidex` doesn't exist! However,
269 : //! 64-bit calling convention specifies the first parameter to be passed by
270 : //! ECX, so we may be lucky if compiler doesn't move the register, otherwise
271 : //! the result would be wrong.
272 : static void ASMJIT_NOINLINE void x86CallCpuIdWorkaround(uint32_t inEcx, uint32_t inEax, CpuIdResult* result) noexcept {
273 : __cpuid(reinterpret_cast<int*>(result), inEax);
274 : }
275 : #endif
276 :
277 : //! \internal
278 : //!
279 : //! Wrapper to call `cpuid` instruction.
280 : static void ASMJIT_INLINE x86CallCpuId(CpuIdResult* result, uint32_t inEax, uint32_t inEcx = 0) noexcept {
281 : #if ASMJIT_CC_MSC && ASMJIT_CC_MSC_GE(15, 0, 30729)
282 : __cpuidex(reinterpret_cast<int*>(result), inEax, inEcx);
283 : #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X64
284 : x86CallCpuIdWorkaround(inEcx, inEax, result);
285 : #elif ASMJIT_CC_MSC && ASMJIT_ARCH_X86
286 : uint32_t paramEax = inEax;
287 : uint32_t paramEcx = inEcx;
288 : uint32_t* out = reinterpret_cast<uint32_t*>(result);
289 :
290 : __asm {
291 : mov eax, paramEax
292 : mov ecx, paramEcx
293 : mov edi, out
294 : cpuid
295 : mov dword ptr[edi + 0], eax
296 : mov dword ptr[edi + 4], ebx
297 : mov dword ptr[edi + 8], ecx
298 : mov dword ptr[edi + 12], edx
299 : }
300 : #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG) && ASMJIT_ARCH_X86
301 : __asm__ __volatile__(
302 : "mov %%ebx, %%edi\n"
303 : "cpuid\n"
304 : "xchg %%edi, %%ebx\n"
305 : : "=a"(result->eax),
306 : "=D"(result->ebx),
307 : "=c"(result->ecx),
308 : "=d"(result->edx)
309 : : "a"(inEax),
310 : "c"(inEcx));
311 : #elif (ASMJIT_CC_GCC || ASMJIT_CC_CLANG || ASMJIT_CC_INTEL) && ASMJIT_ARCH_X64
312 2860 : __asm__ __volatile__(
313 : "mov %%rbx, %%rdi\n"
314 : "cpuid\n"
315 : "xchg %%rdi, %%rbx\n"
316 : : "=a"(result->eax),
317 : "=D"(result->ebx),
318 : "=c"(result->ecx),
319 : "=d"(result->edx)
320 : : "a"(inEax),
321 : "c"(inEcx));
322 : #else
323 : # error "[asmjit] x86CallCpuid() - Unsupported compiler."
324 : #endif
325 : }
326 :
327 : //! \internal
328 : //!
329 : //! Wrapper to call `xgetbv` instruction.
330 : static ASMJIT_INLINE void x86CallXGetBV(XGetBVResult* result, uint32_t inEcx) noexcept {
331 : #if ASMJIT_CC_MSC_GE(16, 0, 40219) // 2010SP1+
332 : uint64_t value = _xgetbv(inEcx);
333 : result->eax = static_cast<uint32_t>(value & 0xFFFFFFFFU);
334 : result->edx = static_cast<uint32_t>(value >> 32);
335 : #elif ASMJIT_CC_GCC || ASMJIT_CC_CLANG
336 : uint32_t outEax;
337 : uint32_t outEdx;
338 :
339 : // Replaced, because the world is not perfect:
340 : // __asm__ __volatile__("xgetbv" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
341 260 : __asm__ __volatile__(".byte 0x0F, 0x01, 0xd0" : "=a"(outEax), "=d"(outEdx) : "c"(inEcx));
342 :
343 : result->eax = outEax;
344 : result->edx = outEdx;
345 : #else
346 : result->eax = 0;
347 : result->edx = 0;
348 : #endif
349 260 : }
350 :
351 : //! \internal
352 : //!
353 : //! Map a 12-byte vendor string returned by `cpuid` into a `CpuInfo::Vendor` ID.
354 : static ASMJIT_INLINE uint32_t x86GetCpuVendorID(const char* vendorString) noexcept {
355 : struct VendorData {
356 : uint32_t id;
357 : char text[12];
358 : };
359 :
360 : static const VendorData vendorList[] = {
361 : { CpuInfo::kVendorIntel , { 'G', 'e', 'n', 'u', 'i', 'n', 'e', 'I', 'n', 't', 'e', 'l' } },
362 : { CpuInfo::kVendorAMD , { 'A', 'u', 't', 'h', 'e', 'n', 't', 'i', 'c', 'A', 'M', 'D' } },
363 : { CpuInfo::kVendorVIA , { 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 , 'V', 'I', 'A', 0 } },
364 : { CpuInfo::kVendorVIA , { 'C', 'e', 'n', 't', 'a', 'u', 'r', 'H', 'a', 'u', 'l', 's' } }
365 : };
366 :
367 260 : uint32_t dw0 = reinterpret_cast<const uint32_t*>(vendorString)[0];
368 260 : uint32_t dw1 = reinterpret_cast<const uint32_t*>(vendorString)[1];
369 260 : uint32_t dw2 = reinterpret_cast<const uint32_t*>(vendorString)[2];
370 :
371 520 : for (uint32_t i = 0; i < ASMJIT_ARRAY_SIZE(vendorList); i++) {
372 520 : if (dw0 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[0] &&
373 260 : dw1 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[1] &&
374 260 : dw2 == reinterpret_cast<const uint32_t*>(vendorList[i].text)[2])
375 260 : return vendorList[i].id;
376 : }
377 :
378 : return CpuInfo::kVendorNone;
379 : }
380 :
//! \internal
//!
//! Compact the null-terminated brand string `s` in place.
//!
//! A space is dropped when the last kept character is '@' or when the next
//! character is another space or '@'. Every source byte is cleared as it is
//! consumed, so everything past the new terminator (up to the original
//! length) ends up zeroed instead of holding stale characters.
static ASMJIT_INLINE void x86SimplifyBrandString(char* s) noexcept {
  char* out = s;       // Write cursor, never ahead of the read cursor.
  char lastKept = 0;   // Last character that was written to the output.

  for (char* in = s; *in != '\0'; ++in) {
    const char c = *in;
    const char next = in[1];

    // Clear the consumed byte; it is rewritten below if `out` still points
    // at it and the character is kept.
    *in = '\0';

    const bool dropSpace = (c == ' ') && (lastKept == '@' || next == ' ' || next == '@');
    if (!dropSpace) {
      *out++ = c;
      lastKept = c;
    }
  }

  *out = '\0';
}
410 :
411 260 : ASMJIT_FAVOR_SIZE static void x86DetectCpuInfo(CpuInfo* cpuInfo) noexcept {
412 : uint32_t i, maxId;
413 :
414 : CpuIdResult regs;
415 : XGetBVResult xcr0 = { 0, 0 };
416 :
417 260 : cpuInfo->_archInfo.init(ArchInfo::kTypeHost);
418 : cpuInfo->addFeature(CpuInfo::kX86FeatureI486);
419 :
420 : // --------------------------------------------------------------------------
421 : // [CPUID EAX=0x0]
422 : // --------------------------------------------------------------------------
423 :
424 : // Get vendor string/id.
425 : x86CallCpuId(®s, 0x0);
426 :
427 260 : maxId = regs.eax;
428 260 : ::memcpy(cpuInfo->_vendorString + 0, ®s.ebx, 4);
429 260 : ::memcpy(cpuInfo->_vendorString + 4, ®s.edx, 4);
430 260 : ::memcpy(cpuInfo->_vendorString + 8, ®s.ecx, 4);
431 260 : cpuInfo->_vendorId = x86GetCpuVendorID(cpuInfo->_vendorString);
432 :
433 : // --------------------------------------------------------------------------
434 : // [CPUID EAX=0x1]
435 : // --------------------------------------------------------------------------
436 :
437 260 : if (maxId >= 0x1) {
438 : // Get feature flags in ECX/EDX and family/model in EAX.
439 : x86CallCpuId(®s, 0x1);
440 :
441 : // Fill family and model fields.
442 260 : cpuInfo->_family = (regs.eax >> 8) & 0x0F;
443 260 : cpuInfo->_model = (regs.eax >> 4) & 0x0F;
444 260 : cpuInfo->_stepping = (regs.eax ) & 0x0F;
445 :
446 : // Use extended family and model fields.
447 260 : if (cpuInfo->_family == 0x0F) {
448 260 : cpuInfo->_family += ((regs.eax >> 20) & 0xFF);
449 260 : cpuInfo->_model += ((regs.eax >> 16) & 0x0F) << 4;
450 : }
451 :
452 260 : cpuInfo->_x86Data._processorType = ((regs.eax >> 12) & 0x03);
453 260 : cpuInfo->_x86Data._brandIndex = ((regs.ebx ) & 0xFF);
454 260 : cpuInfo->_x86Data._flushCacheLineSize = ((regs.ebx >> 8) & 0xFF) * 8;
455 260 : cpuInfo->_x86Data._maxLogicalProcessors = ((regs.ebx >> 16) & 0xFF);
456 :
457 260 : if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE3);
458 260 : if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCLMULQDQ);
459 260 : if (regs.ecx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureMONITOR);
460 260 : if (regs.ecx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSSE3);
461 260 : if (regs.ecx & 0x00002000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG16B);
462 260 : if (regs.ecx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_1);
463 260 : if (regs.ecx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4_2);
464 260 : if (regs.ecx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMOVBE);
465 260 : if (regs.ecx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePOPCNT);
466 260 : if (regs.ecx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAESNI);
467 260 : if (regs.ecx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVE);
468 260 : if (regs.ecx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureOSXSAVE);
469 260 : if (regs.ecx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDRAND);
470 260 : if (regs.edx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSC);
471 260 : if (regs.edx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSR);
472 260 : if (regs.edx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMPXCHG8B);
473 260 : if (regs.edx & 0x00008000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCMOV);
474 260 : if (regs.edx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSH);
475 260 : if (regs.edx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX);
476 260 : if (regs.edx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSR);
477 260 : if (regs.edx & 0x02000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
478 : .addFeature(CpuInfo::kX86FeatureMMX2);
479 260 : if (regs.edx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE)
480 : .addFeature(CpuInfo::kX86FeatureSSE2);
481 260 : if (regs.edx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMT);
482 :
483 : // Get the content of XCR0 if supported by CPU and enabled by OS.
484 260 : if ((regs.ecx & 0x0C000000U) == 0x0C000000U) {
485 : x86CallXGetBV(&xcr0, 0);
486 : }
487 :
488 : // Detect AVX+.
489 260 : if (regs.ecx & 0x10000000U) {
490 : // - XCR0[2:1] == 11b
491 : // XMM & YMM states need to be enabled by OS.
492 260 : if ((xcr0.eax & 0x00000006U) == 0x00000006U) {
493 : cpuInfo->addFeature(CpuInfo::kX86FeatureAVX);
494 :
495 260 : if (regs.ecx & 0x00001000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA);
496 260 : if (regs.ecx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureF16C);
497 : }
498 : }
499 : }
500 :
501 : // --------------------------------------------------------------------------
502 : // [CPUID EAX=0x7]
503 : // --------------------------------------------------------------------------
504 :
505 : // Detect new features if the processor supports CPUID-07.
506 : bool maybeMPX = false;
507 :
508 260 : if (maxId >= 0x7) {
509 : x86CallCpuId(®s, 0x7);
510 :
511 260 : if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureFSGSBASE);
512 260 : if (regs.ebx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI);
513 260 : if (regs.ebx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureHLE);
514 260 : if (regs.ebx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMEP);
515 260 : if (regs.ebx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeatureBMI2);
516 260 : if (regs.ebx & 0x00000200U) cpuInfo->addFeature(CpuInfo::kX86FeatureERMS);
517 260 : if (regs.ebx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureRTM);
518 260 : if (regs.ebx & 0x00004000U) maybeMPX = true;
519 260 : if (regs.ebx & 0x00040000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDSEED);
520 260 : if (regs.ebx & 0x00080000U) cpuInfo->addFeature(CpuInfo::kX86FeatureADX);
521 260 : if (regs.ebx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSMAP);
522 260 : if (regs.ebx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeaturePCOMMIT);
523 260 : if (regs.ebx & 0x00800000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLFLUSHOPT);
524 260 : if (regs.ebx & 0x01000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLWB);
525 260 : if (regs.ebx & 0x20000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureSHA);
526 260 : if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHWT1);
527 :
528 : // TSX is supported if at least one of `HLE` and `RTM` is supported.
529 260 : if (regs.ebx & 0x00000810U) cpuInfo->addFeature(CpuInfo::kX86FeatureTSX);
530 :
531 : // Detect AVX2.
532 260 : if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
533 260 : if (regs.ebx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX2);
534 : }
535 :
536 : // Detect AVX-512+.
537 260 : if (regs.ebx & 0x00010000U) {
538 : // - XCR0[2:1] == 11b
539 : // XMM/YMM states need to be enabled by OS.
540 : // - XCR0[7:5] == 111b
541 : // Upper 256-bit of ZMM0-XMM15 and ZMM16-ZMM31 need to be enabled by the OS.
542 0 : if ((xcr0.eax & 0x000000E6U) == 0x000000E6U) {
543 : cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_F);
544 :
545 0 : if (regs.ebx & 0x00020000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_DQ);
546 0 : if (regs.ebx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_IFMA);
547 0 : if (regs.ebx & 0x04000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_PFI);
548 0 : if (regs.ebx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_ERI);
549 0 : if (regs.ebx & 0x10000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_CDI);
550 0 : if (regs.ebx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_BW);
551 0 : if (regs.ebx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VL);
552 0 : if (regs.ecx & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VBMI);
553 0 : if (regs.ecx & 0x00004000U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_VPOPCNTDQ);
554 0 : if (regs.edx & 0x00000004U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_4VNNIW);
555 0 : if (regs.edx & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureAVX512_4FMAPS);
556 : }
557 : }
558 : }
559 :
560 : // --------------------------------------------------------------------------
561 : // [CPUID EAX=0xD]
562 : // --------------------------------------------------------------------------
563 :
564 260 : if (maxId >= 0xD) {
565 : x86CallCpuId(®s, 0xD, 0);
566 :
567 : // Both CPUID result and XCR0 has to be enabled to have support for MPX.
568 260 : if (((regs.eax & xcr0.eax) & 0x00000018U) == 0x00000018U && maybeMPX)
569 : cpuInfo->addFeature(CpuInfo::kX86FeatureMPX);
570 :
571 : x86CallCpuId(®s, 0xD, 1);
572 260 : if (regs.eax & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVEOPT);
573 260 : if (regs.eax & 0x00000002U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVEC);
574 260 : if (regs.eax & 0x00000008U) cpuInfo->addFeature(CpuInfo::kX86FeatureXSAVES);
575 : }
576 :
577 : // --------------------------------------------------------------------------
578 : // [CPUID EAX=0x80000000...maxId]
579 : // --------------------------------------------------------------------------
580 :
581 : // The highest EAX that we understand.
582 260 : uint32_t kHighestProcessedEAX = 0x80000008U;
583 :
584 : // Several CPUID calls are required to get the whole branc string. It's easy
585 : // to copy one DWORD at a time instead of performing a byte copy.
586 260 : uint32_t* brand = reinterpret_cast<uint32_t*>(cpuInfo->_brandString);
587 :
588 : i = maxId = 0x80000000U;
589 : do {
590 : x86CallCpuId(®s, i);
591 1560 : switch (i) {
592 : case 0x80000000U:
593 260 : maxId = std::min<uint32_t>(regs.eax, kHighestProcessedEAX);
594 260 : break;
595 :
596 260 : case 0x80000001U:
597 260 : if (regs.ecx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureLAHFSAHF);
598 260 : if (regs.ecx & 0x00000020U) cpuInfo->addFeature(CpuInfo::kX86FeatureLZCNT);
599 260 : if (regs.ecx & 0x00000040U) cpuInfo->addFeature(CpuInfo::kX86FeatureSSE4A);
600 260 : if (regs.ecx & 0x00000080U) cpuInfo->addFeature(CpuInfo::kX86FeatureMSSE);
601 260 : if (regs.ecx & 0x00000100U) cpuInfo->addFeature(CpuInfo::kX86FeaturePREFETCHW);
602 260 : if (regs.ecx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureTBM);
603 260 : if (regs.edx & 0x00100000U) cpuInfo->addFeature(CpuInfo::kX86FeatureNX);
604 260 : if (regs.edx & 0x00200000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFXSROPT);
605 260 : if (regs.edx & 0x00400000U) cpuInfo->addFeature(CpuInfo::kX86FeatureMMX2);
606 260 : if (regs.edx & 0x08000000U) cpuInfo->addFeature(CpuInfo::kX86FeatureRDTSCP);
607 260 : if (regs.edx & 0x40000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW2)
608 : .addFeature(CpuInfo::kX86FeatureMMX2);
609 260 : if (regs.edx & 0x80000000U) cpuInfo->addFeature(CpuInfo::kX86Feature3DNOW);
610 :
611 260 : if (cpuInfo->hasFeature(CpuInfo::kX86FeatureAVX)) {
612 260 : if (regs.ecx & 0x00000800U) cpuInfo->addFeature(CpuInfo::kX86FeatureXOP);
613 260 : if (regs.ecx & 0x00010000U) cpuInfo->addFeature(CpuInfo::kX86FeatureFMA4);
614 : }
615 :
616 : // These seem to be only supported by AMD.
617 260 : if (cpuInfo->getVendorId() == CpuInfo::kVendorAMD) {
618 260 : if (regs.ecx & 0x00000010U) cpuInfo->addFeature(CpuInfo::kX86FeatureALTMOVCR8);
619 : }
620 : break;
621 :
622 780 : case 0x80000002U:
623 : case 0x80000003U:
624 : case 0x80000004U:
625 780 : *brand++ = regs.eax;
626 780 : *brand++ = regs.ebx;
627 780 : *brand++ = regs.ecx;
628 780 : *brand++ = regs.edx;
629 :
630 : // Go directly to the last one.
631 780 : if (i == 0x80000004U) i = 0x80000008U - 1;
632 : break;
633 :
634 260 : case 0x80000008U:
635 260 : if (regs.ebx & 0x00000001U) cpuInfo->addFeature(CpuInfo::kX86FeatureCLZERO);
636 : break;
637 : }
638 1560 : } while (++i <= maxId);
639 :
640 : // Simplify CPU brand string by removing unnecessary spaces.
641 : x86SimplifyBrandString(cpuInfo->_brandString);
642 260 : }
643 : #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
644 :
645 : // ============================================================================
646 : // [asmjit::CpuInfo - Detect - HWThreadsCount]
647 : // ============================================================================
648 :
//! \internal
//!
//! Return the number of hardware threads reported by the operating system.
//!
//! Uses `GetSystemInfo()` on Windows and `sysconf(_SC_NPROCESSORS_ONLN)` on
//! POSIX systems that define it. Returns 1 when the POSIX query yields a
//! non-positive value or when no detection method is available.
static ASMJIT_INLINE uint32_t cpuDetectHWThreadsCount() noexcept {
#if ASMJIT_OS_WINDOWS
  SYSTEM_INFO sysInfo;
  ::GetSystemInfo(&sysInfo);
  return sysInfo.dwNumberOfProcessors;
#elif ASMJIT_OS_POSIX && defined(_SC_NPROCESSORS_ONLN)
  const long online = ::sysconf(_SC_NPROCESSORS_ONLN);
  return online > 0 ? static_cast<uint32_t>(online) : static_cast<uint32_t>(1);
#else
  return 1;
#endif
}
662 :
663 : // ============================================================================
664 : // [asmjit::CpuInfo - Detect]
665 : // ============================================================================
666 :
667 260 : ASMJIT_FAVOR_SIZE void CpuInfo::detect() noexcept {
668 : reset();
669 :
670 : #if ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
671 : armDetectCpuInfo(this);
672 : #endif // ASMJIT_ARCH_ARM32 || ASMJIT_ARCH_ARM64
673 :
674 : #if ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
675 260 : x86DetectCpuInfo(this);
676 : #endif // ASMJIT_ARCH_X86 || ASMJIT_ARCH_X64
677 :
678 260 : _hwThreadsCount = cpuDetectHWThreadsCount();
679 260 : }
680 :
681 : // ============================================================================
682 : // [asmjit::CpuInfo - GetHost]
683 : // ============================================================================
684 :
685 : struct HostCpuInfo : public CpuInfo {
686 260 : ASMJIT_INLINE HostCpuInfo() noexcept : CpuInfo() { detect(); }
687 : };
688 :
689 32141 : const CpuInfo& CpuInfo::getHost() noexcept {
690 32401 : static HostCpuInfo host;
691 32141 : return host;
692 : }
693 :
694 : } // asmjit namespace
695 : } // namespace PLMD
696 :
697 : // [Api-End]
698 : #include "./asmjit_apiend.h"
699 : #pragma GCC diagnostic pop
700 : #endif // __PLUMED_HAS_ASMJIT
|