diff options
Diffstat (limited to 'platform/linux-generic/arch/x86')
-rw-r--r-- | platform/linux-generic/arch/x86/cpu_flags.c | 150 | ||||
-rw-r--r-- | platform/linux-generic/arch/x86/odp/api/abi/sync_inlines.h | 31 | ||||
-rw-r--r-- | platform/linux-generic/arch/x86/odp/api/abi/time_cpu.h (renamed from platform/linux-generic/arch/x86/odp/api/abi/cpu_time.h) | 12 | ||||
-rw-r--r-- | platform/linux-generic/arch/x86/odp/api/abi/time_inlines.h | 7 | ||||
-rw-r--r-- | platform/linux-generic/arch/x86/odp_sysinfo_parse.c | 6 | ||||
-rw-r--r-- | platform/linux-generic/arch/x86/odp_time_cpu.c (renamed from platform/linux-generic/arch/x86/odp_global_time.c) | 29 |
6 files changed, 141 insertions, 94 deletions
diff --git a/platform/linux-generic/arch/x86/cpu_flags.c b/platform/linux-generic/arch/x86/cpu_flags.c index 036645dbc..9211df002 100644 --- a/platform/linux-generic/arch/x86/cpu_flags.c +++ b/platform/linux-generic/arch/x86/cpu_flags.c @@ -1,45 +1,23 @@ /* Copyright (c) 2017-2018, Linaro Limited + * Copyright (c) 2023, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause */ -/*- - * BSD LICENSE - * - * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2010-2015 Intel Corporation */ #include "cpu_flags.h" + +#include <odp/api/abi/time_cpu.h> + #include <odp_debug_internal.h> -#include <odp/api/abi/cpu_time.h> +#include <odp_global_data.h> + +#include <cpuid.h> +#include <errno.h> #include <stdio.h> #include <stdint.h> @@ -74,6 +52,7 @@ enum rte_cpu_flag_t { RTE_CPUFLAG_AVX, /**< AVX */ RTE_CPUFLAG_F16C, /**< F16C */ RTE_CPUFLAG_RDRAND, /**< RDRAND */ + RTE_CPUFLAG_HYPERVISOR, /**< Running in a VM */ /* (EAX 01h) EDX features */ RTE_CPUFLAG_FPU, /**< FPU */ @@ -130,6 +109,7 @@ enum rte_cpu_flag_t { RTE_CPUFLAG_INVPCID, /**< INVPCID */ RTE_CPUFLAG_RTM, /**< Transactional memory */ RTE_CPUFLAG_AVX512F, /**< AVX512F */ + RTE_CPUFLAG_RDSEED, /**< RDSEED instruction */ /* (EAX 80000001h) ECX features */ RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */ @@ -145,8 +125,29 @@ enum rte_cpu_flag_t { /* (EAX 80000007h) EDX features */ RTE_CPUFLAG_INVTSC, /**< INVTSC */ + RTE_CPUFLAG_AVX512DQ, /**< AVX512 Doubleword and Quadword */ + RTE_CPUFLAG_AVX512IFMA, /**< AVX512 Integer Fused Multiply-Add */ + RTE_CPUFLAG_AVX512CD, /**< AVX512 Conflict Detection*/ + RTE_CPUFLAG_AVX512BW, /**< AVX512 Byte and Word */ + RTE_CPUFLAG_AVX512VL, /**< AVX512 Vector Length */ + RTE_CPUFLAG_AVX512VBMI, /**< AVX512 Vector Bit Manipulation */ + RTE_CPUFLAG_AVX512VBMI2, /**< AVX512 Vector Bit Manipulation 2 */ + RTE_CPUFLAG_GFNI, /**< Galois Field New Instructions */ + RTE_CPUFLAG_VAES, /**< Vector AES */ + RTE_CPUFLAG_VPCLMULQDQ, /**< Vector Carry-less Multiply */ + RTE_CPUFLAG_AVX512VNNI, + /**< AVX512 Vector Neural Network Instructions */ + RTE_CPUFLAG_AVX512BITALG, /**< AVX512 Bit Algorithms */ + RTE_CPUFLAG_AVX512VPOPCNTDQ, /**< AVX512 Vector Popcount */ + RTE_CPUFLAG_CLDEMOTE, /**< Cache Line Demote */ + RTE_CPUFLAG_MOVDIRI, /**< Direct Store Instructions */ + RTE_CPUFLAG_MOVDIR64B, /**< Direct Store Instructions 64B */ + RTE_CPUFLAG_AVX512VP2INTERSECT, /**< AVX512 Two Register Intersection */ + + RTE_CPUFLAG_WAITPKG, /**< UMONITOR/UMWAIT/TPAUSE */ + /* The last item */ - RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ + RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ }; enum cpu_register_t { @@ -203,6 +204,7 @@ static const struct feature_entry cpu_feature_table[] = { FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28) FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29) FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30) + FEAT_DEF(HYPERVISOR, 0x00000001, 0, RTE_REG_ECX, 31) FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0) FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1) @@ -246,15 +248,36 @@ static const struct feature_entry cpu_feature_table[] = { FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3) FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0) - FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2) + FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 3) FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4) FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5) - FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6) - FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7) - FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8) + FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 7) + FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 8) + FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 9) FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10) FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11) FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16) + FEAT_DEF(AVX512DQ, 0x00000007, 0, RTE_REG_EBX, 17) + FEAT_DEF(RDSEED, 0x00000007, 0, RTE_REG_EBX, 18) + FEAT_DEF(AVX512IFMA, 0x00000007, 0, RTE_REG_EBX, 21) + FEAT_DEF(AVX512CD, 0x00000007, 0, RTE_REG_EBX, 28) + FEAT_DEF(AVX512BW, 0x00000007, 0, RTE_REG_EBX, 30) + FEAT_DEF(AVX512VL, 0x00000007, 0, RTE_REG_EBX, 31) + + FEAT_DEF(AVX512VBMI, 0x00000007, 0, RTE_REG_ECX, 1) + FEAT_DEF(WAITPKG, 0x00000007, 0, RTE_REG_ECX, 5) + FEAT_DEF(AVX512VBMI2, 0x00000007, 0, RTE_REG_ECX, 6) + FEAT_DEF(GFNI, 0x00000007, 0, RTE_REG_ECX, 8) + FEAT_DEF(VAES, 0x00000007, 0, RTE_REG_ECX, 9) + FEAT_DEF(VPCLMULQDQ, 0x00000007, 0, RTE_REG_ECX, 10) + FEAT_DEF(AVX512VNNI, 0x00000007, 0, RTE_REG_ECX, 11) + FEAT_DEF(AVX512BITALG, 0x00000007, 0, RTE_REG_ECX, 12) + FEAT_DEF(AVX512VPOPCNTDQ, 0x00000007, 0, RTE_REG_ECX, 14) + FEAT_DEF(CLDEMOTE, 0x00000007, 0, RTE_REG_ECX, 25) + FEAT_DEF(MOVDIRI, 0x00000007, 0, RTE_REG_ECX, 27) + FEAT_DEF(MOVDIR64B, 0x00000007, 0, RTE_REG_ECX, 28) + + FEAT_DEF(AVX512VP2INTERSECT, 0x00000007, 0, RTE_REG_EDX, 8) FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0) FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4) @@ -268,55 +291,30 @@ static const struct feature_entry cpu_feature_table[] = { FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) }; -/* - * Execute CPUID instruction and get contents of a specific register - * - * This function, when compiled with GCC, will generate architecture-neutral - * code, as per GCC manual. - */ -static void cpu_get_features(uint32_t leaf, uint32_t subleaf, - cpuid_registers_t out) -{ -#if defined(__i386__) && defined(__PIC__) - /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ - __asm__ __volatile__("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" - : "=r" (out[RTE_REG_EBX]), - "=a" (out[RTE_REG_EAX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#else - __asm__ __volatile__("cpuid" - : "=a" (out[RTE_REG_EAX]), - "=b" (out[RTE_REG_EBX]), - "=c" (out[RTE_REG_ECX]), - "=d" (out[RTE_REG_EDX]) - : "a" (leaf), "c" (subleaf)); -#endif -} - static int cpu_get_flag_enabled(enum rte_cpu_flag_t feature) { const struct feature_entry *feat; cpuid_registers_t regs; + unsigned int maxleaf; if (feature >= RTE_CPUFLAG_NUMFLAGS) /* Flag does not match anything in the feature tables */ - return -1; + return -ENOENT; feat = &cpu_feature_table[feature]; if (!feat->leaf) /* This entry in the table wasn't filled out! */ - return -1; + return -EFAULT; + + maxleaf = __get_cpuid_max(feat->leaf & 0x80000000, NULL); - cpu_get_features(feat->leaf & 0xffff0000, 0, regs); - if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || - regs[RTE_REG_EAX] < feat->leaf) + if (maxleaf < feat->leaf) return 0; - /* get the cpuid leaf containing the desired feature */ - cpu_get_features(feat->leaf, feat->subleaf, regs); + __cpuid_count(feat->leaf, feat->subleaf, + regs[RTE_REG_EAX], regs[RTE_REG_EBX], + regs[RTE_REG_ECX], regs[RTE_REG_EDX]); /* check if the feature is enabled */ return (regs[feat->reg] >> feat->bit) & 1; @@ -359,12 +357,16 @@ void _odp_cpu_flags_print_all(void) _ODP_PRINT("%s", str); } -int _odp_cpu_has_global_time(void) +int _odp_time_cpu_global_freq_is_const(void) { - if (cpu_get_flag_enabled(RTE_CPUFLAG_INVTSC) > 0) + if (odp_global_ro.system_info.cpu_constant_tsc || + cpu_get_flag_enabled(RTE_CPUFLAG_INVTSC) > 0) return 1; - return 0; + _ODP_ERR("WARN: assuming constant TSC based on CPU arch, but could not confirm from CPU " + "flags\n"); + + return 1; } int _odp_cpu_flags_has_rdtsc(void) diff --git a/platform/linux-generic/arch/x86/odp/api/abi/sync_inlines.h b/platform/linux-generic/arch/x86/odp/api/abi/sync_inlines.h new file mode 100644 index 000000000..bebe6b571 --- /dev/null +++ b/platform/linux-generic/arch/x86/odp/api/abi/sync_inlines.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2023 Nokia + */ + +#ifndef ODP_ARCH_SYNC_INLINES_H_ +#define ODP_ARCH_SYNC_INLINES_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void _odp_mb_sync(void) +{ + __asm__ volatile("mfence" ::: "memory"); +} + +static inline void _odp_mb_sync_load(void) +{ + __asm__ volatile("lfence" ::: "memory"); +} + +static inline void _odp_mb_sync_store(void) +{ + __asm__ volatile("sfence" ::: "memory"); +} + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/platform/linux-generic/arch/x86/odp/api/abi/cpu_time.h b/platform/linux-generic/arch/x86/odp/api/abi/time_cpu.h index c74c4d606..baf79ad3f 100644 --- a/platform/linux-generic/arch/x86/odp/api/abi/cpu_time.h +++ b/platform/linux-generic/arch/x86/odp/api/abi/time_cpu.h @@ -4,8 +4,8 @@ * SPDX-License-Identifier: BSD-3-Clause */ -#ifndef ODP_ARCH_CPU_TIME_H_ -#define ODP_ARCH_CPU_TIME_H_ +#ifndef ODP_ARCH_TIME_CPU_H_ +#define ODP_ARCH_TIME_CPU_H_ #ifdef __cplusplus extern "C" { @@ -14,19 +14,19 @@ extern "C" { #include <stdint.h> #include <odp/api/abi/cpu_rdtsc.h> -static inline uint64_t _odp_cpu_global_time(void) +static inline uint64_t _odp_time_cpu_global(void) { return _odp_cpu_rdtsc(); } -static inline uint64_t _odp_cpu_global_time_strict(void) +static inline uint64_t _odp_time_cpu_global_strict(void) { __atomic_thread_fence(__ATOMIC_SEQ_CST); return _odp_cpu_rdtsc(); } -int _odp_cpu_has_global_time(void); -uint64_t _odp_cpu_global_time_freq(void); +int _odp_time_cpu_global_freq_is_const(void); +uint64_t _odp_time_cpu_global_freq(void); #ifdef __cplusplus } diff --git a/platform/linux-generic/arch/x86/odp/api/abi/time_inlines.h b/platform/linux-generic/arch/x86/odp/api/abi/time_inlines.h new file mode 100644 index 000000000..331d1996f --- /dev/null +++ b/platform/linux-generic/arch/x86/odp/api/abi/time_inlines.h @@ -0,0 +1,7 @@ +/* Copyright (c) 2023, Nokia + * All rights reserved. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include <odp/api/abi/time_cpu_inlines.h> diff --git a/platform/linux-generic/arch/x86/odp_sysinfo_parse.c b/platform/linux-generic/arch/x86/odp_sysinfo_parse.c index c74c52045..3cbdb2037 100644 --- a/platform/linux-generic/arch/x86/odp_sysinfo_parse.c +++ b/platform/linux-generic/arch/x86/odp_sysinfo_parse.c @@ -1,4 +1,5 @@ /* Copyright (c) 2016-2018, Linaro Limited + * Copyright (c) 2023, Nokia * All rights reserved. * * SPDX-License-Identifier: BSD-3-Clause @@ -30,6 +31,11 @@ int _odp_cpuinfo_parser(FILE *file, system_info_t *sysinfo) strcpy(sysinfo->cpu_arch_str, "x86"); while (fgets(str, sizeof(str), file) != NULL && id < CONFIG_NUM_CPU_IDS) { + if (strstr(str, "flags") && strstr(str, "constant_tsc")) { + sysinfo->cpu_constant_tsc = 1; + continue; + } + pos = strstr(str, "model name"); if (pos) { freq_set = false; diff --git a/platform/linux-generic/arch/x86/odp_global_time.c b/platform/linux-generic/arch/x86/odp_time_cpu.c index 00e3a1ac1..aa00ac04e 100644 --- a/platform/linux-generic/arch/x86/odp_global_time.c +++ b/platform/linux-generic/arch/x86/odp_time_cpu.c @@ -6,17 +6,18 @@ #include <odp_posix_extensions.h> -#include <time.h> - #include <odp/api/hints.h> +#include <odp/api/time_types.h> + +#include <odp/api/abi/time_cpu.h> + #include <odp_debug_internal.h> -#include <odp/api/abi/cpu_time.h> -#define SEC_IN_NS 1000000000ULL +#include <time.h> /* Measure TSC frequency. Frequency information registers are defined for x86, * but those are often not enumerated. */ -uint64_t _odp_cpu_global_time_freq(void) +uint64_t _odp_time_cpu_global_freq(void) { struct timespec sleep, ts1, ts2; uint64_t t1, t2, ts_nsec, cycles, hz; @@ -29,35 +30,35 @@ uint64_t _odp_cpu_global_time_freq(void) sleep.tv_sec = 0; if (warm_up) - sleep.tv_nsec = SEC_IN_NS / 1000; + sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 1000; else - sleep.tv_nsec = SEC_IN_NS / 4; + sleep.tv_nsec = ODP_TIME_SEC_IN_NS / 4; if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts1)) { - _ODP_DBG("clock_gettime failed\n"); + _ODP_ERR("clock_gettime() failed\n"); return 0; } - t1 = _odp_cpu_global_time(); + t1 = _odp_time_cpu_global(); if (nanosleep(&sleep, NULL) < 0) { - _ODP_DBG("nanosleep failed\n"); + _ODP_ERR("nanosleep() failed\n"); return 0; } if (clock_gettime(CLOCK_MONOTONIC_RAW, &ts2)) { - _ODP_DBG("clock_gettime failed\n"); + _ODP_ERR("clock_gettime() failed\n"); return 0; } - t2 = _odp_cpu_global_time(); + t2 = _odp_time_cpu_global(); - ts_nsec = (ts2.tv_sec - ts1.tv_sec) * SEC_IN_NS; + ts_nsec = (ts2.tv_sec - ts1.tv_sec) * ODP_TIME_SEC_IN_NS; ts_nsec += ts2.tv_nsec - ts1.tv_nsec; cycles = t2 - t1; - hz = (cycles * SEC_IN_NS) / ts_nsec; + hz = (cycles * ODP_TIME_SEC_IN_NS) / ts_nsec; if (warm_up) warm_up = 0; |