mirror of
git://sourceware.org/git/glibc.git
synced 2025-03-06 20:58:33 +01:00
x86: Enable non-temporal memset tunable for AMD
In commit 46b5e98ef6
("x86: Add seperate non-temporal tunable for
memset") a tunable threshold for enabling non-temporal memset was added,
but only for Intel hardware.
Since that commit, new benchmark results suggest that non-temporal
memset is beneficial on AMD, as well, so allow this tunable to be set
for AMD.
See:
https://docs.google.com/spreadsheets/d/1opzukzvum4n6-RUVHTGddV6RjAEil4P2uMjjQGLbLcU/edit?usp=sharing
which has been updated to include data using different strategies for
large memset on AMD Zen2, Zen3, and Zen4.
Signed-off-by: Joe Damato <jdamato@fastly.com>
Reviewed-by: Noah Goldstein <goldstein.w.n@gmail.com>
This commit is contained in:
parent
5968125f55
commit
bef2a827a5
1 changed files with 4 additions and 4 deletions
|
@ -986,11 +986,11 @@ dl_init_cacheinfo (struct cpu_features *cpu_features)
|
|||
if (CPU_FEATURE_USABLE_P (cpu_features, FSRM))
|
||||
rep_movsb_threshold = 2112;
|
||||
|
||||
/* Non-temporal stores in memset have only been tested on Intel hardware.
|
||||
Until we benchmark data on other x86 processor, disable non-temporal
|
||||
stores in memset. */
|
||||
/* Non-temporal stores are more performant on Intel and AMD hardware above
|
||||
non_temporal_threshold. Enable this for both Intel and AMD hardware. */
|
||||
unsigned long int memset_non_temporal_threshold = SIZE_MAX;
|
||||
if (cpu_features->basic.kind == arch_kind_intel)
|
||||
if (cpu_features->basic.kind == arch_kind_intel
|
||||
|| cpu_features->basic.kind == arch_kind_amd)
|
||||
memset_non_temporal_threshold = non_temporal_threshold;
|
||||
|
||||
/* For AMD CPUs that support ERMS (Zen3+), REP MOVSB is in a lot of
|
||||
|
|
Loading…
Add table
Reference in a new issue