From affef66b65889a0ea0060e13e5f7fe569897d787 Mon Sep 17 00:00:00 2001 From: WANG Rui Date: Wed, 8 Nov 2023 14:12:15 +0800 Subject: [PATCH] LoongArch: Relax memory ordering for atomic operations This patch relaxes the implementation while satisfying the memory ordering requirements for atomic operations, which will help improve performance on LA664+. Unixbench with full threads (8) before after Dhrystone 2 using register variables 203910714.2 203909539.8 0.00% Double-Precision Whetstone 37930.9 37931 0.00% Execl Throughput 29431.5 29545.8 0.39% File Copy 1024 bufsize 2000 maxblocks 6645759.5 6676320 0.46% File Copy 256 bufsize 500 maxblocks 2138772.4 2144182.4 0.25% File Copy 4096 bufsize 8000 maxblocks 11640698.4 11602703 -0.33% Pipe Throughput 8849077.7 8917009.4 0.77% Pipe-based Context Switching 1255108.5 1287277.3 2.56% Process Creation 50825.9 50442.1 -0.76% Shell Scripts (1 concurrent) 25795.8 25942.3 0.57% Shell Scripts (8 concurrent) 3812.6 3835.2 0.59% System Call Overhead 9248212.6 9353348.6 1.14% ======= System Benchmarks Index Score 8076.6 8114.4 0.47% Signed-off-by: WANG Rui Signed-off-by: Huacai Chen --- arch/loongarch/include/asm/atomic.h | 88 ++++++++++++++++++++++------- 1 file changed, 68 insertions(+), 20 deletions(-) diff --git a/arch/loongarch/include/asm/atomic.h b/arch/loongarch/include/asm/atomic.h index e27f0c72d324..99af8b3160a8 100644 --- a/arch/loongarch/include/asm/atomic.h +++ b/arch/loongarch/include/asm/atomic.h @@ -36,19 +36,19 @@ static inline void arch_atomic_##op(int i, atomic_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " $zero, %1, %0 \n" \ + "am"#asm_op".w" " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ -static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ +#define ATOMIC_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline int arch_atomic_##op##_return##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -56,13 +56,13 @@ static inline int arch_atomic_##op##_return_relaxed(int i, atomic_t *v) \ return result c_op I; \ } -#define ATOMIC_FETCH_OP(op, I, asm_op) \ -static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ +#define ATOMIC_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline int arch_atomic_fetch_##op##suffix(int i, atomic_t *v) \ { \ int result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.w" " %1, %2, %0 \n" \ + "am"#asm_op#mb".w" " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -72,29 +72,53 @@ static inline int arch_atomic_fetch_##op##_relaxed(int i, atomic_t *v) \ #define ATOMIC_OPS(op, I, asm_op, c_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(add, i, add, +) ATOMIC_OPS(sub, -i, add, +) +#define arch_atomic_add_return arch_atomic_add_return +#define arch_atomic_add_return_acquire arch_atomic_add_return +#define arch_atomic_add_return_release arch_atomic_add_return #define arch_atomic_add_return_relaxed arch_atomic_add_return_relaxed +#define arch_atomic_sub_return arch_atomic_sub_return +#define arch_atomic_sub_return_acquire arch_atomic_sub_return +#define arch_atomic_sub_return_release arch_atomic_sub_return #define arch_atomic_sub_return_relaxed arch_atomic_sub_return_relaxed +#define arch_atomic_fetch_add arch_atomic_fetch_add +#define arch_atomic_fetch_add_acquire arch_atomic_fetch_add +#define arch_atomic_fetch_add_release arch_atomic_fetch_add #define arch_atomic_fetch_add_relaxed arch_atomic_fetch_add_relaxed +#define arch_atomic_fetch_sub arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_acquire arch_atomic_fetch_sub +#define arch_atomic_fetch_sub_release arch_atomic_fetch_sub #define arch_atomic_fetch_sub_relaxed arch_atomic_fetch_sub_relaxed #undef ATOMIC_OPS #define ATOMIC_OPS(op, I, asm_op) \ ATOMIC_OP(op, I, asm_op) \ - ATOMIC_FETCH_OP(op, I, asm_op) + ATOMIC_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC_OPS(and, i, and) ATOMIC_OPS(or, i, or) ATOMIC_OPS(xor, i, xor) +#define arch_atomic_fetch_and arch_atomic_fetch_and +#define arch_atomic_fetch_and_acquire arch_atomic_fetch_and +#define arch_atomic_fetch_and_release arch_atomic_fetch_and #define arch_atomic_fetch_and_relaxed arch_atomic_fetch_and_relaxed +#define arch_atomic_fetch_or arch_atomic_fetch_or +#define arch_atomic_fetch_or_acquire arch_atomic_fetch_or +#define arch_atomic_fetch_or_release arch_atomic_fetch_or #define arch_atomic_fetch_or_relaxed arch_atomic_fetch_or_relaxed +#define arch_atomic_fetch_xor arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_acquire arch_atomic_fetch_xor +#define arch_atomic_fetch_xor_release arch_atomic_fetch_xor #define arch_atomic_fetch_xor_relaxed arch_atomic_fetch_xor_relaxed #undef ATOMIC_OPS @@ -172,18 +196,18 @@ static inline int arch_atomic_sub_if_positive(int i, atomic_t *v) static inline void arch_atomic64_##op(long i, atomic64_t *v) \ { \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " $zero, %1, %0 \n" \ + "am"#asm_op".d " " $zero, %1, %0 \n" \ : "+ZB" (v->counter) \ : "r" (I) \ : "memory"); \ } -#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ -static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_OP_RETURN(op, I, asm_op, c_op, mb, suffix) \ +static inline long arch_atomic64_##op##_return##suffix(long i, atomic64_t *v) \ { \ long result; \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -191,13 +215,13 @@ static inline long arch_atomic64_##op##_return_relaxed(long i, atomic64_t *v) \ return result c_op I; \ } -#define ATOMIC64_FETCH_OP(op, I, asm_op) \ -static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ +#define ATOMIC64_FETCH_OP(op, I, asm_op, mb, suffix) \ +static inline long arch_atomic64_fetch_##op##suffix(long i, atomic64_t *v) \ { \ long result; \ \ __asm__ __volatile__( \ - "am"#asm_op"_db.d " " %1, %2, %0 \n" \ + "am"#asm_op#mb".d " " %1, %2, %0 \n" \ : "+ZB" (v->counter), "=&r" (result) \ : "r" (I) \ : "memory"); \ @@ -207,29 +231,53 @@ static inline long arch_atomic64_fetch_##op##_relaxed(long i, atomic64_t *v) \ #define ATOMIC64_OPS(op, I, asm_op, c_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_OP_RETURN(op, I, asm_op, c_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, _db, ) \ + ATOMIC64_OP_RETURN(op, I, asm_op, c_op, , _relaxed) \ + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(add, i, add, +) ATOMIC64_OPS(sub, -i, add, +) +#define arch_atomic64_add_return arch_atomic64_add_return +#define arch_atomic64_add_return_acquire arch_atomic64_add_return +#define arch_atomic64_add_return_release arch_atomic64_add_return #define arch_atomic64_add_return_relaxed arch_atomic64_add_return_relaxed +#define arch_atomic64_sub_return arch_atomic64_sub_return +#define arch_atomic64_sub_return_acquire arch_atomic64_sub_return +#define arch_atomic64_sub_return_release arch_atomic64_sub_return #define arch_atomic64_sub_return_relaxed arch_atomic64_sub_return_relaxed +#define arch_atomic64_fetch_add arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_acquire arch_atomic64_fetch_add +#define arch_atomic64_fetch_add_release arch_atomic64_fetch_add #define arch_atomic64_fetch_add_relaxed arch_atomic64_fetch_add_relaxed +#define arch_atomic64_fetch_sub arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_acquire arch_atomic64_fetch_sub +#define arch_atomic64_fetch_sub_release arch_atomic64_fetch_sub #define arch_atomic64_fetch_sub_relaxed arch_atomic64_fetch_sub_relaxed #undef ATOMIC64_OPS #define ATOMIC64_OPS(op, I, asm_op) \ ATOMIC64_OP(op, I, asm_op) \ - ATOMIC64_FETCH_OP(op, I, asm_op) + ATOMIC64_FETCH_OP(op, I, asm_op, _db, ) \ + ATOMIC64_FETCH_OP(op, I, asm_op, , _relaxed) ATOMIC64_OPS(and, i, and) ATOMIC64_OPS(or, i, or) ATOMIC64_OPS(xor, i, xor) +#define arch_atomic64_fetch_and arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_acquire arch_atomic64_fetch_and +#define arch_atomic64_fetch_and_release arch_atomic64_fetch_and #define arch_atomic64_fetch_and_relaxed arch_atomic64_fetch_and_relaxed +#define arch_atomic64_fetch_or arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_acquire arch_atomic64_fetch_or +#define arch_atomic64_fetch_or_release arch_atomic64_fetch_or #define arch_atomic64_fetch_or_relaxed arch_atomic64_fetch_or_relaxed +#define arch_atomic64_fetch_xor arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_acquire arch_atomic64_fetch_xor +#define arch_atomic64_fetch_xor_release arch_atomic64_fetch_xor #define arch_atomic64_fetch_xor_relaxed arch_atomic64_fetch_xor_relaxed #undef ATOMIC64_OPS