LoongArch: Support dbar with different hints
author    Huacai Chen <chenhuacai@loongson.cn>
          Thu, 29 Jun 2023 12:58:44 +0000 (20:58 +0800)
committer Huacai Chen <chenhuacai@loongson.cn>
          Thu, 29 Jun 2023 12:58:44 +0000 (20:58 +0800)
Traditionally, LoongArch uses "dbar 0" (full completion barrier) for
everything. But the full completion barrier is a performance killer, so
Loongson-3A6000 and newer processors have made finer granularity hints
available:

Bit4: ordering or completion (0: completion, 1: ordering)
Bit3: barrier for previous read (0: true, 1: false)
Bit2: barrier for previous write (0: true, 1: false)
Bit1: barrier for succeeding read (0: true, 1: false)
Bit0: barrier for succeeding write (0: true, 1: false)

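As an illustration (not part of this patch), a hint value can be composed
from the bits above; the helper names in this sketch are hypothetical,
only the bit positions come from the table:

	#define DBAR_ORDERING	(1 << 4)  /* 0 = completion, 1 = ordering */
	#define DBAR_NO_PREV_R	(1 << 3)  /* skip barrier for previous reads */
	#define DBAR_NO_PREV_W	(1 << 2)  /* skip barrier for previous writes */
	#define DBAR_NO_SUCC_R	(1 << 1)  /* skip barrier for succeeding reads */
	#define DBAR_NO_SUCC_W	(1 << 0)  /* skip barrier for succeeding writes */

	/*
	 * Bits 3..0 are active-low ("0: true"), so the full completion
	 * barrier is hint 0, i.e. the traditional "dbar 0".  An ordering
	 * barrier for reads only (previous reads vs. succeeding reads)
	 * keeps the two read slots and masks out the write slots:
	 */
	#define DBAR_READ_ORDERING \
		(DBAR_ORDERING | DBAR_NO_PREV_W | DBAR_NO_SUCC_W)
	/* == 0b10101, the value named or_r_ in barrier.h below */
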
Hint 0x700: barrier for "read after read" from the same address, which
is needed by LL-SC loops on old models (dbar 0x700 behaves the same as
nop if such reordering is disabled on new models).

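For context, the kernel emits this hint through __WEAK_LLSC_MB (see
barrier.h below). A simplified sketch, modeled on the kernel's
__cmpxchg_asm helper in arch/loongarch/include/asm/cmpxchg.h (not part
of this diff), shows where it lands relative to the LL-SC loop:

	static inline u32 cmpxchg32_sketch(volatile u32 *p, u32 old, u32 new)
	{
		u32 ret;

		__asm__ __volatile__(
		"1:	ll.w	%0, %2		\n"	/* load-linked */
		"	bne	%0, %z3, 2f	\n"	/* mismatch, bail out */
		"	move	$t0, %z4	\n"
		"	sc.w	$t0, %2		\n"	/* store-conditional */
		"	beqz	$t0, 1b		\n"	/* retry if SC failed */
		"2:				\n"
		__WEAK_LLSC_MB				/* dbar 0x700 on SMP */
		: "=&r" (ret), "=ZB" (*p)
		: "ZB" (*p), "Jr" (old), "Jr" (new)
		: "t0", "memory");

		return ret;
	}
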
This patch makes use of the various new hints for different kinds of
memory barriers. It brings performance improvements on Loongson-3A6000
series, while not affecting the existing models because all variants are
treated as 'dbar 0' there.

Why override queued_spin_unlock()?
After commit 01e3b958efe85a26d9b ("drivers: Remove explicit invocations
of mmiowb()") we need a completion barrier in queued_spin_unlock(), but
the generic implementation uses smp_store_release(), which only provides
an ordering barrier.

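For contrast, the generic version in asm-generic/qspinlock.h is
essentially the following, which is why an override is needed here:

	static __always_inline void queued_spin_unlock(struct qspinlock *lock)
	{
		/* release (ordering) semantics only, not a completion barrier */
		smp_store_release(&lock->locked, 0);
	}
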
Signed-off-by: Jun Yi <yijun@loongson.cn>
Signed-off-by: Huacai Chen <chenhuacai@loongson.cn>
arch/loongarch/include/asm/Kbuild
arch/loongarch/include/asm/barrier.h
arch/loongarch/include/asm/io.h
arch/loongarch/include/asm/qspinlock.h [new file with mode: 0644]
arch/loongarch/kernel/smp.c
arch/loongarch/mm/tlbex.S

diff --git a/arch/loongarch/include/asm/Kbuild b/arch/loongarch/include/asm/Kbuild
index 77ad8e6f0906c4be1e7e1b52c568de5ac977f416..6b222f227342b4a3c22cbdfc5bfe59a7458cd5ae 100644
--- a/arch/loongarch/include/asm/Kbuild
+++ b/arch/loongarch/include/asm/Kbuild
@@ -5,7 +5,6 @@ generic-y += mcs_spinlock.h
 generic-y += parport.h
 generic-y += early_ioremap.h
 generic-y += qrwlock.h
-generic-y += qspinlock.h
 generic-y += rwsem.h
 generic-y += segment.h
 generic-y += user.h
diff --git a/arch/loongarch/include/asm/barrier.h b/arch/loongarch/include/asm/barrier.h
index cda9776758544e5e72f8f42b9bf4862a67ce8a1a..4b663f19770611b7cb5d1d0aa88a4a4de1d9b48a 100644
--- a/arch/loongarch/include/asm/barrier.h
+++ b/arch/loongarch/include/asm/barrier.h
@@ -5,27 +5,56 @@
 #ifndef __ASM_BARRIER_H
 #define __ASM_BARRIER_H
 
-#define __sync()       __asm__ __volatile__("dbar 0" : : : "memory")
+/*
+ * Hint encoding:
+ *
+ * Bit4: ordering or completion (0: completion, 1: ordering)
+ * Bit3: barrier for previous read (0: true, 1: false)
+ * Bit2: barrier for previous write (0: true, 1: false)
+ * Bit1: barrier for succeeding read (0: true, 1: false)
+ * Bit0: barrier for succeeding write (0: true, 1: false)
+ *
+ * Hint 0x700: barrier for "read after read" from the same address
+ */
+
+#define DBAR(hint) __asm__ __volatile__("dbar %0 " : : "I"(hint) : "memory")
+
+#define crwrw          0b00000
+#define cr_r_          0b00101
+#define c_w_w          0b01010
 
-#define fast_wmb()     __sync()
-#define fast_rmb()     __sync()
-#define fast_mb()      __sync()
-#define fast_iob()     __sync()
-#define wbflush()      __sync()
+#define orwrw          0b10000
+#define or_r_          0b10101
+#define o_w_w          0b11010
 
-#define wmb()          fast_wmb()
-#define rmb()          fast_rmb()
-#define mb()           fast_mb()
-#define iob()          fast_iob()
+#define orw_w          0b10010
+#define or_rw          0b10100
 
-#define __smp_mb()     __asm__ __volatile__("dbar 0" : : : "memory")
-#define __smp_rmb()    __asm__ __volatile__("dbar 0" : : : "memory")
-#define __smp_wmb()    __asm__ __volatile__("dbar 0" : : : "memory")
+#define c_sync()       DBAR(crwrw)
+#define c_rsync()      DBAR(cr_r_)
+#define c_wsync()      DBAR(c_w_w)
+
+#define o_sync()       DBAR(orwrw)
+#define o_rsync()      DBAR(or_r_)
+#define o_wsync()      DBAR(o_w_w)
+
+#define ldacq_mb()     DBAR(or_rw)
+#define strel_mb()     DBAR(orw_w)
+
+#define mb()           c_sync()
+#define rmb()          c_rsync()
+#define wmb()          c_wsync()
+#define iob()          c_sync()
+#define wbflush()      c_sync()
+
+#define __smp_mb()     o_sync()
+#define __smp_rmb()    o_rsync()
+#define __smp_wmb()    o_wsync()
 
 #ifdef CONFIG_SMP
-#define __WEAK_LLSC_MB         "       dbar 0  \n"
+#define __WEAK_LLSC_MB         "       dbar 0x700      \n"
 #else
-#define __WEAK_LLSC_MB         "               \n"
+#define __WEAK_LLSC_MB         "                       \n"
 #endif
 
 #define __smp_mb__before_atomic()      barrier()
@@ -59,68 +88,19 @@ static inline unsigned long array_index_mask_nospec(unsigned long index,
        return mask;
 }
 
-#define __smp_load_acquire(p)                                                  \
-({                                                                             \
-       union { typeof(*p) __val; char __c[1]; } __u;                           \
-       unsigned long __tmp = 0;                                                        \
-       compiletime_assert_atomic_type(*p);                                     \
-       switch (sizeof(*p)) {                                                   \
-       case 1:                                                                 \
-               *(__u8 *)__u.__c = *(volatile __u8 *)p;                         \
-               __smp_mb();                                                     \
-               break;                                                          \
-       case 2:                                                                 \
-               *(__u16 *)__u.__c = *(volatile __u16 *)p;                       \
-               __smp_mb();                                                     \
-               break;                                                          \
-       case 4:                                                                 \
-               __asm__ __volatile__(                                           \
-               "amor_db.w %[val], %[tmp], %[mem]       \n"                             \
-               : [val] "=&r" (*(__u32 *)__u.__c)                               \
-               : [mem] "ZB" (*(u32 *) p), [tmp] "r" (__tmp)                    \
-               : "memory");                                                    \
-               break;                                                          \
-       case 8:                                                                 \
-               __asm__ __volatile__(                                           \
-               "amor_db.d %[val], %[tmp], %[mem]       \n"                             \
-               : [val] "=&r" (*(__u64 *)__u.__c)                               \
-               : [mem] "ZB" (*(u64 *) p), [tmp] "r" (__tmp)                    \
-               : "memory");                                                    \
-               break;                                                          \
-       }                                                                       \
-       (typeof(*p))__u.__val;                                                          \
+#define __smp_load_acquire(p)                          \
+({                                                     \
+       typeof(*p) ___p1 = READ_ONCE(*p);               \
+       compiletime_assert_atomic_type(*p);             \
+       ldacq_mb();                                     \
+       ___p1;                                          \
 })
 
-#define __smp_store_release(p, v)                                              \
-do {                                                                           \
-       union { typeof(*p) __val; char __c[1]; } __u =                          \
-               { .__val = (__force typeof(*p)) (v) };                          \
-       unsigned long __tmp;                                                    \
-       compiletime_assert_atomic_type(*p);                                     \
-       switch (sizeof(*p)) {                                                   \
-       case 1:                                                                 \
-               __smp_mb();                                                     \
-               *(volatile __u8 *)p = *(__u8 *)__u.__c;                         \
-               break;                                                          \
-       case 2:                                                                 \
-               __smp_mb();                                                     \
-               *(volatile __u16 *)p = *(__u16 *)__u.__c;                       \
-               break;                                                          \
-       case 4:                                                                 \
-               __asm__ __volatile__(                                           \
-               "amswap_db.w %[tmp], %[val], %[mem]     \n"                     \
-               : [mem] "+ZB" (*(u32 *)p), [tmp] "=&r" (__tmp)                  \
-               : [val] "r" (*(__u32 *)__u.__c)                                 \
-               : );                                                            \
-               break;                                                          \
-       case 8:                                                                 \
-               __asm__ __volatile__(                                           \
-               "amswap_db.d %[tmp], %[val], %[mem]     \n"                     \
-               : [mem] "+ZB" (*(u64 *)p), [tmp] "=&r" (__tmp)                  \
-               : [val] "r" (*(__u64 *)__u.__c)                                 \
-               : );                                                            \
-               break;                                                          \
-       }                                                                       \
+#define __smp_store_release(p, v)                      \
+do {                                                   \
+       compiletime_assert_atomic_type(*p);             \
+       strel_mb();                                     \
+       WRITE_ONCE(*p, v);                              \
 } while (0)
 
 #define __smp_store_mb(p, v)                                                   \
diff --git a/arch/loongarch/include/asm/io.h b/arch/loongarch/include/asm/io.h
index 545e2708fbf7042f6a61f29c1423be1f11de7a7e..1c94102200407f2f62f573bcbb923614db1b52e1 100644
--- a/arch/loongarch/include/asm/io.h
+++ b/arch/loongarch/include/asm/io.h
@@ -62,7 +62,7 @@ extern pgprot_t pgprot_wc;
 #define ioremap_cache(offset, size)    \
        ioremap_prot((offset), (size), pgprot_val(PAGE_KERNEL))
 
-#define mmiowb() asm volatile ("dbar 0" ::: "memory")
+#define mmiowb() wmb()
 
 /*
  * String version of I/O memory access operations.
diff --git a/arch/loongarch/include/asm/qspinlock.h b/arch/loongarch/include/asm/qspinlock.h
new file mode 100644
index 0000000..34f43f8
--- /dev/null
+++ b/arch/loongarch/include/asm/qspinlock.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_QSPINLOCK_H
+#define _ASM_QSPINLOCK_H
+
+#include <asm-generic/qspinlock_types.h>
+
+#define queued_spin_unlock queued_spin_unlock
+
+static inline void queued_spin_unlock(struct qspinlock *lock)
+{
+       compiletime_assert_atomic_type(lock->locked);
+       c_sync();
+       WRITE_ONCE(lock->locked, 0);
+}
+
+#include <asm-generic/qspinlock.h>
+
+#endif /* _ASM_QSPINLOCK_H */
diff --git a/arch/loongarch/kernel/smp.c b/arch/loongarch/kernel/smp.c
index 255967ff8c363aad372b285879bdf992322c5b4c..8ea1bbcf13a7e3287e50ba7366ef2151aad72277 100644
--- a/arch/loongarch/kernel/smp.c
+++ b/arch/loongarch/kernel/smp.c
@@ -115,7 +115,7 @@ static u32 ipi_read_clear(int cpu)
        action = iocsr_read32(LOONGARCH_IOCSR_IPI_STATUS);
        /* Clear the ipi register to clear the interrupt */
        iocsr_write32(action, LOONGARCH_IOCSR_IPI_CLEAR);
-       smp_mb();
+       wbflush();
 
        return action;
 }
diff --git a/arch/loongarch/mm/tlbex.S b/arch/loongarch/mm/tlbex.S
index 244e2f5aeee564c75c0459569caf4f879876cbe3..240ced55586e2c145ca9b22e007d71b8ee97f6e9 100644
--- a/arch/loongarch/mm/tlbex.S
+++ b/arch/loongarch/mm/tlbex.S
@@ -184,7 +184,7 @@ tlb_huge_update_load:
        ertn
 
 nopage_tlb_load:
-       dbar            0
+       dbar            0x700
        csrrd           ra, EXCEPTION_KS2
        la_abs          t0, tlb_do_page_fault_0
        jr              t0
@@ -333,7 +333,7 @@ tlb_huge_update_store:
        ertn
 
 nopage_tlb_store:
-       dbar            0
+       dbar            0x700
        csrrd           ra, EXCEPTION_KS2
        la_abs          t0, tlb_do_page_fault_1
        jr              t0
@@ -480,7 +480,7 @@ tlb_huge_update_modify:
        ertn
 
 nopage_tlb_modify:
-       dbar            0
+       dbar            0x700
        csrrd           ra, EXCEPTION_KS2
        la_abs          t0, tlb_do_page_fault_1
        jr              t0