git.dujemihanovic.xyz Git - linux.git/commitdiff
arm64, bpf: add internal-only MOV instruction to resolve per-CPU addrs
Author: Puranjay Mohan <puranjay12@gmail.com>
Thu, 2 May 2024 15:18:53 +0000 (15:18 +0000)
Committer: Alexei Starovoitov <ast@kernel.org>
Sun, 12 May 2024 23:54:34 +0000 (16:54 -0700)
Support an instruction for resolving absolute addresses of per-CPU
data from their per-CPU offsets. This instruction is internal-only and
users are not allowed to use it directly. For now, it will only be
used for internal inlining optimizations between the BPF verifier and
BPF JITs.

Since commit 7158627686f0 ("arm64: percpu: implement optimised pcpu
access using tpidr_el1"), the per-cpu offset for the CPU is stored in
the tpidr_el1/2 register of that CPU.

To support this BPF instruction in the ARM64 JIT, the following ARM64
instructions are emitted:

mov dst, src // Move src to dst, if src != dst
mrs tmp, tpidr_el1/2 // Move the per-CPU offset of the current CPU into tmp.
add dst, dst, tmp // Add the per cpu offset to the dst.

To measure the performance improvement provided by this change, the
benchmark in [1] was used:

Before:
glob-arr-inc   :   23.597 ± 0.012M/s
arr-inc        :   23.173 ± 0.019M/s
hash-inc       :   12.186 ± 0.028M/s

After:
glob-arr-inc   :   23.819 ± 0.034M/s
arr-inc        :   23.285 ± 0.017M/s
hash-inc       :   12.419 ± 0.011M/s

[1] https://github.com/anakryiko/linux/commit/8dec900975ef

Signed-off-by: Puranjay Mohan <puranjay12@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20240502151854.9810-4-puranjay@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
arch/arm64/include/asm/insn.h
arch/arm64/lib/insn.c
arch/arm64/net/bpf_jit.h
arch/arm64/net/bpf_jit_comp.c

index db1aeacd4cd99d971838f2eee10e679456c96035..8de0e39b29f388c985ae7e686149130ce99be632 100644 (file)
@@ -135,6 +135,11 @@ enum aarch64_insn_special_register {
        AARCH64_INSN_SPCLREG_SP_EL2     = 0xF210
 };
 
+enum aarch64_insn_system_register {
+       AARCH64_INSN_SYSREG_TPIDR_EL1   = 0x4684,
+       AARCH64_INSN_SYSREG_TPIDR_EL2   = 0x6682,
+};
+
 enum aarch64_insn_variant {
        AARCH64_INSN_VARIANT_32BIT,
        AARCH64_INSN_VARIANT_64BIT
@@ -686,6 +691,8 @@ u32 aarch64_insn_gen_cas(enum aarch64_insn_register result,
 }
 #endif
 u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type);
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+                        enum aarch64_insn_system_register sysreg);
 
 s32 aarch64_get_branch_offset(u32 insn);
 u32 aarch64_set_branch_offset(u32 insn, s32 offset);
index a635ab83fee359421cd7c26bf6b59324feb21522..b008a9b46a7ff40541d32ac8e4ddedd319929cb9 100644 (file)
@@ -1515,3 +1515,14 @@ u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type)
 
        return insn;
 }
+
+u32 aarch64_insn_gen_mrs(enum aarch64_insn_register result,
+                        enum aarch64_insn_system_register sysreg)
+{
+       u32 insn = aarch64_insn_get_mrs_value();
+
+       insn &= ~GENMASK(19, 0);
+       insn |= sysreg << 5;
+       return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT,
+                                           insn, result);
+}
index 23b1b34db088ec0dda2c366cb874c1a80104112e..b627ef7188c71e98030d839c62aebb46c2b1380e 100644 (file)
 #define A64_ADR(Rd, offset) \
        aarch64_insn_gen_adr(0, offset, Rd, AARCH64_INSN_ADR_TYPE_ADR)
 
+/* MRS */
+#define A64_MRS_TPIDR_EL1(Rt) \
+       aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL1)
+#define A64_MRS_TPIDR_EL2(Rt) \
+       aarch64_insn_gen_mrs(Rt, AARCH64_INSN_SYSREG_TPIDR_EL2)
+
 #endif /* _BPF_JIT_H */
index 53347d4217f4b431d694b5eb170e3b2981b9872e..4e7954e9829d8983d465df3d98d1eee73d336bc8 100644 (file)
@@ -890,6 +890,15 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx,
                        emit(A64_ORR(1, tmp, dst, tmp), ctx);
                        emit(A64_MOV(1, dst, tmp), ctx);
                        break;
+               } else if (insn_is_mov_percpu_addr(insn)) {
+                       if (dst != src)
+                               emit(A64_MOV(1, dst, src), ctx);
+                       if (cpus_have_cap(ARM64_HAS_VIRT_HOST_EXTN))
+                               emit(A64_MRS_TPIDR_EL2(tmp), ctx);
+                       else
+                               emit(A64_MRS_TPIDR_EL1(tmp), ctx);
+                       emit(A64_ADD(1, dst, dst, tmp), ctx);
+                       break;
                }
                switch (insn->off) {
                case 0:
@@ -2559,6 +2568,11 @@ bool bpf_jit_supports_insn(struct bpf_insn *insn, bool in_arena)
        return true;
 }
 
+bool bpf_jit_supports_percpu_insn(void)
+{
+       return true;
+}
+
 void bpf_jit_free(struct bpf_prog *prog)
 {
        if (prog->jited) {