git.dujemihanovic.xyz Git - u-boot.git/commitdiff
arm: Use builtins for ffs/fls
author Sean Anderson <sean.anderson@seco.com>
Mon, 31 Jul 2023 21:27:33 +0000 (17:27 -0400)
committer Tom Rini <trini@konsulko.com>
Thu, 17 Aug 2023 20:39:20 +0000 (16:39 -0400)
Since ARMv5, the clz instruction allows for efficient implementation of
ffs/fls with builtins. Until ARMv7 (with Thumb-2), this instruction is
only available in ARM mode. LTO makes it difficult to force specific
functions to be in ARM mode, as it is effectively a form of very
aggressive inlining. To work around this, fls/ffs are implemented in
assembly for ARMv5 and ARMv6 when compiling U-Boot in Thumb mode.
Overall, this saves around 75 bytes per call.

This code is synced with v5.15 of the Linux kernel.

Signed-off-by: Sean Anderson <sean.anderson@seco.com>
Reviewed-by: Tom Rini <trini@konsulko.com>
arch/arm/include/asm/bitops.h
arch/arm/lib/Makefile
arch/arm/lib/bitops.S [new file with mode: 0644]
include/asm-generic/bitops/builtin-__ffs.h [new file with mode: 0644]
include/asm-generic/bitops/builtin-__fls.h [new file with mode: 0644]
include/asm-generic/bitops/builtin-ffs.h [new file with mode: 0644]
include/asm-generic/bitops/builtin-fls.h [new file with mode: 0644]

index fa8548624a049979f4d8895375cfb1a3d090f488..8e897833bb144b6c4ac12d26c2ecc5852dc6b08b 100644 (file)
 #ifndef __ASM_ARM_BITOPS_H
 #define __ASM_ARM_BITOPS_H
 
+#if __LINUX_ARM_ARCH__ < 5 /* clz only exists since ARMv5 */
+
 #include <asm-generic/bitops/__ffs.h>
 #include <asm-generic/bitops/__fls.h>
 #include <asm-generic/bitops/fls.h>
+
+#else /* __LINUX_ARM_ARCH__ >= 5 */
+/* NOTE(review): presumably marks ffs/fls as arch-provided; confirm */
+#define PLATFORM_FFS
+#define PLATFORM_FLS
+
+#if !IS_ENABLED(CONFIG_HAS_THUMB2) && CONFIG_IS_ENABLED(SYS_THUMB_BUILD)
+/* Thumb-1 build: clz needs ARM mode, so use arch/arm/lib/bitops.S */
+unsigned long __fls(unsigned long word);
+unsigned long __ffs(unsigned long word);
+int fls(unsigned int x);
+int ffs(int x);
+
+#else /* ARM or Thumb-2 build: inline compiler builtins */
+
+#include <asm-generic/bitops/builtin-__fls.h>
+#include <asm-generic/bitops/builtin-__ffs.h>
+#include <asm-generic/bitops/builtin-fls.h>
+#include <asm-generic/bitops/builtin-ffs.h>
+
+#endif /* !CONFIG_HAS_THUMB2 && SYS_THUMB_BUILD */
+#endif /* __LINUX_ARM_ARCH__ < 5 */
+
 #include <asm-generic/bitops/fls64.h>
 
 #ifdef __KERNEL__
@@ -113,7 +138,7 @@ static inline int test_bit(int nr, const void * addr)
 
 static inline int __ilog2(unsigned int x)
 {
-       return generic_fls(x) - 1;
+       return fls(x) - 1; /* fls() counts bits from 1, so subtract 1 */
 }
 
 #define ffz(x)  __ffs(~(x))
index 62cf80f3739c821aad74901466a786d1d758dc83..b1bcd3746625a53b66c41ff98fec3c41e4027244 100644 (file)
@@ -113,6 +113,11 @@ AFLAGS_REMOVE_memset.o := -mthumb -mthumb-interwork
 AFLAGS_REMOVE_memcpy.o := -mthumb -mthumb-interwork
 AFLAGS_memset.o := -DMEMSET_NO_THUMB_BUILD
 AFLAGS_memcpy.o := -DMEMCPY_NO_THUMB_BUILD
+
+# Only needed to force ARM mode on THUMB1 targets; ARMv4 lacks clz.
+ifneq ($(CONFIG_SYS_ARM_ARCH),4)
+obj-y   += bitops.o
+endif
 endif
 endif
 
diff --git a/arch/arm/lib/bitops.S b/arch/arm/lib/bitops.S
new file mode 100644 (file)
index 0000000..29d1524
--- /dev/null
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023 Sean Anderson <sean.anderson@seco.com>
+ *
+ * ARM bitops to call when using THUMB1, which doesn't have these instructions.
+ */
+#include <linux/linkage.h>
+#include <asm/assembler.h>
+
+.pushsection .text.__fls        /* separate section per function */
+ENTRY(__fls)                    /* unsigned long __fls(unsigned long word) */
+       clz     r0, r0           /* r0 = number of leading zero bits in word */
+       rsb     r0, r0, #31      /* r0 = 31 - r0: index of the top set bit */
+       ret     lr               /* result is left in r0 */
+ENDPROC(__fls)
+.popsection
+
+.pushsection .text.__ffs        /* separate section per function */
+ENTRY(__ffs)                    /* unsigned long __ffs(unsigned long word) */
+       rsb     r3, r0, #0       /* r3 = 0 - word */
+       and     r0, r0, r3       /* word & -word keeps only the lowest set bit */
+       clz     r0, r0           /* r0 = leading zeros above that bit */
+       rsb     r0, r0, #31      /* r0 = 31 - r0: index of the lowest set bit */
+       ret     lr               /* result is left in r0 */
+ENDPROC(__ffs)
+.popsection
+
+.pushsection .text.fls          /* separate section per function */
+ENTRY(fls)                      /* int fls(unsigned int x) */
+       cmp     r0, #0           /* x == 0: both ops below are skipped, r0 stays 0 */
+       clzne   r0, r0           /* x != 0: r0 = number of leading zero bits */
+       rsbne   r0, r0, #32      /* x != 0: r0 = 32 - r0, 1-based top bit index */
+       ret     lr               /* result is left in r0 */
+ENDPROC(fls)
+.popsection
+
+.pushsection .text.ffs          /* separate section per function */
+ENTRY(ffs)                      /* int ffs(int x) */
+       rsb     r3, r0, #0       /* r3 = 0 - x */
+       and     r0, r0, r3       /* x & -x: lowest set bit, or 0 if x == 0 */
+       clz     r0, r0           /* r0 = leading zeros; 32 when r0 was 0 */
+       rsb     r0, r0, #32      /* r0 = 32 - r0: 1-based index, 0 if no bit set */
+       ret     lr               /* result is left in r0 */
+ENDPROC(ffs)
+.popsection
diff --git a/include/asm-generic/bitops/builtin-__ffs.h b/include/asm-generic/bitops/builtin-__ffs.h
new file mode 100644 (file)
index 0000000..87024da
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN___FFS_H_
+
+/**
+ * __ffs - find first (least-significant) set bit in word, 0-based index
+ * @word: The word to search
+ *
+ * Undefined if no bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __ffs(unsigned long word)
+{
+       return __builtin_ctzl(word); /* count of trailing zero bits */
+}
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-__fls.h b/include/asm-generic/bitops/builtin-__fls.h
new file mode 100644 (file)
index 0000000..43a5aa9
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN___FLS_H_
+
+/**
+ * __fls - find last (most-significant) set bit in a long word (0-based index)
+ * @word: the word to search
+ *
+ * Undefined if no set bit exists, so code should check against 0 first.
+ */
+static __always_inline unsigned long __fls(unsigned long word)
+{
+       return (sizeof(word) * 8) - 1 - __builtin_clzl(word); /* width - 1 - leading zeros */
+}
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-ffs.h b/include/asm-generic/bitops/builtin-ffs.h
new file mode 100644 (file)
index 0000000..7b12932
--- /dev/null
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN_FFS_H_
+
+/**
+ * ffs - find first (least-significant) bit set
+ * @x: the word to search
+ *
+ * Defined the same way as the libc and compiler builtin ffs routines:
+ * bits are numbered from 1 and ffs(0) = 0. It therefore
+ * differs in spirit from ffz (man ffs).
+ */
+#define ffs(x) __builtin_ffs(x)
+
+#endif
diff --git a/include/asm-generic/bitops/builtin-fls.h b/include/asm-generic/bitops/builtin-fls.h
new file mode 100644 (file)
index 0000000..c8455cc
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
+#define _ASM_GENERIC_BITOPS_BUILTIN_FLS_H_
+
+/**
+ * fls - find last (most-significant) bit set
+ * @x: the word to search
+ *
+ * This is defined the same way as ffs: bits are numbered from 1.
+ * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
+ */
+static __always_inline int fls(unsigned int x)
+{
+       return x ? sizeof(x) * 8 - __builtin_clz(x) : 0; /* __builtin_clz(0) is undefined */
+}
+
+#endif