From 270f8710f92fff3911a830b6c0bcb6fd229ccddd Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 30 Aug 2021 15:05:23 +0200 Subject: [PATCH] crc32: Add crc32 implementation using __builtin_aarch64_crc32b ARMv8.0 has optional crc32 instruction for crc32 calculation. The instruction is mandatory since ARMv8.1. The crc32 calculation is faster using the dedicated instruction, e.g. 1.4 GHz iMX8MN gives: => time crc32 0x50000000 0x2000000 time: 0.126 seconds # crc32 instruction time: 0.213 seconds # software crc32 Add implementation using the compiler builtin wrapper for the crc32 instruction and enable it by default, since we don't support any platforms which do not implement this instruction. Signed-off-by: Marek Vasut Cc: Simon Glass [trini: Make crc32_table guarded by CONFIG_ARM64_CRC32] Signed-off-by: Tom Rini --- arch/arm/Kconfig | 10 ++++++++++ arch/arm/Makefile | 4 ++++ lib/crc32.c | 9 ++++++++- 3 files changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index f0fd57f8d6..95102d386b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -9,6 +9,16 @@ config ARM64 select PHYS_64BIT select SYS_CACHE_SHIFT_6 +config ARM64_CRC32 + bool "Enable support for CRC32 instruction" + depends on ARM64 + default y + help + ARMv8 implements dedicated crc32 instruction for crc32 calculation. + This is faster than software crc32 calculation. This instruction may + not be present on all ARMv8.0, but is always present on ARMv8.1 and + newer. + config POSITION_INDEPENDENT bool "Generate position-independent pre-relocation code" depends on ARM64 || CPU_V7A diff --git a/arch/arm/Makefile b/arch/arm/Makefile index c68e598a67..ce977bf632 100644 --- a/arch/arm/Makefile +++ b/arch/arm/Makefile @@ -18,7 +18,11 @@ arch-$(CONFIG_CPU_V7A) =$(call cc-option, -march=armv7-a, \ $(call cc-option, -march=armv7)) arch-$(CONFIG_CPU_V7M) =-march=armv7-m arch-$(CONFIG_CPU_V7R) =-march=armv7-r +ifeq ($(CONFIG_ARM64_CRC32),y) +arch-$(CONFIG_ARM64) =-march=armv8-a+crc +else arch-$(CONFIG_ARM64) =-march=armv8-a +endif # On Tegra systems we must build SPL for the armv4 core on the device # but otherwise we can use the value in CONFIG_SYS_ARM_ARCH diff --git a/lib/crc32.c b/lib/crc32.c index f2acc107fe..5a3127e03a 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -84,7 +84,7 @@ static void __efi_runtime make_crc_table(void) } crc_table_empty = 0; } -#else +#elif !defined(CONFIG_ARM64_CRC32) /* ======================================================================== * Table of CRC-32's of all single-byte values (made by make_crc_table) */ @@ -184,6 +184,12 @@ const uint32_t * ZEXPORT get_crc_table() */ uint32_t __efi_runtime crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len) { +#ifdef CONFIG_ARM64_CRC32 + crc = cpu_to_le32(crc); + while (len--) + crc = __builtin_aarch64_crc32b(crc, *buf++); + return le32_to_cpu(crc); +#else const uint32_t *tab = crc_table; const uint32_t *b =(const uint32_t *)buf; size_t rem_len; @@ -221,6 +227,7 @@ uint32_t __efi_runtime crc32_no_comp(uint32_t crc, const Bytef *buf, uInt len) } return le32_to_cpu(crc); +#endif } #undef DO_CRC -- 2.39.5