From c1094987d1dc4701646c5f3aa254e7a92a02d305 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Marek=20Beh=C3=BAn?=
Date: Thu, 20 May 2021 13:24:03 +0200
Subject: [PATCH] build: support building with Link Time Optimizations
MIME-Version: 1.0
Content-Type: text/plain; charset=utf8
Content-Transfer-Encoding: 8bit

Add plumbing for building U-Boot with Link Time Optimizations.

When building with LTO, $(PLATFORM_LIBS) has to be in --whole-archive /
--no-whole-archive group, otherwise some functions declared in assembly
may not be resolved and linking may fail.

Note: clang may throw away linker list symbols it thinks are unused when
compiling with LTO. To force these symbols to be included, we refer to
them via the __ADDRESSABLE macro in a C file generated from compiled
built-in.o files before linking.

Signed-off-by: Marek Behún
Reviewed-by: Simon Glass
---
Review notes (this section is discarded by git-am, it is not part of the
commit message):

 - The line structure of this patch was rebuilt from a copy in which all
   whitespace had been collapsed. Tabs and column alignment, in particular
   in context lines, are a best-effort reconstruction; apply with
   "git apply --ignore-whitespace" if a hunk does not match. Hunk line
   counts and the diffstat are unchanged; the "index" blob ids of the
   files touched below no longer describe the exact post-image.
 - E-mail addresses (From:, Signed-off-by:, Reviewed-by:, Copyright) were
   stripped from that copy. They are intentionally not guessed here and
   must be restored from the original commit before this is sent or
   applied with git-am.
 - The header name of the generated "#include" line was stripped as well.
   It now names <linux/compiler.h>, which is assumed to be the header that
   provides __ADDRESSABLE -- TODO confirm against the tree.
 - $(NM) is handed to gen_ll_addressable_symbols.sh inside single quotes.
   Unquoted, a multi-word $(NM) (for example with
   CROSS_COMPILE="ccache <prefix>") makes the shell assign only the first
   word to NM and execute the second word as the command.
 - For clang, AR and NM are assigned with ":=" so that $(CC) is queried
   once at parse time (like the surrounding ifeq on $(cc-name)) instead of
   on every expansion of the variable.
 - The nested conditional block in the top-level Makefile is indented with
   spaces so that none of it can be mistaken for recipe text.

 Kbuild                                |  2 +
 Kconfig                               | 24 ++++++++++
 Makefile                              | 67 ++++++++++++++++++++++++++-
 scripts/Makefile.lib                  |  3 ++
 scripts/Makefile.spl                  | 44 +++++++++++++++++-
 scripts/gen_ll_addressable_symbols.sh | 12 +++++
 6 files changed, 149 insertions(+), 3 deletions(-)
 create mode 100755 scripts/gen_ll_addressable_symbols.sh

diff --git a/Kbuild b/Kbuild
index 1eac091594..bf52e54051 100644
--- a/Kbuild
+++ b/Kbuild
@@ -10,6 +10,8 @@ generic-offsets-file := include/generated/generic-asm-offsets.h
 always  := $(generic-offsets-file)
 targets := lib/asm-offsets.s
 
+CFLAGS_REMOVE_asm-offsets.o := $(LTO_CFLAGS)
+
 $(obj)/$(generic-offsets-file): $(obj)/lib/asm-offsets.s FORCE
 	$(call filechk,offsets,__GENERIC_ASM_OFFSETS_H__)
 
diff --git a/Kconfig b/Kconfig
index 86f0a39bb0..f8c1a77bed 100644
--- a/Kconfig
+++ b/Kconfig
@@ -85,6 +85,30 @@ config SPL_OPTIMIZE_INLINING
 	  do what it thinks is best, which is desirable in some cases for
 	  size reasons.
 
+config ARCH_SUPPORTS_LTO
+	bool
+
+config LTO
+	bool "Enable Link Time Optimizations"
+	depends on ARCH_SUPPORTS_LTO
+	default n
+	help
+	  This option enables Link Time Optimization (LTO), a mechanism which
+	  allows the compiler to optimize between different compilation units.
+
+	  This can optimize away dead code paths, resulting in smaller binary
+	  size (if CC_OPTIMIZE_FOR_SIZE is enabled).
+
+	  This option is not available for every architecture and may
+	  introduce bugs.
+
+	  Currently, when compiling with GCC, due to a weird bug regarding
+	  jobserver, the final linking will not respect make's --jobs argument.
+	  Instead all available processors will be used (as reported by the
+	  nproc command).
+
+	  If unsure, say n.
+
 config TPL_OPTIMIZE_INLINING
 	bool "Allow compiler to uninline functions marked 'inline' in TPL"
 	depends on TPL
diff --git a/Makefile b/Makefile
index 03f74abe43..d2fdac7d31 100644
--- a/Makefile
+++ b/Makefile
@@ -676,6 +676,31 @@ else
 KBUILD_CFLAGS	+= -O2
 endif
 
+LTO_CFLAGS :=
+LTO_FINAL_LDFLAGS :=
+export LTO_CFLAGS LTO_FINAL_LDFLAGS
+ifdef CONFIG_LTO
+  ifeq ($(cc-name),clang)
+    LTO_CFLAGS += -flto
+    LTO_FINAL_LDFLAGS += -flto
+
+    AR := $(shell $(CC) -print-prog-name=llvm-ar)
+    NM := $(shell $(CC) -print-prog-name=llvm-nm)
+  else
+    NPROC := $(shell nproc 2>/dev/null || echo 1)
+    LTO_CFLAGS += -flto=$(NPROC)
+    LTO_FINAL_LDFLAGS += -fuse-linker-plugin -flto=$(NPROC)
+
+    # use the plugin aware gcc-ar/gcc-nm wrappers instead of plain ar/nm
+    AR = $(CROSS_COMPILE)gcc-ar
+    NM = $(CROSS_COMPILE)gcc-nm
+  endif
+
+  CFLAGS_NON_EFI += $(LTO_CFLAGS)
+
+  KBUILD_CFLAGS += $(LTO_CFLAGS)
+endif
+
 ifeq ($(CONFIG_STACKPROTECTOR),y)
 KBUILD_CFLAGS += $(call cc-option,-fstack-protector-strong)
 CFLAGS_EFI += $(call cc-option,-fno-stack-protector)
@@ -1708,8 +1733,45 @@ u-boot-swap.bin: u-boot.bin FORCE
 
 ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(ARCH)/Makefile.postlink)
 
+# Generate a C file that references every linker list symbol, so that the
+# compiler cannot optimize these symbols away when compiling with LTO
+ifdef CONFIG_LTO
+u-boot-keep-syms-lto := keep-syms-lto.o
+u-boot-keep-syms-lto_c := $(patsubst %.o,%.c,$(u-boot-keep-syms-lto))
+
+quiet_cmd_keep_syms_lto = KSL     $@
+      cmd_keep_syms_lto = \
+	NM='$(NM)' $(srctree)/scripts/gen_ll_addressable_symbols.sh $^ >$@
+
+quiet_cmd_keep_syms_lto_cc = KSLCC   $@
+      cmd_keep_syms_lto_cc = \
+	$(CC) $(filter-out $(LTO_CFLAGS),$(c_flags)) -c -o $@ $<
+
+$(u-boot-keep-syms-lto_c): $(u-boot-main)
+	$(call if_changed,keep_syms_lto)
+$(u-boot-keep-syms-lto): $(u-boot-keep-syms-lto_c)
+	$(call if_changed,keep_syms_lto_cc)
+else
+u-boot-keep-syms-lto :=
+endif
+
 # Rule to link u-boot
 # May be overridden by arch/$(ARCH)/config.mk
+ifdef CONFIG_LTO
+quiet_cmd_u-boot__ ?= LTO     $@
+      cmd_u-boot__ ?= \
+		$(CC) -nostdlib -nostartfiles \
+		$(LTO_FINAL_LDFLAGS) $(c_flags) \
+		$(KBUILD_LDFLAGS:%=-Wl,%) $(LDFLAGS_u-boot:%=-Wl,%) -o $@ \
+		-T u-boot.lds $(u-boot-init) \
+		-Wl,--whole-archive \
+			$(u-boot-main) \
+			$(u-boot-keep-syms-lto) \
+			$(PLATFORM_LIBS) \
+		-Wl,--no-whole-archive \
+		-Wl,-Map,u-boot.map; \
+		$(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+else
 quiet_cmd_u-boot__ ?= LD      $@
       cmd_u-boot__ ?= $(LD) $(KBUILD_LDFLAGS) $(LDFLAGS_u-boot) -o $@ \
       -T u-boot.lds $(u-boot-init)                             \
@@ -1718,6 +1780,7 @@ quiet_cmd_u-boot__ ?= LD      $@
       --no-whole-archive                                       \
       $(PLATFORM_LIBS) -Map u-boot.map;                        \
       $(if $(ARCH_POSTLINK), $(MAKE) -f $(ARCH_POSTLINK) $@, true)
+endif
 
 quiet_cmd_smap = GEN     common/system_map.o
 cmd_smap = \
@@ -1726,7 +1789,7 @@ cmd_smap = \
 		$(CC) $(c_flags) -DSYSTEM_MAP="\"$${smap}\"" \
 			-c $(srctree)/common/system_map.c -o common/system_map.o
 
-u-boot:	$(u-boot-init) $(u-boot-main) u-boot.lds FORCE
+u-boot:	$(u-boot-init) $(u-boot-main) $(u-boot-keep-syms-lto) u-boot.lds FORCE
 	+$(call if_changed,u-boot__)
 ifeq ($(CONFIG_KALLSYMS),y)
 	$(call cmd,smap)
@@ -2009,7 +2072,7 @@ CLEAN_FILES += include/bmp_logo.h include/bmp_logo_data.h tools/version.h \
 	       boot* u-boot* MLO* SPL System.map fit-dtb.blob* \
 	       u-boot-ivt.img.log u-boot-dtb.imx.log SPL.log u-boot.imx.log \
 	       lpc32xx-* bl31.c bl31.elf bl31_*.bin image.map tispl.bin* \
-	       idbloader.img flash.bin flash.log defconfig
+	       idbloader.img flash.bin flash.log defconfig keep-syms-lto.c
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_DIRS  += include/config include/generated spl tpl \
diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib
index 78543c6dd1..78bbebe7e9 100644
--- a/scripts/Makefile.lib
+++ b/scripts/Makefile.lib
@@ -419,6 +419,9 @@ $(obj)/%_efi.so: $(obj)/%.o $(obj)/efi_crt0.o $(obj)/efi_reloc.o $(obj)/efi_free
 
 targets += $(obj)/efi_crt0.o $(obj)/efi_reloc.o $(obj)/efi_freestanding.o
 
+CFLAGS_REMOVE_efi_reloc.o := $(LTO_CFLAGS)
+CFLAGS_REMOVE_efi_freestanding.o := $(LTO_CFLAGS)
+
 # ACPI
 # ---------------------------------------------------------------------------
 #
diff --git a/scripts/Makefile.spl b/scripts/Makefile.spl
index ac2d2033ba..7872cbaabe 100644
--- a/scripts/Makefile.spl
+++ b/scripts/Makefile.spl
@@ -448,8 +448,48 @@ quiet_cmd_sym ?= SYM     $@
 $(obj)/$(SPL_BIN).sym: $(obj)/$(SPL_BIN) FORCE
 	$(call if_changed,sym)
 
+# Generate a C file that references every linker list symbol, so that the
+# compiler cannot optimize these symbols away when compiling with LTO
+ifdef CONFIG_LTO
+u-boot-spl-keep-syms-lto := $(obj)/keep-syms-lto.o
+u-boot-spl-keep-syms-lto_c := \
+	$(patsubst $(obj)/%.o,$(obj)/%.c,$(u-boot-spl-keep-syms-lto))
+
+quiet_cmd_keep_syms_lto = KSL     $@
+      cmd_keep_syms_lto = \
+	NM='$(NM)' $(srctree)/scripts/gen_ll_addressable_symbols.sh $^ >$@
+
+quiet_cmd_keep_syms_lto_cc = KSLCC   $@
+      cmd_keep_syms_lto_cc = \
+	$(CC) $(filter-out $(LTO_CFLAGS),$(c_flags)) -c -o $@ $<
+
+$(u-boot-spl-keep-syms-lto_c): $(u-boot-spl-main) $(u-boot-spl-platdata)
+	$(call if_changed,keep_syms_lto)
+$(u-boot-spl-keep-syms-lto): $(u-boot-spl-keep-syms-lto_c)
+	$(call if_changed,keep_syms_lto_cc)
+else
+u-boot-spl-keep-syms-lto :=
+endif
+
 # Rule to link u-boot-spl
 # May be overridden by arch/$(ARCH)/config.mk
+ifdef CONFIG_LTO
+quiet_cmd_u-boot-spl ?= LTO     $@
+      cmd_u-boot-spl ?= \
+	( \
+		cd $(obj) && \
+		$(CC) -nostdlib -nostartfiles $(LTO_FINAL_LDFLAGS) $(c_flags) \
+		$(KBUILD_LDFLAGS:%=-Wl,%) $(LDFLAGS_$(@F):%=-Wl,%) \
+		$(patsubst $(obj)/%,%,$(u-boot-spl-init)) \
+		-Wl,--whole-archive \
+		$(patsubst $(obj)/%,%,$(u-boot-spl-main)) \
+		$(patsubst $(obj)/%,%,$(u-boot-spl-platdata)) \
+		$(patsubst $(obj)/%,%,$(u-boot-spl-keep-syms-lto)) \
+		$(PLATFORM_LIBS) \
+		-Wl,--no-whole-archive \
+		-Wl,-Map,$(SPL_BIN).map -o $(SPL_BIN) \
+	)
+else
 quiet_cmd_u-boot-spl ?= LD      $@
       cmd_u-boot-spl ?= \
 	(								\
@@ -462,9 +502,11 @@ quiet_cmd_u-boot-spl ?= LD      $@
 		--no-whole-archive					\
 		$(PLATFORM_LIBS) -Map $(SPL_BIN).map -o $(SPL_BIN)	\
 	)
+endif
 
 $(obj)/$(SPL_BIN): $(u-boot-spl-platdata) $(u-boot-spl-init) \
-		$(u-boot-spl-main) $(obj)/u-boot-spl.lds FORCE
+		$(u-boot-spl-main) $(u-boot-spl-keep-syms-lto) \
+		$(obj)/u-boot-spl.lds FORCE
 	$(call if_changed,u-boot-spl)
 
 $(sort $(u-boot-spl-init) $(u-boot-spl-main)): $(u-boot-spl-dirs) ;
diff --git a/scripts/gen_ll_addressable_symbols.sh b/scripts/gen_ll_addressable_symbols.sh
new file mode 100755
index 0000000000..3978a39d97
--- /dev/null
+++ b/scripts/gen_ll_addressable_symbols.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+# Copyright (C) 2020 Marek Behún
+
+# Emit an __ADDRESSABLE(symbol) reference for every linker list entry symbol
+# that $NM finds in the given objects, so that LTO does not optimize them away
+
+set -e
+
+echo '#include <linux/compiler.h>'
+$NM "$@" 2>/dev/null | grep -oe '_u_boot_list_2_[a-zA-Z0-9_]*_2_[a-zA-Z0-9_]*' | \
+	sort -u | sed -e 's/^\(.*\)/extern char \1[];\n__ADDRESSABLE(\1);/'
-- 
2.39.5