From: York Sun Date: Mon, 8 Oct 2012 07:44:30 +0000 (+0000) Subject: powerpc/mpc85xx: Rewrite spin table to comply with ePAPR v1.1 X-Git-Tag: v2025.01-rc5-pxa1908~16880^2~9 X-Git-Url: http://git.dujemihanovic.xyz/img/static/git-logo.png?a=commitdiff_plain;h=ffd06e0231ac3fd0c5810f39f6e23527948df1c7;p=u-boot.git powerpc/mpc85xx: Rewrite spin table to comply with ePAPR v1.1 Move spin table to cached memory to comply with ePAPR v1.1. Load R3 with 64-bit value if CONFIG_SYS_PPC64 is defined. 'M' bit is set for DDR TLB to maintain cache coherence. See details in doc/README.mpc85xx-spin-table. Signed-off-by: York Sun Signed-off-by: Andy Fleming --- diff --git a/README b/README index df4aed14e4..4dad1594eb 100644 --- a/README +++ b/README @@ -363,6 +363,12 @@ The following options need to be configured: ICache only when Code runs from RAM. - 85xx CPU Options: + CONFIG_SYS_PPC64 + + Specifies that the core is a 64-bit PowerPC implementation (implements + the "64" category of the Power ISA). This is necessary for ePAPR + compliance, among other possible reasons. + CONFIG_SYS_FSL_TBCLK_DIV Defines the core time base clock divider ratio compared to the diff --git a/arch/powerpc/cpu/mpc85xx/fdt.c b/arch/powerpc/cpu/mpc85xx/fdt.c index 8221481359..a364ef216a 100644 --- a/arch/powerpc/cpu/mpc85xx/fdt.c +++ b/arch/powerpc/cpu/mpc85xx/fdt.c @@ -47,7 +47,7 @@ extern void ft_srio_setup(void *blob); void ft_fixup_cpu(void *blob, u64 memory_limit) { int off; - ulong spin_tbl_addr = get_spin_phys_addr(); + phys_addr_t spin_tbl_addr = get_spin_phys_addr(); u32 bootpg = determine_mp_bootpg(NULL); u32 id = get_my_id(); const char *enable_method; @@ -97,7 +97,16 @@ void ft_fixup_cpu(void *blob, u64 memory_limit) if ((u64)bootpg < memory_limit) { off = fdt_add_mem_rsv(blob, bootpg, (u64)4096); if (off < 0) - printf("%s: %s\n", __FUNCTION__, fdt_strerror(off)); + printf("Failed to reserve memory for bootpg: %s\n", + fdt_strerror(off)); + } + /* Reserve spin table page */ + if (spin_tbl_addr < memory_limit) { + off = fdt_add_mem_rsv(blob, + (spin_tbl_addr & ~0xffful), 4096); + if (off < 0) + printf("Failed to reserve memory for spin table: %s\n", + fdt_strerror(off)); } } #endif diff --git a/arch/powerpc/cpu/mpc85xx/mp.c b/arch/powerpc/cpu/mpc85xx/mp.c index 881b681755..e1197ac9e5 100644 --- a/arch/powerpc/cpu/mpc85xx/mp.c +++ b/arch/powerpc/cpu/mpc85xx/mp.c @@ -33,6 +33,8 @@ DECLARE_GLOBAL_DATA_PTR; u32 fsl_ddr_get_intl3r(void); +extern u32 __spin_table[]; + u32 get_my_id() { return mfspr(SPRN_PIR); @@ -78,10 +80,10 @@ int cpu_status(int nr) return 0; if (nr == id) { - table = (u32 *)get_spin_virt_addr(); + table = (u32 *)&__spin_table; printf("table base @ 0x%p\n", table); } else { - table = (u32 *)get_spin_virt_addr() + nr * NUM_BOOT_ENTRY; + table = (u32 *)&__spin_table + nr * NUM_BOOT_ENTRY; printf("Running on cpu %d\n", id); printf("\n"); printf("table @ 0x%p\n", table); @@ -154,7 +156,7 @@ static u8 boot_entry_map[4] = { int cpu_release(int nr, int argc, char * const argv[]) { - u32 i, val, *table = (u32 *)get_spin_virt_addr() + nr * NUM_BOOT_ENTRY; + u32 i, val, *table = (u32 *)&__spin_table + nr * NUM_BOOT_ENTRY; u64 boot_addr; if (hold_cores_in_reset(1)) @@ -200,11 +202,11 @@ u32 determine_mp_bootpg(unsigned int *pagesize) struct law_entry e; #endif - /* if we have 4G or more of memory, put the boot page at 4Gb-4k */ - if ((u64)gd->ram_size > 0xfffff000) - bootpg = 0xfffff000; - else - bootpg = gd->ram_size - 4096; + + /* use last 4K of mapped memory */ + bootpg = ((gd->ram_size > CONFIG_MAX_MEM_MAPPED) ? + CONFIG_MAX_MEM_MAPPED : gd->ram_size) + + CONFIG_SYS_SDRAM_BASE - 4096; if (pagesize) *pagesize = 4096; @@ -255,29 +257,16 @@ u32 determine_mp_bootpg(unsigned int *pagesize) return bootpg; } -ulong get_spin_phys_addr(void) -{ - extern ulong __secondary_start_page; - extern ulong __spin_table; - - return (determine_mp_bootpg() + - (ulong)&__spin_table - (ulong)&__secondary_start_page); -} - -ulong get_spin_virt_addr(void) +phys_addr_t get_spin_phys_addr(void) { - extern ulong __secondary_start_page; - extern ulong __spin_table; - - return (CONFIG_BPTR_VIRT_ADDR + - (ulong)&__spin_table - (ulong)&__secondary_start_page); + return virt_to_phys(&__spin_table); } #ifdef CONFIG_FSL_CORENET static void plat_mp_up(unsigned long bootpg, unsigned int pagesize) { u32 cpu_up_mask, whoami, brsize = LAW_SIZE_4K; - u32 *table = (u32 *)get_spin_virt_addr(); + u32 *table = (u32 *)&__spin_table; volatile ccsr_gur_t *gur; volatile ccsr_local_t *ccm; volatile ccsr_rcpm_t *rcpm; @@ -356,7 +345,7 @@ static void plat_mp_up(unsigned long bootpg, unsigned int pagesize) static void plat_mp_up(unsigned long bootpg, unsigned int pagesize) { u32 up, cpu_up_mask, whoami; - u32 *table = (u32 *)get_spin_virt_addr(); + u32 *table = (u32 *)&__spin_table; volatile u32 bpcr; volatile ccsr_local_ecm_t *ecm = (void *)(CONFIG_SYS_MPC85xx_ECM_ADDR); volatile ccsr_gur_t *gur = (void *)(CONFIG_SYS_MPC85xx_GUTS_ADDR); @@ -440,10 +429,11 @@ void cpu_mp_lmb_reserve(struct lmb *lmb) void setup_mp(void) { - extern ulong __secondary_start_page; - extern ulong __bootpg_addr; + extern u32 __secondary_start_page; + extern u32 __bootpg_addr, __spin_table_addr, __second_half_boot_page; - ulong fixup = (ulong)&__secondary_start_page; + int i; + ulong fixup = (u32)&__secondary_start_page; u32 bootpg, bootpg_map, pagesize; bootpg = determine_mp_bootpg(&pagesize); @@ -464,11 +454,20 @@ void setup_mp(void) if (hold_cores_in_reset(0)) return; - /* Store the bootpg's SDRAM address for use by secondary CPU cores */ - __bootpg_addr = bootpg; + /* + * Store the bootpg's cache-able half address for use by secondary + * CPU cores to continue to boot + */ + __bootpg_addr = (u32)virt_to_phys(&__second_half_boot_page); + + /* Store spin table's physical address for use by secondary cores */ + __spin_table_addr = (u32)get_spin_phys_addr(); + + /* flush bootpg it before copying invalidate any staled cacheline */ + flush_cache(bootpg, 4096); /* look for the tlb covering the reset page, there better be one */ - int i = find_tlb_idx((void *)CONFIG_BPTR_VIRT_ADDR, 1); + i = find_tlb_idx((void *)CONFIG_BPTR_VIRT_ADDR, 1); /* we found a match */ if (i != -1) { diff --git a/arch/powerpc/cpu/mpc85xx/mp.h b/arch/powerpc/cpu/mpc85xx/mp.h index 87bac37152..ad9950bcf5 100644 --- a/arch/powerpc/cpu/mpc85xx/mp.h +++ b/arch/powerpc/cpu/mpc85xx/mp.h @@ -3,8 +3,7 @@ #include -ulong get_spin_phys_addr(void); -ulong get_spin_virt_addr(void); +phys_addr_t get_spin_phys_addr(void); u32 get_my_id(void); int hold_cores_in_reset(int verbose); @@ -16,7 +15,7 @@ int hold_cores_in_reset(int verbose); #define BOOT_ENTRY_PIR 5 #define BOOT_ENTRY_R6_UPPER 6 #define BOOT_ENTRY_R6_LOWER 7 -#define NUM_BOOT_ENTRY 8 +#define NUM_BOOT_ENTRY 16 /* pad to 64 bytes */ #define SIZE_BOOT_ENTRY (NUM_BOOT_ENTRY * sizeof(u32)) #endif diff --git a/arch/powerpc/cpu/mpc85xx/release.S b/arch/powerpc/cpu/mpc85xx/release.S index d08b1f3dca..4ba44a9028 100644 --- a/arch/powerpc/cpu/mpc85xx/release.S +++ b/arch/powerpc/cpu/mpc85xx/release.S @@ -150,10 +150,14 @@ __secondary_start_page: #define toreset(x) (x - __secondary_start_page + 0xfffff000) /* get our PIR to figure out our table entry */ - lis r3,toreset(__spin_table)@h - ori r3,r3,toreset(__spin_table)@l + lis r3,toreset(__spin_table_addr)@h + ori r3,r3,toreset(__spin_table_addr)@l + lwz r3,0(r3) - /* r10 has the base address for the entry */ + /* + * r10 has the base address for the entry. + * we cannot access it yet before setting up a new TLB + */ mfspr r0,SPRN_PIR #if defined(CONFIG_E6500) /* @@ -180,7 +184,7 @@ __secondary_start_page: #else mr r4,r0 #endif - slwi r8,r4,5 + slwi r8,r4,6 /* spin table is padded to 64 byte */ add r10,r3,r8 #ifdef CONFIG_E6500 @@ -277,73 +281,111 @@ __secondary_start_page: beq 2b #endif 3: - -#define EPAPR_MAGIC (0x45504150) -#define ENTRY_ADDR_UPPER 0 -#define ENTRY_ADDR_LOWER 4 -#define ENTRY_R3_UPPER 8 -#define ENTRY_R3_LOWER 12 -#define ENTRY_RESV 16 -#define ENTRY_PIR 20 -#define ENTRY_R6_UPPER 24 -#define ENTRY_R6_LOWER 28 -#define ENTRY_SIZE 32 - - /* setup the entry */ - li r3,0 - li r8,1 - stw r4,ENTRY_PIR(r10) - stw r3,ENTRY_ADDR_UPPER(r10) - stw r8,ENTRY_ADDR_LOWER(r10) - stw r3,ENTRY_R3_UPPER(r10) - stw r4,ENTRY_R3_LOWER(r10) - stw r3,ENTRY_R6_UPPER(r10) - stw r3,ENTRY_R6_LOWER(r10) - - /* load r13 with the address of the 'bootpg' in SDRAM */ - lis r13,toreset(__bootpg_addr)@h - ori r13,r13,toreset(__bootpg_addr)@l + /* setup mapping for the spin table, WIMGE=0b00100 */ + lis r13,toreset(__spin_table_addr)@h + ori r13,r13,toreset(__spin_table_addr)@l lwz r13,0(r13) + /* mask by 4K */ + rlwinm r13,r13,0,0,19 - /* setup mapping for AS = 1, and jump there */ lis r11,(MAS0_TLBSEL(1)|MAS0_ESEL(1))@h mtspr SPRN_MAS0,r11 lis r11,(MAS1_VALID|MAS1_IPROT)@h ori r11,r11,(MAS1_TS|MAS1_TSIZE(BOOKE_PAGESZ_4K))@l mtspr SPRN_MAS1,r11 - oris r11,r13,(MAS2_I|MAS2_G)@h - ori r11,r13,(MAS2_I|MAS2_G)@l + oris r11,r13,(MAS2_M|MAS2_G)@h + ori r11,r13,(MAS2_M|MAS2_G)@l mtspr SPRN_MAS2,r11 oris r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@h ori r11,r13,(MAS3_SX|MAS3_SW|MAS3_SR)@l mtspr SPRN_MAS3,r11 + li r11,0 + mtspr SPRN_MAS7,r11 tlbwe - bl 1f -1: mflr r11 /* - * OR in 0xfff to create a mask of the bootpg SDRAM address. We use - * this mask to fixup the cpu spin table and the address that we want - * to jump to, eg change them from 0xfffffxxx to 0x7ffffxxx if the - * bootpg is at 0x7ffff000 in SDRAM. + * __bootpg_addr has the address of __second_half_boot_page + * jump there in AS=1 space with cache enabled */ - ori r13,r13,0xfff - and r11, r11, r13 - and r10, r10, r13 - - addi r11,r11,(2f-1b) + lis r13,toreset(__bootpg_addr)@h + ori r13,r13,toreset(__bootpg_addr)@l + lwz r11,0(r13) + mtspr SPRN_SRR0,r11 mfmsr r13 ori r12,r13,MSR_IS|MSR_DS@l - - mtspr SPRN_SRR0,r11 mtspr SPRN_SRR1,r12 rfi + /* + * Allocate some space for the SDRAM address of the bootpg. + * This variable has to be in the boot page so that it can + * be accessed by secondary cores when they come out of reset. + */ + .align L1_CACHE_SHIFT + .globl __bootpg_addr +__bootpg_addr: + .long 0 + + .global __spin_table_addr +__spin_table_addr: + .long 0 + + /* + * This variable is set by cpu_init_r() after parsing hwconfig + * to enable workaround for erratum NMG_CPU_A011. + */ + .align L1_CACHE_SHIFT + .global enable_cpu_a011_workaround +enable_cpu_a011_workaround: + .long 1 + + /* Fill in the empty space. The actual reset vector is + * the last word of the page */ +__secondary_start_code_end: + .space 4092 - (__secondary_start_code_end - __secondary_start_page) +__secondary_reset_vector: + b __secondary_start_page + + +/* this is a separated page for the spin table and cacheable boot code */ + .align L1_CACHE_SHIFT + .global __second_half_boot_page +__second_half_boot_page: +#define EPAPR_MAGIC 0x45504150 +#define ENTRY_ADDR_UPPER 0 +#define ENTRY_ADDR_LOWER 4 +#define ENTRY_R3_UPPER 8 +#define ENTRY_R3_LOWER 12 +#define ENTRY_RESV 16 +#define ENTRY_PIR 20 +#define ENTRY_SIZE 64 + /* + * setup the entry + * r10 has the base address of the spin table. + * spin table is defined as + * struct { + * uint64_t entry_addr; + * uint64_t r3; + * uint32_t rsvd1; + * uint32_t pir; + * }; + * we pad this struct to 64 bytes so each entry is in its own cacheline + */ + li r3,0 + li r8,1 + mfspr r4,SPRN_PIR + stw r3,ENTRY_ADDR_UPPER(r10) + stw r3,ENTRY_R3_UPPER(r10) + stw r4,ENTRY_R3_LOWER(r10) + stw r3,ENTRY_RESV(r10) + stw r4,ENTRY_PIR(r10) + msync + stw r8,ENTRY_ADDR_LOWER(r10) + /* spin waiting for addr */ -2: - lwz r4,ENTRY_ADDR_LOWER(r10) +3: lwz r4,ENTRY_ADDR_LOWER(r10) andi. r11,r4,1 - bne 2b + bne 3b isync /* setup IVORs to match fixed offsets */ @@ -362,8 +404,17 @@ __secondary_start_page: /* mask by ~64M to setup our tlb we will jump to */ rlwinm r12,r4,0,0,5 - /* setup r3, r4, r5, r6, r7, r8, r9 */ + /* + * setup r3, r4, r5, r6, r7, r8, r9 + * r3 contains the value to put in the r3 register at secondary cpu + * entry. The high 32-bits are ignored on 32-bit chip implementations. + * 64-bit chip implementations however shall load all 64-bits + */ +#ifdef CONFIG_SYS_PPC64 + ld r3,ENTRY_R3_UPPER(r10) +#else lwz r3,ENTRY_R3_LOWER(r10) +#endif li r4,0 li r5,0 li r6,0 @@ -404,32 +455,10 @@ __secondary_start_page: mtspr SPRN_SRR1,r13 rfi - /* - * Allocate some space for the SDRAM address of the bootpg. - * This variable has to be in the boot page so that it can - * be accessed by secondary cores when they come out of reset. - */ - .globl __bootpg_addr -__bootpg_addr: - .long 0 - .align L1_CACHE_SHIFT + .align 6 .globl __spin_table __spin_table: .space CONFIG_MAX_CPUS*ENTRY_SIZE - - /* - * This variable is set by cpu_init_r() after parsing hwconfig - * to enable workaround for erratum NMG_CPU_A011. - */ - .align L1_CACHE_SHIFT - .global enable_cpu_a011_workaround -enable_cpu_a011_workaround: - .long 1 - - /* Fill in the empty space. The actual reset vector is - * the last word of the page */ -__secondary_start_code_end: - .space 4092 - (__secondary_start_code_end - __secondary_start_page) -__secondary_reset_vector: - b __secondary_start_page +__spin_table_end: + .space 4096 - (__spin_table_end - __spin_table) diff --git a/arch/powerpc/cpu/mpc85xx/tlb.c b/arch/powerpc/cpu/mpc85xx/tlb.c index 929f6a607e..a548dec9a7 100644 --- a/arch/powerpc/cpu/mpc85xx/tlb.c +++ b/arch/powerpc/cpu/mpc85xx/tlb.c @@ -249,7 +249,7 @@ setup_ddr_tlbs_phys(phys_addr_t p_addr, unsigned int memsize_in_meg) { int i; unsigned int tlb_size; - unsigned int wimge = 0; + unsigned int wimge = MAS2_M; unsigned int ram_tlb_address = (unsigned int)CONFIG_SYS_DDR_SDRAM_BASE; unsigned int max_cam; u64 size, memsize = (u64)memsize_in_meg << 20; diff --git a/arch/powerpc/include/asm/config_mpc85xx.h b/arch/powerpc/include/asm/config_mpc85xx.h index 92ca2ad74d..8a7c81b808 100644 --- a/arch/powerpc/include/asm/config_mpc85xx.h +++ b/arch/powerpc/include/asm/config_mpc85xx.h @@ -416,6 +416,7 @@ #define CONFIG_SYS_FSL_ERRATUM_SRIO_A004034 #elif defined(CONFIG_PPC_P5020) /* also supports P5010 */ +#define CONFIG_SYS_PPC64 /* 64-bit core */ #define CONFIG_SYS_FSL_QORIQ_CHASSIS1 #define CONFIG_MAX_CPUS 2 #define CONFIG_SYS_FSL_NUM_CC_PLLS 2 @@ -485,6 +486,7 @@ #define CONFIG_SYS_FSL_ERRATUM_ESDHC111 #elif defined(CONFIG_PPC_T4240) +#define CONFIG_SYS_PPC64 /* 64-bit core */ #define CONFIG_FSL_CORENET /* Freescale CoreNet platform */ #define CONFIG_SYS_FSL_QORIQ_CHASSIS2 /* Freescale Chassis generation 2 */ #define CONFIG_SYS_FSL_QMAN_V3 /* QMAN version 3 */ @@ -516,6 +518,7 @@ #define CONFIG_SYS_CCSRBAR_DEFAULT 0xfe000000 #elif defined(CONFIG_PPC_B4860) +#define CONFIG_SYS_PPC64 /* 64-bit core */ #define CONFIG_FSL_CORENET /* Freescale CoreNet platform */ #define CONFIG_SYS_FSL_QORIQ_CHASSIS2 /* Freescale Chassis generation 2 */ #define CONFIG_SYS_FSL_QMAN_V3 /* QMAN version 3 */ diff --git a/doc/README.mpc85xx-spin-table b/doc/README.mpc85xx-spin-table new file mode 100644 index 0000000000..8da768a2a6 --- /dev/null +++ b/doc/README.mpc85xx-spin-table @@ -0,0 +1,26 @@ +Spin table in cache +===================================== +As specified by ePAPR v1.1, the spin table needs to be in cached memory. After +DDR is initialized and U-boot relocates itself into DDR, the spin table is +accessible for core 0. It is part of release.S, within 4KB range after +__secondary_start_page. For other cores to use the spin table, the booting +process is described below: + +Core 0 sets up the reset page on the top 4K of memory (or 4GB if total memory +is more than 4GB), and creates a TLB to map it to 0xffff_f000, regardless of +the physical address of this page, with WIMGE=0b01010. Core 0 also enables boot +page translation for secondary cores to use this page of memory. Then 4KB +memory is copied from __secondary_start_page to the boot page, after flusing +cache because this page is mapped as normal DDR. Before copying the reset page, +core 0 puts the physical address of the spin table (which is in release.S and +relocated to the top of mapped memory) into a variable __spin_table_addr so +that secondary cores can see it. + +When secondary cores boot up from 0xffff_f000 page, they only have one default +TLB. While booting, they set up another TLB in AS=1 space and jump into +the new space. The new TLB covers the physical address of the spin table page, +with WIMGE =0b00100. Now secondary cores can keep polling the spin table +without stress DDR bus because both the code and the spin table is in cache. + +For the above to work, DDR has to set the 'M' bit of WIMGE, in order to keep +cache coherence.