From 1e6ad55c058200010bb0649524a2c874e7049242 Mon Sep 17 00:00:00 2001
From: York Sun <yorksun@freescale.com>
Date: Wed, 26 Feb 2014 13:26:04 -0800
Subject: [PATCH] armv8/cache: Change cache invalidate and flush function

When the SoC first boots up, we should invalidate the cache but not flush it.
We can mostly use the same function for invalidate and flush, with a wrapper.

Invalidating a large cache can be slow on an emulator, so we postpone doing
so until I-cache is enabled, and before enabling D-cache.

Signed-off-by: York Sun <yorksun@freescale.com>
CC: David Feng <fenghua@phytium.com.cn>
---
 arch/arm/cpu/armv8/cache.S    | 53 +++++++++++++++++++++++++----------
 arch/arm/cpu/armv8/cache_v8.c |  3 +-
 arch/arm/cpu/armv8/start.S    | 10 ++++---
 arch/arm/include/asm/system.h |  1 +
 4 files changed, 47 insertions(+), 20 deletions(-)

diff --git a/arch/arm/cpu/armv8/cache.S b/arch/arm/cpu/armv8/cache.S
index 546a83e8f8..249799cd01 100644
--- a/arch/arm/cpu/armv8/cache.S
+++ b/arch/arm/cpu/armv8/cache.S
@@ -19,11 +19,12 @@
  * clean and invalidate one level cache.
  *
  * x0: cache level
- * x1~x9: clobbered
+ * x1: 0 flush & invalidate, 1 invalidate only
+ * x2~x9, x12: clobbered
  */
 ENTRY(__asm_flush_dcache_level)
-	lsl	x1, x0, #1
-	msr	csselr_el1, x1		/* select cache level */
+	lsl	x12, x0, #1
+	msr	csselr_el1, x12		/* select cache level */
 	isb				/* sync change of cssidr_el1 */
 	mrs	x6, ccsidr_el1		/* read the new cssidr_el1 */
 	and	x2, x6, #7		/* x2 <- log2(cache line size)-4 */
@@ -35,7 +36,7 @@ ENTRY(__asm_flush_dcache_level)
 	clz	w5, w4			/* bit position of #ways */
 	mov	x4, #0x7fff
 	and	x4, x4, x6, lsr #13	/* x4 <- max number of #sets */
-	/* x1 <- cache level << 1 */
+	/* x12 <- cache level << 1 */
 	/* x2 <- line length offset */
 	/* x3 <- number of cache ways - 1 */
 	/* x4 <- number of cache sets - 1 */
@@ -45,11 +46,14 @@ loop_set:
 	mov	x6, x3			/* x6 <- working copy of #ways */
 loop_way:
 	lsl	x7, x6, x5
-	orr	x9, x1, x7		/* map way and level to cisw value */
+	orr	x9, x12, x7		/* map way and level to cisw value */
 	lsl	x7, x4, x2
 	orr	x9, x9, x7		/* map set number to cisw value */
-	dc	cisw, x9		/* clean & invalidate by set/way */
-	subs	x6, x6, #1		/* decrement the way */
+	tbz	w1, #0, 1f
+	dc	isw, x9
+	b	2f
+1:	dc	cisw, x9		/* clean & invalidate by set/way */
+2:	subs	x6, x6, #1		/* decrement the way */
 	b.ge	loop_way
 	subs	x4, x4, #1		/* decrement the set */
 	b.ge	loop_set
@@ -58,11 +62,14 @@ loop_way:
 ENDPROC(__asm_flush_dcache_level)
 
 /*
- * void __asm_flush_dcache_all(void)
+ * void __asm_dcache_all(int invalidate_only)
+ *
+ * x0: 0 flush & invalidate, 1 invalidate only
  *
  * clean and invalidate all data cache by SET/WAY.
  */
-ENTRY(__asm_flush_dcache_all)
+ENTRY(__asm_dcache_all)
+	mov	x1, x0
 	dsb	sy
 	mrs	x10, clidr_el1		/* read clidr_el1 */
 	lsr	x11, x10, #24
@@ -76,13 +83,13 @@ ENTRY(__asm_flush_dcache_all)
 	/* x15 <- return address */
 
 loop_level:
-	lsl	x1, x0, #1
-	add	x1, x1, x0		/* x0 <- tripled cache level */
-	lsr	x1, x10, x1
-	and	x1, x1, #7		/* x1 <- cache type */
-	cmp	x1, #2
+	lsl	x12, x0, #1
+	add	x12, x12, x0		/* x12 <- tripled cache level */
+	lsr	x12, x10, x12
+	and	x12, x12, #7		/* x12 <- cache type */
+	cmp	x12, #2
 	b.lt	skip			/* skip if no cache or icache */
-	bl	__asm_flush_dcache_level
+	bl	__asm_flush_dcache_level	/* x1 = 0 flush, 1 invalidate */
 skip:
 	add	x0, x0, #1		/* increment cache level */
 	cmp	x11, x0
@@ -96,8 +103,24 @@ skip:
 
 finished:
 	ret
+ENDPROC(__asm_dcache_all)
+
+ENTRY(__asm_flush_dcache_all)
+	mov	x16, lr			/* keep caller's lr; bl below overwrites it */
+	mov	x0, #0			/* 0: clean & invalidate */
+	bl	__asm_dcache_all
+	mov	lr, x16			/* restore caller's return address */
+	ret
 ENDPROC(__asm_flush_dcache_all)
 
+ENTRY(__asm_invalidate_dcache_all)
+	mov	x16, lr			/* keep caller's lr; bl below overwrites it */
+	mov	x0, #1			/* 1: invalidate only, no clean */
+	bl	__asm_dcache_all
+	mov	lr, x16			/* restore caller's return address */
+	ret
+ENDPROC(__asm_invalidate_dcache_all)
+
 /*
  * void __asm_flush_dcache_range(start, end)
  *
diff --git a/arch/arm/cpu/armv8/cache_v8.c b/arch/arm/cpu/armv8/cache_v8.c
index 7acae1b0ac..a96ecda7e3 100644
--- a/arch/arm/cpu/armv8/cache_v8.c
+++ b/arch/arm/cpu/armv8/cache_v8.c
@@ -80,7 +80,7 @@ static void mmu_setup(void)
  */
 void invalidate_dcache_all(void)
 {
-	__asm_flush_dcache_all();
+	__asm_invalidate_dcache_all();
 }
 
 /*
@@ -177,6 +177,7 @@ int dcache_status(void)
 
 void icache_enable(void)
 {
+	__asm_invalidate_icache_all();
 	set_sctlr(get_sctlr() | CR_I);
 }
 
diff --git a/arch/arm/cpu/armv8/start.S b/arch/arm/cpu/armv8/start.S
index e70c51d43d..4f95289b5e 100644
--- a/arch/arm/cpu/armv8/start.S
+++ b/arch/arm/cpu/armv8/start.S
@@ -64,10 +64,12 @@ reset:
 	msr	cpacr_el1, x0			/* Enable FP/SIMD */
 0:
 
-	/* Cache/BPB/TLB Invalidate */
-	bl	__asm_flush_dcache_all		/* dCache clean&invalidate */
-	bl	__asm_invalidate_icache_all	/* iCache invalidate */
-	bl	__asm_invalidate_tlb_all	/* invalidate TLBs */
+	/*
+	 * Cache/BPB/TLB Invalidate
+	 * i-cache is invalidated before enabled in icache_enable()
+	 * tlb is invalidated before mmu is enabled in dcache_enable()
+	 * d-cache is invalidated before enabled in dcache_enable()
+	 */
 
 	/* Processor specific initialization */
 	bl	lowlevel_init
diff --git a/arch/arm/include/asm/system.h b/arch/arm/include/asm/system.h
index 4178f8cf7e..74ee9a4df9 100644
--- a/arch/arm/include/asm/system.h
+++ b/arch/arm/include/asm/system.h
@@ -66,6 +66,7 @@ static inline void set_sctlr(unsigned int val)
 }
 
 void __asm_flush_dcache_all(void);
+void __asm_invalidate_dcache_all(void);
 void __asm_flush_dcache_range(u64 start, u64 end);
 void __asm_invalidate_tlb_all(void);
 void __asm_invalidate_icache_all(void);
-- 
2.39.5