git.dujemihanovic.xyz Git - u-boot.git/commitdiff
sunxi: video: Use frontend for dma on sun4i to fix memory bandwidth problems
author Hans de Goede <hdegoede@redhat.com>
Mon, 19 Jan 2015 07:44:07 +0000 (08:44 +0100)
committer Hans de Goede <hdegoede@redhat.com>
Thu, 22 Jan 2015 11:34:56 +0000 (12:34 +0100)
Testing has shown that on sun4i the display backend engine does not have
deep enough fifo-s causing flickering / tearing in full-hd mode due to
fifo underruns. On sun4i use the display frontend engine to do the dma from
memory, as the frontend does have deep enough fifo-s.

An added advantage is that this results in much better memory bandwidth, as it
reduces the number of DRAM bank switches. For more details see:

http://ssvb.github.io/2014/11/11/revisiting-fullhd-x11-desktop-performance-of-the-allwinner-a10.html

Note that this changes the pipeline searched for in the simplefb node. We can
get away with doing this now, since no kernel has yet shipped with simplefb
dtb nodes, and I will make sure to get a simplefb node with the new pipeline
into 3.19 before it ships.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Acked-by: Ian Campbell <ijc@hellion.org.uk>
arch/arm/include/asm/arch-sunxi/clock_sun4i.h
arch/arm/include/asm/arch-sunxi/display.h
drivers/video/sunxi_display.c

index 5ebf8564d452c5baeaf6943497fb16798807a02f..05fbad3e111a59139f5b8106005e1fba9891943a 100644 (file)
@@ -186,6 +186,7 @@ struct sunxi_ccm_reg {
 
 /* ahb clock gate bit offset (second register) */
 #define AHB_GATE_OFFSET_GMAC           17
+#define AHB_GATE_OFFSET_DE_FE0         14
 #define AHB_GATE_OFFSET_DE_BE0         12
 #define AHB_GATE_OFFSET_HDMI           11
 #define AHB_GATE_OFFSET_LCD1           5
@@ -266,7 +267,10 @@ struct sunxi_ccm_reg {
 #define CCM_MMC_CTRL_PLL5              (0x2 << 24)
 #define CCM_MMC_CTRL_ENABLE            (0x1 << 31)
 
+#define CCM_DRAM_GATE_OFFSET_DE_FE1    24 /* Note the order of FE1 and */
+#define CCM_DRAM_GATE_OFFSET_DE_FE0    25 /* FE0 is swapped ! */
 #define CCM_DRAM_GATE_OFFSET_DE_BE0    26
+#define CCM_DRAM_GATE_OFFSET_DE_BE1    27
 
 #define CCM_LCD_CH0_CTRL_PLL3          (0 << 24)
 #define CCM_LCD_CH0_CTRL_PLL7          (1 << 24)
index ff92a10c40122b067a12625c343867809b2b26b6..5e9425320366d96e44a1512baba2780108e66095 100644 (file)
@@ -9,6 +9,107 @@
 #ifndef _SUNXI_DISPLAY_H
 #define _SUNXI_DISPLAY_H
 
+struct sunxi_de_fe_reg {
+       u32 enable;                     /* 0x000 */
+       u32 frame_ctrl;                 /* 0x004 */
+       u32 bypass;                     /* 0x008 */
+       u32 algorithm_sel;              /* 0x00c */
+       u32 line_int_ctrl;              /* 0x010 */
+       u8 res0[0x0c];                  /* 0x014 */
+       u32 ch0_addr;                   /* 0x020 */
+       u32 ch1_addr;                   /* 0x024 */
+       u32 ch2_addr;                   /* 0x028 */
+       u32 field_sequence;             /* 0x02c */
+       u32 ch0_offset;                 /* 0x030 */
+       u32 ch1_offset;                 /* 0x034 */
+       u32 ch2_offset;                 /* 0x038 */
+       u8 res1[0x04];                  /* 0x03c */
+       u32 ch0_stride;                 /* 0x040 */
+       u32 ch1_stride;                 /* 0x044 */
+       u32 ch2_stride;                 /* 0x048 */
+       u32 input_fmt;                  /* 0x04c */
+       u32 ch3_addr;                   /* 0x050 */
+       u32 ch4_addr;                   /* 0x054 */
+       u32 ch5_addr;                   /* 0x058 */
+       u32 output_fmt;                 /* 0x05c */
+       u32 int_enable;                 /* 0x060 */
+       u32 int_status;                 /* 0x064 */
+       u32 status;                     /* 0x068 */
+       u8 res2[0x04];                  /* 0x06c */
+       u32 csc_coef00;                 /* 0x070 */
+       u32 csc_coef01;                 /* 0x074 */
+       u32 csc_coef02;                 /* 0x078 */
+       u32 csc_coef03;                 /* 0x07c */
+       u32 csc_coef10;                 /* 0x080 */
+       u32 csc_coef11;                 /* 0x084 */
+       u32 csc_coef12;                 /* 0x088 */
+       u32 csc_coef13;                 /* 0x08c */
+       u32 csc_coef20;                 /* 0x090 */
+       u32 csc_coef21;                 /* 0x094 */
+       u32 csc_coef22;                 /* 0x098 */
+       u32 csc_coef23;                 /* 0x09c */
+       u32 deinterlace_ctrl;           /* 0x0a0 */
+       u32 deinterlace_diag;           /* 0x0a4 */
+       u32 deinterlace_tempdiff;       /* 0x0a8 */
+       u32 deinterlace_sawtooth;       /* 0x0ac */
+       u32 deinterlace_spatcomp;       /* 0x0b0 */
+       u32 deinterlace_burstlen;       /* 0x0b4 */
+       u32 deinterlace_preluma;        /* 0x0b8 */
+       u32 deinterlace_tile_addr;      /* 0x0bc */
+       u32 deinterlace_tile_stride;    /* 0x0c0 */
+       u8 res3[0x0c];                  /* 0x0c4 */
+       u32 wb_stride_enable;           /* 0x0d0 */
+       u32 ch3_stride;                 /* 0x0d4 */
+       u32 ch4_stride;                 /* 0x0d8 */
+       u32 ch5_stride;                 /* 0x0dc */
+       u32 fe_3d_ctrl;                 /* 0x0e0 */
+       u32 fe_3d_ch0_addr;             /* 0x0e4 */
+       u32 fe_3d_ch1_addr;             /* 0x0e8 */
+       u32 fe_3d_ch2_addr;             /* 0x0ec */
+       u32 fe_3d_ch0_offset;           /* 0x0f0 */
+       u32 fe_3d_ch1_offset;           /* 0x0f4 */
+       u32 fe_3d_ch2_offset;           /* 0x0f8 */
+       u8 res4[0x04];                  /* 0x0fc */
+       u32 ch0_insize;                 /* 0x100 */
+       u32 ch0_outsize;                /* 0x104 */
+       u32 ch0_horzfact;               /* 0x108 */
+       u32 ch0_vertfact;               /* 0x10c */
+       u32 ch0_horzphase;              /* 0x110 */
+       u32 ch0_vertphase0;             /* 0x114 */
+       u32 ch0_vertphase1;             /* 0x118 */
+       u8 res5[0x04];                  /* 0x11c */
+       u32 ch0_horztapoffset0;         /* 0x120 */
+       u32 ch0_horztapoffset1;         /* 0x124 */
+       u32 ch0_verttapoffset;          /* 0x128 */
+       u8 res6[0xd4];                  /* 0x12c */
+       u32 ch1_insize;                 /* 0x200 */
+       u32 ch1_outsize;                /* 0x204 */
+       u32 ch1_horzfact;               /* 0x208 */
+       u32 ch1_vertfact;               /* 0x20c */
+       u32 ch1_horzphase;              /* 0x210 */
+       u32 ch1_vertphase0;             /* 0x214 */
+       u32 ch1_vertphase1;             /* 0x218 */
+       u8 res7[0x04];                  /* 0x21c */
+       u32 ch1_horztapoffset0;         /* 0x220 */
+       u32 ch1_horztapoffset1;         /* 0x224 */
+       u32 ch1_verttapoffset;          /* 0x228 */
+       u8 res8[0x1d4];                 /* 0x22c */
+       u32 ch0_horzcoef0[32];          /* 0x400 */
+       u32 ch0_horzcoef1[32];          /* 0x480 */
+       u32 ch0_vertcoef[32];           /* 0x500 */
+       u8 res9[0x80];                  /* 0x580 */
+       u32 ch1_horzcoef0[32];          /* 0x600 */
+       u32 ch1_horzcoef1[32];          /* 0x680 */
+       u32 ch1_vertcoef[32];           /* 0x700 */
+       u8 res10[0x280];                /* 0x780 */
+       u32 vpp_enable;                 /* 0xa00 */
+       u32 vpp_dcti;                   /* 0xa04 */
+       u32 vpp_lp1;                    /* 0xa08 */
+       u32 vpp_lp2;                    /* 0xa0c */
+       u32 vpp_wle;                    /* 0xa10 */
+       u32 vpp_ble;                    /* 0xa14 */
+};
+
 struct sunxi_de_be_reg {
        u8 res0[0x800];                 /* 0x000 */
        u32 mode;                       /* 0x800 */
@@ -209,6 +310,20 @@ struct sunxi_tve_reg {
        u32 cfg2;                       /* 0x13c */
 };
 
+/*
+ * DE-FE register constants.
+ */
+#define SUNXI_DE_FE_WIDTH(x)                   (((x) - 1) << 0)
+#define SUNXI_DE_FE_HEIGHT(y)                  (((y) - 1) << 16)
+#define SUNXI_DE_FE_FACTOR_INT(n)              ((n) << 16)
+#define SUNXI_DE_FE_ENABLE_EN                  (1 << 0)
+#define SUNXI_DE_FE_FRAME_CTRL_REG_RDY         (1 << 0)
+#define SUNXI_DE_FE_FRAME_CTRL_COEF_RDY                (1 << 1)
+#define SUNXI_DE_FE_FRAME_CTRL_FRM_START       (1 << 16)
+#define SUNXI_DE_FE_BYPASS_CSC_BYPASS          (1 << 1)
+#define SUNXI_DE_FE_INPUT_FMT_ARGB8888         0x00000151
+#define SUNXI_DE_FE_OUTPUT_FMT_ARGB8888                0x00000002
+
 /*
  * DE-BE register constants.
  */
@@ -219,6 +334,7 @@ struct sunxi_tve_reg {
 #define SUNXI_DE_BE_MODE_LAYER0_ENABLE         (1 << 8)
 #define SUNXI_DE_BE_LAYER_STRIDE(x)            ((x) << 5)
 #define SUNXI_DE_BE_REG_CTRL_LOAD_REGS         (1 << 0)
+#define SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0                0x00000002
 #define SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888   (0x09 << 8)
 
 /*
index 0505f3c96398578c91aad5d77a601fb74d840350..a6e3778ffe799ac63b4df9c829fd3b210404d229 100644 (file)
@@ -271,6 +271,114 @@ static int sunxi_hdmi_edid_get_mode(struct ctfb_res_modes *mode)
 
 #endif /* CONFIG_VIDEO_HDMI */
 
+#ifdef CONFIG_MACH_SUN4I
+/*
+ * Testing has shown that on sun4i the display backend engine does not have
+ * deep enough fifo-s causing flickering / tearing in full-hd mode due to
+ * fifo underruns. So on sun4i we use the display frontend engine to do the
+ * dma from memory, as the frontend does have deep enough fifo-s.
+ */
+
+static const u32 sun4i_vert_coef[32] = {
+       0x00004000, 0x000140ff, 0x00033ffe, 0x00043ffd,
+       0x00063efc, 0xff083dfc, 0x000a3bfb, 0xff0d39fb,
+       0xff0f37fb, 0xff1136fa, 0xfe1433fb, 0xfe1631fb,
+       0xfd192ffb, 0xfd1c2cfb, 0xfd1f29fb, 0xfc2127fc,
+       0xfc2424fc, 0xfc2721fc, 0xfb291ffd, 0xfb2c1cfd,
+       0xfb2f19fd, 0xfb3116fe, 0xfb3314fe, 0xfa3611ff,
+       0xfb370fff, 0xfb390dff, 0xfb3b0a00, 0xfc3d08ff,
+       0xfc3e0600, 0xfd3f0400, 0xfe3f0300, 0xff400100,
+};
+
+static const u32 sun4i_horz_coef[64] = {
+       0x40000000, 0x00000000, 0x40fe0000, 0x0000ff03,
+       0x3ffd0000, 0x0000ff05, 0x3ffc0000, 0x0000ff06,
+       0x3efb0000, 0x0000ff08, 0x3dfb0000, 0x0000ff09,
+       0x3bfa0000, 0x0000fe0d, 0x39fa0000, 0x0000fe0f,
+       0x38fa0000, 0x0000fe10, 0x36fa0000, 0x0000fe12,
+       0x33fa0000, 0x0000fd16, 0x31fa0000, 0x0000fd18,
+       0x2ffa0000, 0x0000fd1a, 0x2cfa0000, 0x0000fc1e,
+       0x29fa0000, 0x0000fc21, 0x27fb0000, 0x0000fb23,
+       0x24fb0000, 0x0000fb26, 0x21fb0000, 0x0000fb29,
+       0x1ffc0000, 0x0000fa2b, 0x1cfc0000, 0x0000fa2e,
+       0x19fd0000, 0x0000fa30, 0x16fd0000, 0x0000fa33,
+       0x14fd0000, 0x0000fa35, 0x11fe0000, 0x0000fa37,
+       0x0ffe0000, 0x0000fa39, 0x0dfe0000, 0x0000fa3b,
+       0x0afe0000, 0x0000fa3e, 0x08ff0000, 0x0000fb3e,
+       0x06ff0000, 0x0000fb40, 0x05ff0000, 0x0000fc40,
+       0x03ff0000, 0x0000fd41, 0x01ff0000, 0x0000fe42,
+};
+
+static void sunxi_frontend_init(void)
+{
+       struct sunxi_ccm_reg * const ccm =
+               (struct sunxi_ccm_reg *)SUNXI_CCM_BASE;
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+       int i;
+
+       /* Clocks on */
+       setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_FE0);
+       setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_FE0);
+       clock_set_de_mod_clock(&ccm->fe0_clk_cfg, 300000000);
+
+       setbits_le32(&de_fe->enable, SUNXI_DE_FE_ENABLE_EN);
+
+       for (i = 0; i < 32; i++) {
+               writel(sun4i_horz_coef[2 * i], &de_fe->ch0_horzcoef0[i]);
+               writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch0_horzcoef1[i]);
+               writel(sun4i_vert_coef[i], &de_fe->ch0_vertcoef[i]);
+               writel(sun4i_horz_coef[2 * i], &de_fe->ch1_horzcoef0[i]);
+               writel(sun4i_horz_coef[2 * i + 1], &de_fe->ch1_horzcoef1[i]);
+               writel(sun4i_vert_coef[i], &de_fe->ch1_vertcoef[i]);
+       }
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_COEF_RDY);
+}
+
+static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode,
+                                   unsigned int address)
+{
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+
+       setbits_le32(&de_fe->bypass, SUNXI_DE_FE_BYPASS_CSC_BYPASS);
+       writel(CONFIG_SYS_SDRAM_BASE + address, &de_fe->ch0_addr);
+       writel(mode->xres * 4, &de_fe->ch0_stride);
+       writel(SUNXI_DE_FE_INPUT_FMT_ARGB8888, &de_fe->input_fmt);
+       writel(SUNXI_DE_FE_OUTPUT_FMT_ARGB8888, &de_fe->output_fmt);
+
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch0_insize);
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch0_outsize);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_horzfact);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch0_vertfact);
+
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch1_insize);
+       writel(SUNXI_DE_FE_HEIGHT(mode->yres) | SUNXI_DE_FE_WIDTH(mode->xres),
+              &de_fe->ch1_outsize);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_horzfact);
+       writel(SUNXI_DE_FE_FACTOR_INT(1), &de_fe->ch1_vertfact);
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_REG_RDY);
+}
+
+static void sunxi_frontend_enable(void)
+{
+       struct sunxi_de_fe_reg * const de_fe =
+               (struct sunxi_de_fe_reg *)SUNXI_DE_FE0_BASE;
+
+       setbits_le32(&de_fe->frame_ctrl, SUNXI_DE_FE_FRAME_CTRL_FRM_START);
+}
+#else
+static void sunxi_frontend_init(void) {}
+static void sunxi_frontend_mode_set(const struct ctfb_res_modes *mode,
+                                   unsigned int address) {}
+static void sunxi_frontend_enable(void) {}
+#endif
+
 /*
  * This is the entity that mixes and matches the different layers and inputs.
  * Allwinner calls it the back-end, but i like composer better.
@@ -283,6 +391,8 @@ static void sunxi_composer_init(void)
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
        int i;
 
+       sunxi_frontend_init();
+
 #if defined CONFIG_MACH_SUN6I || defined CONFIG_MACH_SUN8I
        /* Reset off */
        setbits_le32(&ccm->ahb_reset1_cfg, 1 << AHB_RESET_OFFSET_DE_BE0);
@@ -290,7 +400,9 @@ static void sunxi_composer_init(void)
 
        /* Clocks on */
        setbits_le32(&ccm->ahb_gate1, 1 << AHB_GATE_OFFSET_DE_BE0);
+#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */
        setbits_le32(&ccm->dram_clk_gate, 1 << CCM_DRAM_GATE_OFFSET_DE_BE0);
+#endif
        clock_set_de_mod_clock(&ccm->be0_clk_cfg, 300000000);
 
        /* Engine bug, clear registers after reset */
@@ -306,13 +418,19 @@ static void sunxi_composer_mode_set(const struct ctfb_res_modes *mode,
        struct sunxi_de_be_reg * const de_be =
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
 
+       sunxi_frontend_mode_set(mode, address);
+
        writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres),
               &de_be->disp_size);
        writel(SUNXI_DE_BE_HEIGHT(mode->yres) | SUNXI_DE_BE_WIDTH(mode->xres),
               &de_be->layer0_size);
+#ifndef CONFIG_MACH_SUN4I /* On sun4i the frontend does the dma */
        writel(SUNXI_DE_BE_LAYER_STRIDE(mode->xres), &de_be->layer0_stride);
        writel(address << 3, &de_be->layer0_addr_low32b);
        writel(address >> 29, &de_be->layer0_addr_high4b);
+#else
+       writel(SUNXI_DE_BE_LAYER_ATTR0_SRC_FE0, &de_be->layer0_attr0_ctrl);
+#endif
        writel(SUNXI_DE_BE_LAYER_ATTR1_FMT_XRGB8888, &de_be->layer0_attr1_ctrl);
 
        setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_LAYER0_ENABLE);
@@ -323,6 +441,8 @@ static void sunxi_composer_enable(void)
        struct sunxi_de_be_reg * const de_be =
                (struct sunxi_de_be_reg *)SUNXI_DE_BE0_BASE;
 
+       sunxi_frontend_enable();
+
        setbits_le32(&de_be->reg_ctrl, SUNXI_DE_BE_REG_CTRL_LOAD_REGS);
        setbits_le32(&de_be->mode, SUNXI_DE_BE_MODE_START);
 }
@@ -1060,21 +1180,27 @@ int sunxi_simplefb_setup(void *blob)
        int offset, ret;
        const char *pipeline = NULL;
 
+#ifdef CONFIG_MACH_SUN4I
+#define PIPELINE_PREFIX "de_fe0-"
+#else
+#define PIPELINE_PREFIX
+#endif
+
        switch (sunxi_display.monitor) {
        case sunxi_monitor_none:
                return 0;
        case sunxi_monitor_dvi:
        case sunxi_monitor_hdmi:
-               pipeline = "de_be0-lcd0-hdmi";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0-hdmi";
                break;
        case sunxi_monitor_lcd:
-               pipeline = "de_be0-lcd0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0";
                break;
        case sunxi_monitor_vga:
 #ifdef CONFIG_VIDEO_VGA
-               pipeline = "de_be0-lcd0-tve0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0-tve0";
 #elif defined CONFIG_VIDEO_VGA_VIA_LCD
-               pipeline = "de_be0-lcd0";
+               pipeline = PIPELINE_PREFIX "de_be0-lcd0";
 #endif
                break;
        }