From: Marek Vasut Date: Thu, 7 Jan 2021 10:12:16 +0000 (+0100) Subject: net: dwc_eth_qos: Pad descriptors to cacheline size X-Git-Tag: v2025.01-rc5-pxa1908~2057^2~1 X-Git-Url: http://git.dujemihanovic.xyz/%22http:/kyber.dk/phpMyBuilder/static/%7B%7B%20%28.OutputFormats.Get?a=commitdiff_plain;h=6f1e668d964ebd3244a99288ea4bda7b7b8627c3;p=u-boot.git net: dwc_eth_qos: Pad descriptors to cacheline size The DWMAC4 IP has the possibility to skip up to 7 AXI bus width size words after the descriptor. Use this to pad the descriptors to cacheline size and remove the need for noncached memory altogether. Moreover, this lets Tegra use the generic cache flush / invalidate operations. Signed-off-by: Marek Vasut Cc: Joe Hershberger Cc: Patrice Chotard Cc: Patrick Delaunay Cc: Ramon Fried Cc: Stephen Warren Tested-by: Stephen Warren Reviewed-by: Stephen Warren Tested-by: Patrice Chotard --- diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c index 45a1648ad5..944412958d 100644 --- a/drivers/net/dwc_eth_qos.c +++ b/drivers/net/dwc_eth_qos.c @@ -209,6 +209,7 @@ struct eqos_dma_regs { #define EQOS_DMA_SYSBUS_MODE_BLEN8 BIT(2) #define EQOS_DMA_SYSBUS_MODE_BLEN4 BIT(1) +#define EQOS_DMA_CH0_CONTROL_DSL_SHIFT 18 #define EQOS_DMA_CH0_CONTROL_PBLX8 BIT(16) #define EQOS_DMA_CH0_TX_CONTROL_TXPBL_SHIFT 16 @@ -239,37 +240,15 @@ struct eqos_tegra186_regs { #define EQOS_AUTO_CAL_STATUS_ACTIVE BIT(31) /* Descriptors */ - -#define EQOS_DESCRIPTOR_WORDS 4 -#define EQOS_DESCRIPTOR_SIZE (EQOS_DESCRIPTOR_WORDS * 4) /* We assume ARCH_DMA_MINALIGN >= 16; 16 is the EQOS HW minimum */ #define EQOS_DESCRIPTOR_ALIGN ARCH_DMA_MINALIGN #define EQOS_DESCRIPTORS_TX 4 #define EQOS_DESCRIPTORS_RX 4 #define EQOS_DESCRIPTORS_NUM (EQOS_DESCRIPTORS_TX + EQOS_DESCRIPTORS_RX) -#define EQOS_DESCRIPTORS_SIZE ALIGN(EQOS_DESCRIPTORS_NUM * \ - EQOS_DESCRIPTOR_SIZE, ARCH_DMA_MINALIGN) #define EQOS_BUFFER_ALIGN ARCH_DMA_MINALIGN #define EQOS_MAX_PACKET_SIZE ALIGN(1568, ARCH_DMA_MINALIGN) #define EQOS_RX_BUFFER_SIZE (EQOS_DESCRIPTORS_RX * EQOS_MAX_PACKET_SIZE) -/* - * Warn if the cache-line size is larger than the descriptor size. In such - * cases the driver will likely fail because the CPU needs to flush the cache - * when requeuing RX buffers, therefore descriptors written by the hardware - * may be discarded. Architectures with full IO coherence, such as x86, do not - * experience this issue, and hence are excluded from this condition. - * - * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause - * the driver to allocate descriptors from a pool of non-cached memory. - */ -#if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN -#if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \ - !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86) -#warning Cache line size is larger than descriptor size -#endif -#endif - struct eqos_desc { u32 des0; u32 des1; @@ -282,12 +261,17 @@ struct eqos_desc { #define EQOS_DESC3_LD BIT(28) #define EQOS_DESC3_BUF1V BIT(24) +#define EQOS_AXI_WIDTH_32 4 +#define EQOS_AXI_WIDTH_64 8 +#define EQOS_AXI_WIDTH_128 16 + struct eqos_config { bool reg_access_always_ok; int mdio_wait; int swr_wait; int config_mac; int config_mac_mdio; + unsigned int axi_bus_width; phy_interface_t (*interface)(struct udevice *dev); struct eqos_ops *ops; }; @@ -330,9 +314,8 @@ struct eqos_priv { int phyaddr; u32 max_speed; void *descs; - struct eqos_desc *tx_descs; - struct eqos_desc *rx_descs; int tx_desc_idx, rx_desc_idx; + unsigned int desc_size; void *tx_dma_buf; void *rx_dma_buf; void *rx_pkt; @@ -358,63 +341,42 @@ struct eqos_priv { * not have the same constraints since they are 1536 bytes large, so they * are unlikely to share cache-lines. */ -static void *eqos_alloc_descs(unsigned int num) +static void *eqos_alloc_descs(struct eqos_priv *eqos, unsigned int num) { -#ifdef CONFIG_SYS_NONCACHED_MEMORY - return (void *)noncached_alloc(EQOS_DESCRIPTORS_SIZE, - EQOS_DESCRIPTOR_ALIGN); -#else - return memalign(EQOS_DESCRIPTOR_ALIGN, EQOS_DESCRIPTORS_SIZE); -#endif + eqos->desc_size = ALIGN(sizeof(struct eqos_desc), + (unsigned int)ARCH_DMA_MINALIGN); + + return memalign(eqos->desc_size, num * eqos->desc_size); } static void eqos_free_descs(void *descs) { -#ifdef CONFIG_SYS_NONCACHED_MEMORY - /* FIXME: noncached_alloc() has no opposite */ -#else free(descs); -#endif } -static void eqos_inval_desc_tegra186(void *desc) +static struct eqos_desc *eqos_get_desc(struct eqos_priv *eqos, + unsigned int num, bool rx) { -#ifndef CONFIG_SYS_NONCACHED_MEMORY - unsigned long start = (unsigned long)desc & ~(ARCH_DMA_MINALIGN - 1); - unsigned long end = ALIGN(start + EQOS_DESCRIPTOR_SIZE, - ARCH_DMA_MINALIGN); - - invalidate_dcache_range(start, end); -#endif + return eqos->descs + + ((rx ? EQOS_DESCRIPTORS_TX : 0) + num) * eqos->desc_size; } static void eqos_inval_desc_generic(void *desc) { -#ifndef CONFIG_SYS_NONCACHED_MEMORY - unsigned long start = rounddown((unsigned long)desc, ARCH_DMA_MINALIGN); - unsigned long end = roundup((unsigned long)desc + EQOS_DESCRIPTOR_SIZE, - ARCH_DMA_MINALIGN); + unsigned long start = (unsigned long)desc; + unsigned long end = ALIGN(start + sizeof(struct eqos_desc), + ARCH_DMA_MINALIGN); invalidate_dcache_range(start, end); -#endif -} - -static void eqos_flush_desc_tegra186(void *desc) -{ -#ifndef CONFIG_SYS_NONCACHED_MEMORY - flush_cache((unsigned long)desc, EQOS_DESCRIPTOR_SIZE); -#endif } static void eqos_flush_desc_generic(void *desc) { -#ifndef CONFIG_SYS_NONCACHED_MEMORY - unsigned long start = rounddown((unsigned long)desc, ARCH_DMA_MINALIGN); - unsigned long end = roundup((unsigned long)desc + EQOS_DESCRIPTOR_SIZE, - ARCH_DMA_MINALIGN); + unsigned long start = (unsigned long)desc; + unsigned long end = ALIGN(start + sizeof(struct eqos_desc), + ARCH_DMA_MINALIGN); flush_dcache_range(start, end); -#endif } static void eqos_inval_buffer_tegra186(void *buf, size_t size) @@ -1167,6 +1129,7 @@ static int eqos_start(struct udevice *dev) ulong rate; u32 val, tx_fifo_sz, rx_fifo_sz, tqs, rqs, pbl; ulong last_rx_desc; + ulong desc_pad; debug("%s(dev=%p):\n", __func__, dev); @@ -1405,8 +1368,12 @@ static int eqos_start(struct udevice *dev) EQOS_MAX_PACKET_SIZE << EQOS_DMA_CH0_RX_CONTROL_RBSZ_SHIFT); + desc_pad = (eqos->desc_size - sizeof(struct eqos_desc)) / + eqos->config->axi_bus_width; + setbits_le32(&eqos->dma_regs->ch0_control, - EQOS_DMA_CH0_CONTROL_PBLX8); + EQOS_DMA_CH0_CONTROL_PBLX8 | + (desc_pad << EQOS_DMA_CH0_CONTROL_DSL_SHIFT)); /* * Burst length must be < 1/2 FIFO size. @@ -1435,9 +1402,15 @@ static int eqos_start(struct udevice *dev) /* Set up descriptors */ - memset(eqos->descs, 0, EQOS_DESCRIPTORS_SIZE); + memset(eqos->descs, 0, eqos->desc_size * EQOS_DESCRIPTORS_NUM); + + for (i = 0; i < EQOS_DESCRIPTORS_TX; i++) { + struct eqos_desc *tx_desc = eqos_get_desc(eqos, i, false); + eqos->config->ops->eqos_flush_desc(tx_desc); + } + for (i = 0; i < EQOS_DESCRIPTORS_RX; i++) { - struct eqos_desc *rx_desc = &(eqos->rx_descs[i]); + struct eqos_desc *rx_desc = eqos_get_desc(eqos, i, true); rx_desc->des0 = (u32)(ulong)(eqos->rx_dma_buf + (i * EQOS_MAX_PACKET_SIZE)); rx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_BUF1V; @@ -1449,12 +1422,14 @@ static int eqos_start(struct udevice *dev) } writel(0, &eqos->dma_regs->ch0_txdesc_list_haddress); - writel((ulong)eqos->tx_descs, &eqos->dma_regs->ch0_txdesc_list_address); + writel((ulong)eqos_get_desc(eqos, 0, false), + &eqos->dma_regs->ch0_txdesc_list_address); writel(EQOS_DESCRIPTORS_TX - 1, &eqos->dma_regs->ch0_txdesc_ring_length); writel(0, &eqos->dma_regs->ch0_rxdesc_list_haddress); - writel((ulong)eqos->rx_descs, &eqos->dma_regs->ch0_rxdesc_list_address); + writel((ulong)eqos_get_desc(eqos, 0, true), + &eqos->dma_regs->ch0_rxdesc_list_address); writel(EQOS_DESCRIPTORS_RX - 1, &eqos->dma_regs->ch0_rxdesc_ring_length); @@ -1473,7 +1448,7 @@ static int eqos_start(struct udevice *dev) * that's not distinguishable from none of the descriptors being * available. */ - last_rx_desc = (ulong)&(eqos->rx_descs[(EQOS_DESCRIPTORS_RX - 1)]); + last_rx_desc = (ulong)eqos_get_desc(eqos, EQOS_DESCRIPTORS_RX - 1, true); writel(last_rx_desc, &eqos->dma_regs->ch0_rxdesc_tail_pointer); eqos->started = true; @@ -1558,7 +1533,7 @@ static int eqos_send(struct udevice *dev, void *packet, int length) memcpy(eqos->tx_dma_buf, packet, length); eqos->config->ops->eqos_flush_buffer(eqos->tx_dma_buf, length); - tx_desc = &(eqos->tx_descs[eqos->tx_desc_idx]); + tx_desc = eqos_get_desc(eqos, eqos->tx_desc_idx, false); eqos->tx_desc_idx++; eqos->tx_desc_idx %= EQOS_DESCRIPTORS_TX; @@ -1573,7 +1548,7 @@ static int eqos_send(struct udevice *dev, void *packet, int length) tx_desc->des3 = EQOS_DESC3_OWN | EQOS_DESC3_FD | EQOS_DESC3_LD | length; eqos->config->ops->eqos_flush_desc(tx_desc); - writel((ulong)(&(eqos->tx_descs[eqos->tx_desc_idx])), + writel((ulong)eqos_get_desc(eqos, eqos->tx_desc_idx, false), &eqos->dma_regs->ch0_txdesc_tail_pointer); for (i = 0; i < 1000000; i++) { @@ -1596,7 +1571,7 @@ static int eqos_recv(struct udevice *dev, int flags, uchar **packetp) debug("%s(dev=%p, flags=%x):\n", __func__, dev, flags); - rx_desc = &(eqos->rx_descs[eqos->rx_desc_idx]); + rx_desc = eqos_get_desc(eqos, eqos->rx_desc_idx, true); eqos->config->ops->eqos_inval_desc(rx_desc); if (rx_desc->des3 & EQOS_DESC3_OWN) { debug("%s: RX packet not available\n", __func__); @@ -1631,7 +1606,7 @@ static int eqos_free_pkt(struct udevice *dev, uchar *packet, int length) eqos->config->ops->eqos_inval_buffer(packet, length); - rx_desc = &(eqos->rx_descs[eqos->rx_desc_idx]); + rx_desc = eqos_get_desc(eqos, eqos->rx_desc_idx, true); rx_desc->des0 = 0; mb(); @@ -1663,17 +1638,12 @@ static int eqos_probe_resources_core(struct udevice *dev) debug("%s(dev=%p):\n", __func__, dev); - eqos->descs = eqos_alloc_descs(EQOS_DESCRIPTORS_TX + - EQOS_DESCRIPTORS_RX); + eqos->descs = eqos_alloc_descs(eqos, EQOS_DESCRIPTORS_NUM); if (!eqos->descs) { debug("%s: eqos_alloc_descs() failed\n", __func__); ret = -ENOMEM; goto err; } - eqos->tx_descs = (struct eqos_desc *)eqos->descs; - eqos->rx_descs = (eqos->tx_descs + EQOS_DESCRIPTORS_TX); - debug("%s: tx_descs=%p, rx_descs=%p\n", __func__, eqos->tx_descs, - eqos->rx_descs); eqos->tx_dma_buf = memalign(EQOS_BUFFER_ALIGN, EQOS_MAX_PACKET_SIZE); if (!eqos->tx_dma_buf) { @@ -2083,8 +2053,8 @@ static const struct eth_ops eqos_ops = { }; static struct eqos_ops eqos_tegra186_ops = { - .eqos_inval_desc = eqos_inval_desc_tegra186, - .eqos_flush_desc = eqos_flush_desc_tegra186, + .eqos_inval_desc = eqos_inval_desc_generic, + .eqos_flush_desc = eqos_flush_desc_generic, .eqos_inval_buffer = eqos_inval_buffer_tegra186, .eqos_flush_buffer = eqos_flush_buffer_tegra186, .eqos_probe_resources = eqos_probe_resources_tegra186, @@ -2105,6 +2075,7 @@ static const struct eqos_config __maybe_unused eqos_tegra186_config = { .swr_wait = 10, .config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_DCB, .config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_20_35, + .axi_bus_width = EQOS_AXI_WIDTH_128, .interface = eqos_get_interface_tegra186, .ops = &eqos_tegra186_ops }; @@ -2132,6 +2103,7 @@ static const struct eqos_config __maybe_unused eqos_stm32_config = { .swr_wait = 50, .config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_AV, .config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_250_300, + .axi_bus_width = EQOS_AXI_WIDTH_64, .interface = eqos_get_interface_stm32, .ops = &eqos_stm32_ops }; @@ -2159,6 +2131,7 @@ struct eqos_config __maybe_unused eqos_imx_config = { .swr_wait = 50, .config_mac = EQOS_MAC_RXQ_CTRL0_RXQ0EN_ENABLED_DCB, .config_mac_mdio = EQOS_MAC_MDIO_ADDRESS_CR_250_300, + .axi_bus_width = EQOS_AXI_WIDTH_64, .interface = eqos_get_interface_imx, .ops = &eqos_imx_ops }; diff --git a/include/configs/stm32mp1.h b/include/configs/stm32mp1.h index 1aa7514ac7..863b652ca4 100644 --- a/include/configs/stm32mp1.h +++ b/include/configs/stm32mp1.h @@ -69,7 +69,6 @@ /* Ethernet need */ #ifdef CONFIG_DWC_ETH_QOS -#define CONFIG_SYS_NONCACHED_MEMORY (1 * SZ_1M) /* 1M */ #define CONFIG_SERVERIP 192.168.1.1 #define CONFIG_BOOTP_SERVERIP #define CONFIG_SYS_AUTOLOAD "no"