]> git.dujemihanovic.xyz Git - u-boot.git/commitdiff
mmc: Add MMC controller driver for OcteonTX / TX2
authorSuneel Garapati <sgarapati@marvell.com>
Sun, 20 Oct 2019 01:03:01 +0000 (18:03 -0700)
committerStefan Roese <sr@denx.de>
Tue, 25 Aug 2020 06:01:16 +0000 (08:01 +0200)
Adds support for MMC controllers found on OcteonTX or
OcteonTX2 SoC platforms.

Signed-off-by: Aaron Williams <awilliams@marvell.com>
Signed-off-by: Suneel Garapati <sgarapati@marvell.com>
Cc: Peng Fan <peng.fan@nxp.com>
drivers/mmc/Kconfig
drivers/mmc/Makefile
drivers/mmc/octeontx_hsmmc.c [new file with mode: 0644]
drivers/mmc/octeontx_hsmmc.h [new file with mode: 0644]

index c29d1ea680ef85c996757b53eb0f5e8cbdb1b47c..0c252e34c74a6f409e608b6e9e01b25389aa2a0e 100644 (file)
@@ -305,6 +305,15 @@ config MMC_PCI
          This selects PCI-based MMC controllers.
          If you have an MMC controller on a PCI bus, say Y here.
 
+config MMC_OCTEONTX
+       bool "Marvell OcteonTX Multimedia Card Interface support"
+       depends on (ARCH_OCTEONTX || ARCH_OCTEONTX2)
+       depends on DM_MMC
+       help
+         This selects the OcteonTX Multimedia card Interface.
+         If you have an OcteonTX/TX2 board with a Multimedia Card slot,
+         say Y here.
+
          If unsure, say N.
 
 config PXA_MMC_GENERIC
index d375669a7b56ca752d7dec69a82ae3bf25703459..22266ec8ece4fd339684b249639e9cf3243764fc 100644 (file)
@@ -36,6 +36,7 @@ obj-$(CONFIG_MVEBU_MMC) += mvebu_mmc.o
 obj-$(CONFIG_MMC_OMAP_HS)              += omap_hsmmc.o
 obj-$(CONFIG_MMC_MXC)                  += mxcmmc.o
 obj-$(CONFIG_MMC_MXS)                  += mxsmmc.o
+obj-$(CONFIG_MMC_OCTEONTX)             += octeontx_hsmmc.o
 obj-$(CONFIG_MMC_PCI)                  += pci_mmc.o
 obj-$(CONFIG_PXA_MMC_GENERIC) += pxa_mmc_gen.o
 obj-$(CONFIG_$(SPL_TPL_)SUPPORT_EMMC_RPMB) += rpmb.o
diff --git a/drivers/mmc/octeontx_hsmmc.c b/drivers/mmc/octeontx_hsmmc.c
new file mode 100644 (file)
index 0000000..ddc3669
--- /dev/null
@@ -0,0 +1,3897 @@
+// SPDX-License-Identifier:    GPL-2.0
+/*
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * https://spdx.org/licenses
+ */
+
+//#define DEBUG
+#include <cpu_func.h>
+#include <dm.h>
+#include <dm/lists.h>
+#include <env.h>
+#include <errno.h>
+#include <fdtdec.h>
+#include <log.h>
+#include <malloc.h>
+#include <memalign.h>
+#include <mmc.h>
+#include <part.h>
+#include <pci.h>
+#include <pci_ids.h>
+#include <time.h>
+#include <watchdog.h>
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/libfdt.h>
+
+#include <asm/arch/board.h>
+#include <asm/arch/clock.h>
+#include <asm/arch/csrs/csrs-mio_emm.h>
+#include <asm/io.h>
+
+#include <power/regulator.h>
+
+#include "octeontx_hsmmc.h"
+
+#define MMC_TIMEOUT_SHORT      20      /* in ms */
+#define MMC_TIMEOUT_LONG       1000
+#define MMC_TIMEOUT_ERASE      10000
+
+#define MMC_DEFAULT_DATA_IN_TAP                        10
+#define MMC_DEFAULT_CMD_IN_TAP                 10
+#define MMC_DEFAULT_CMD_OUT_TAP                        39
+#define MMC_DEFAULT_DATA_OUT_TAP               39
+#define MMC_DEFAULT_HS200_CMD_IN_TAP           24
+#define MMC_DEFAULT_HS200_DATA_IN_TAP          24
+#define MMC_DEFAULT_HS200_CMD_OUT_TAP  (otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS200_DATA_OUT_TAP (otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS400_CMD_OUT_TAP  (otx_is_soc(CN95XX) ? 10 : 5)
+#define MMC_DEFAULT_HS400_DATA_OUT_TAP (otx_is_soc(CN95XX) ? 5 : 3)
+#define MMC_DEFAULT_HS200_CMD_OUT_DLY          800     /* Delay in ps */
+#define MMC_DEFAULT_HS200_DATA_OUT_DLY         800     /* Delay in ps */
+#define MMC_DEFAULT_HS400_CMD_OUT_DLY          800     /* Delay in ps */
+#define MMC_DEFAULT_HS400_DATA_OUT_DLY         400     /* Delay in ps */
+#define MMC_DEFAULT_SD_UHS_SDR104_CMD_OUT_TAP  MMC_DEFAULT_HS200_CMD_OUT_TAP
+#define MMC_DEFAULT_SD_UHS_SDR104_DATA_OUT_TAP MMC_DEFAULT_HS200_DATA_OUT_TAP
+#define MMC_LEGACY_DEFAULT_CMD_OUT_TAP         39
+#define MMC_LEGACY_DEFAULT_DATA_OUT_TAP                39
+#define MMC_SD_LEGACY_DEFAULT_CMD_OUT_TAP      63
+#define MMC_SD_LEGACY_DEFAULT_DATA_OUT_TAP     63
+#define MMC_HS_CMD_OUT_TAP                     32
+#define MMC_HS_DATA_OUT_TAP                    32
+#define MMC_SD_HS_CMD_OUT_TAP                  26
+#define MMC_SD_HS_DATA_OUT_TAP                 26
+#define MMC_SD_UHS_SDR25_CMD_OUT_TAP           26
+#define MMC_SD_UHS_SDR25_DATA_OUT_TAP          26
+#define MMC_SD_UHS_SDR50_CMD_OUT_TAP           26
+#define MMC_SD_UHS_SDR50_DATA_OUT_TAP          26
+#define MMC_DEFAULT_TAP_DELAY                  4
+#define TOTAL_NO_OF_TAPS                       512
+static void octeontx_mmc_switch_to(struct mmc *mmc);
+static int octeontx_mmc_configure_delay(struct mmc *mmc);
+static void octeontx_mmc_set_timing(struct mmc *mmc);
+static void set_wdog(struct mmc *mmc, u64 us);
+static void do_switch(struct mmc *mmc, union mio_emm_switch emm_switch);
+static int octeontx_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
+                                struct mmc_data *data);
+static int octeontx2_mmc_calc_delay(struct mmc *mmc, int delay);
+static int octeontx_mmc_calibrate_delay(struct mmc *mmc);
+static int octeontx_mmc_set_input_bus_timing(struct mmc *mmc);
+static int octeontx_mmc_set_output_bus_timing(struct mmc *mmc);
+
+static bool host_probed;
+
+/**
+ * Get the slot data structure from a MMC data structure
+ */
+static inline struct octeontx_mmc_slot *mmc_to_slot(struct mmc *mmc)
+{
+       return container_of(mmc, struct octeontx_mmc_slot, mmc);
+}
+
+static inline struct octeontx_mmc_host *mmc_to_host(struct mmc *mmc)
+{
+       return mmc_to_slot(mmc)->host;
+}
+
+static inline struct octeontx_mmc_slot *dev_to_mmc_slot(struct udevice *dev)
+{
+       return dev_get_priv(dev);
+}
+
+static inline struct mmc *dev_to_mmc(struct udevice *dev)
+{
+       return &((struct octeontx_mmc_slot *)dev_get_priv(dev))->mmc;
+}
+
+#ifdef DEBUG
+const char *mmc_reg_str(u64 reg)
+{
+       if (reg == MIO_EMM_DMA_CFG())
+               return "MIO_EMM_DMA_CFG";
+       if (reg == MIO_EMM_DMA_ADR())
+               return "MIO_EMM_DMA_ADR";
+       if (reg == MIO_EMM_DMA_INT())
+               return "MIO_EMM_DMA_INT";
+       if (reg == MIO_EMM_CFG())
+               return "MIO_EMM_CFG";
+       if (reg == MIO_EMM_MODEX(0))
+               return "MIO_EMM_MODE0";
+       if (reg == MIO_EMM_MODEX(1))
+               return "MIO_EMM_MODE1";
+       if (reg == MIO_EMM_MODEX(2))
+               return "MIO_EMM_MODE2";
+       if (reg == MIO_EMM_MODEX(3))
+               return "MIO_EMM_MODE3";
+       if (reg == MIO_EMM_IO_CTL())
+               return "MIO_EMM_IO_CTL";
+       if (reg == MIO_EMM_SWITCH())
+               return "MIO_EMM_SWITCH";
+       if (reg == MIO_EMM_DMA())
+               return "MIO_EMM_DMA";
+       if (reg == MIO_EMM_CMD())
+               return "MIO_EMM_CMD";
+       if (reg == MIO_EMM_RSP_STS())
+               return "MIO_EMM_RSP_STS";
+       if (reg == MIO_EMM_RSP_LO())
+               return "MIO_EMM_RSP_LO";
+       if (reg == MIO_EMM_RSP_HI())
+               return "MIO_EMM_RSP_HI";
+       if (reg == MIO_EMM_INT())
+               return "MIO_EMM_INT";
+       if (reg == MIO_EMM_WDOG())
+               return "MIO_EMM_WDOG";
+       if (reg == MIO_EMM_DMA_ARG())
+               return "MIO_EMM_DMA_ARG";
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               if (reg == MIO_EMM_SAMPLE())
+                       return "MIO_EMM_SAMPLE";
+       }
+       if (reg == MIO_EMM_STS_MASK())
+               return "MIO_EMM_STS_MASK";
+       if (reg == MIO_EMM_RCA())
+               return "MIO_EMM_RCA";
+       if (reg == MIO_EMM_BUF_IDX())
+               return "MIO_EMM_BUF_IDX";
+       if (reg == MIO_EMM_BUF_DAT())
+               return "MIO_EMM_BUF_DAT";
+       if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               if (reg == MIO_EMM_CALB())
+                       return "MIO_EMM_CALB";
+               if (reg == MIO_EMM_TAP())
+                       return "MIO_EMM_TAP";
+               if (reg == MIO_EMM_TIMING())
+                       return "MIO_EMM_TIMING";
+               if (reg == MIO_EMM_DEBUG())
+                       return "MIO_EMM_DEBUG";
+       }
+
+       return "UNKNOWN";
+}
+#endif
+
+static void octeontx_print_rsp_sts(struct mmc *mmc)
+{
+#ifdef DEBUG
+       union mio_emm_rsp_sts emm_rsp_sts;
+       const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       static const char * const ctype_xor_str[] = {
+               "No data",
+               "Read data into Dbuf",
+               "Write data from Dbuf",
+               "Reserved"
+       };
+
+       static const char * const rtype_xor_str[] = {
+               "No response",
+               "R1, 48 bits",
+               "R2, 136 bits",
+               "R3, 48 bits",
+               "R4, 48 bits",
+               "R5, 48 bits",
+               "Reserved 6",
+               "Reserved 7"
+       };
+
+       emm_rsp_sts.u = readq(host->base_addr + MIO_EMM_RSP_STS());
+       printf("\nMIO_EMM_RSP_STS:              0x%016llx\n", emm_rsp_sts.u);
+       printf("    60-61: bus_id:              %u\n", emm_rsp_sts.s.bus_id);
+       printf("    59:    cmd_val:             %s\n",
+              emm_rsp_sts.s.cmd_val ? "yes" : "no");
+       printf("    58:    switch_val:          %s\n",
+              emm_rsp_sts.s.switch_val ? "yes" : "no");
+       printf("    57:    dma_val:             %s\n",
+              emm_rsp_sts.s.dma_val ? "yes" : "no");
+       printf("    56:    dma_pend:            %s\n",
+              emm_rsp_sts.s.dma_pend ? "yes" : "no");
+       printf("    28:    dbuf_err:            %s\n",
+              emm_rsp_sts.s.dbuf_err ? "yes" : "no");
+       printf("    23:    dbuf:                %u\n", emm_rsp_sts.s.dbuf);
+       printf("    22:    blk_timeout:         %s\n",
+              emm_rsp_sts.s.blk_timeout ? "yes" : "no");
+       printf("    21:    blk_crc_err:         %s\n",
+              emm_rsp_sts.s.blk_crc_err ? "yes" : "no");
+       printf("    20:    rsp_busybit:         %s\n",
+              emm_rsp_sts.s.rsp_busybit ? "yes" : "no");
+       printf("    19:    stp_timeout:         %s\n",
+              emm_rsp_sts.s.stp_timeout ? "yes" : "no");
+       printf("    18:    stp_crc_err:         %s\n",
+              emm_rsp_sts.s.stp_crc_err ? "yes" : "no");
+       printf("    17:    stp_bad_sts:         %s\n",
+              emm_rsp_sts.s.stp_bad_sts ? "yes" : "no");
+       printf("    16:    stp_val:             %s\n",
+              emm_rsp_sts.s.stp_val ? "yes" : "no");
+       printf("    15:    rsp_timeout:         %s\n",
+              emm_rsp_sts.s.rsp_timeout ? "yes" : "no");
+       printf("    14:    rsp_crc_err:         %s\n",
+              emm_rsp_sts.s.rsp_crc_err ? "yes" : "no");
+       printf("    13:    rsp_bad_sts:         %s\n",
+              emm_rsp_sts.s.rsp_bad_sts ? "yes" : "no");
+       printf("    12:    rsp_val:             %s\n",
+              emm_rsp_sts.s.rsp_val ? "yes" : "no");
+       printf("    9-11:  rsp_type:            %s\n",
+              rtype_xor_str[emm_rsp_sts.s.rsp_type]);
+       printf("    7-8:   cmd_type:            %s\n",
+              ctype_xor_str[emm_rsp_sts.s.cmd_type]);
+       printf("    1-6:   cmd_idx:             %u\n",
+              emm_rsp_sts.s.cmd_idx);
+       printf("    0:     cmd_done:            %s\n",
+              emm_rsp_sts.s.cmd_done ? "yes" : "no");
+#endif
+}
+
+static inline u64 read_csr(struct mmc *mmc, u64 reg)
+{
+       const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       u64 value = readq(host->base_addr + reg);
+#ifdef DEBUG_CSR
+       printf("        %s: %s(0x%p) => 0x%llx\n", __func__,
+              mmc_reg_str(reg), host->base_addr + reg,
+              value);
+#endif
+       return value;
+}
+
+/**
+ * Writes to a CSR register
+ *
+ * @param[in]  mmc     pointer to mmc data structure
+ * @param      reg     register offset
+ * @param      value   value to write to register
+ */
+static inline void write_csr(struct mmc *mmc, u64 reg, u64 value)
+{
+       const struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       void *addr = host->base_addr + reg;
+
+#ifdef DEBUG_CSR
+       printf("        %s: %s(0x%p) <= 0x%llx\n", __func__, mmc_reg_str(reg),
+              addr, value);
+#endif
+       writeq(value, addr);
+}
+
+#ifdef DEBUG
+static void mmc_print_status(u32 status)
+{
+#ifdef DEBUG_STATUS
+       static const char * const state[] = {
+               "Idle",         /* 0 */
+               "Ready",        /* 1 */
+               "Ident",        /* 2 */
+               "Standby",      /* 3 */
+               "Tran",         /* 4 */
+               "Data",         /* 5 */
+               "Receive",      /* 6 */
+               "Program",      /* 7 */
+               "Dis",          /* 8 */
+               "Btst",         /* 9 */
+               "Sleep",        /* 10 */
+               "reserved",     /* 11 */
+               "reserved",     /* 12 */
+               "reserved",     /* 13 */
+               "reserved",     /* 14 */
+               "reserved"      /* 15 */ };
+       if (status & R1_APP_CMD)
+               puts("MMC ACMD\n");
+       if (status & R1_SWITCH_ERROR)
+               puts("MMC switch error\n");
+       if (status & R1_READY_FOR_DATA)
+               puts("MMC ready for data\n");
+       printf("MMC %s state\n", state[R1_CURRENT_STATE(status)]);
+       if (status & R1_ERASE_RESET)
+               puts("MMC erase reset\n");
+       if (status & R1_WP_ERASE_SKIP)
+               puts("MMC partial erase due to write protected blocks\n");
+       if (status & R1_CID_CSD_OVERWRITE)
+               puts("MMC CID/CSD overwrite error\n");
+       if (status & R1_ERROR)
+               puts("MMC undefined device error\n");
+       if (status & R1_CC_ERROR)
+               puts("MMC device error\n");
+       if (status & R1_CARD_ECC_FAILED)
+               puts("MMC internal ECC failed to correct data\n");
+       if (status & R1_ILLEGAL_COMMAND)
+               puts("MMC illegal command\n");
+       if (status & R1_COM_CRC_ERROR)
+               puts("MMC CRC of previous command failed\n");
+       if (status & R1_LOCK_UNLOCK_FAILED)
+               puts("MMC sequence or password error in lock/unlock device command\n");
+       if (status & R1_CARD_IS_LOCKED)
+               puts("MMC device locked by host\n");
+       if (status & R1_WP_VIOLATION)
+               puts("MMC attempt to program write protected block\n");
+       if (status & R1_ERASE_PARAM)
+               puts("MMC invalid selection of erase groups for erase\n");
+       if (status & R1_ERASE_SEQ_ERROR)
+               puts("MMC error in sequence of erase commands\n");
+       if (status & R1_BLOCK_LEN_ERROR)
+               puts("MMC block length error\n");
+       if (status & R1_ADDRESS_ERROR)
+               puts("MMC address misalign error\n");
+       if (status & R1_OUT_OF_RANGE)
+               puts("MMC address out of range\n");
+#endif
+}
+#endif
+
+/**
+ * Print out all of the register values where mmc is optional
+ *
+ * @param mmc  MMC device (can be NULL)
+ * @param host Pointer to host data structure (can be NULL if mmc is !NULL)
+ */
+static void octeontx_mmc_print_registers2(struct mmc *mmc,
+                                         struct octeontx_mmc_host *host)
+{
+       struct octeontx_mmc_slot *slot = mmc ? mmc->priv : NULL;
+       union mio_emm_dma_cfg emm_dma_cfg;
+       union mio_emm_dma_adr emm_dma_adr;
+       union mio_emm_dma_int emm_dma_int;
+       union mio_emm_cfg emm_cfg;
+       union mio_emm_modex emm_mode;
+       union mio_emm_switch emm_switch;
+       union mio_emm_dma emm_dma;
+       union mio_emm_cmd emm_cmd;
+       union mio_emm_rsp_sts emm_rsp_sts;
+       union mio_emm_rsp_lo emm_rsp_lo;
+       union mio_emm_rsp_hi emm_rsp_hi;
+       union mio_emm_int emm_int;
+       union mio_emm_wdog emm_wdog;
+       union mio_emm_sample emm_sample;
+       union mio_emm_calb emm_calb;
+       union mio_emm_tap emm_tap;
+       union mio_emm_timing emm_timing;
+       union mio_emm_io_ctl io_ctl;
+       union mio_emm_debug emm_debug;
+       union mio_emm_sts_mask emm_sts_mask;
+       union mio_emm_rca emm_rca;
+       int bus;
+
+       static const char * const bus_width_str[] = {
+               "1-bit data bus (power on)",
+               "4-bit data bus",
+               "8-bit data bus",
+               "reserved (3)",
+               "reserved (4)",
+               "4-bit data bus (dual data rate)",
+               "8-bit data bus (dual data rate)",
+               "reserved (7)",
+               "reserved (8)",
+               "invalid (9)",
+               "invalid (10)",
+               "invalid (11)",
+               "invalid (12)",
+               "invalid (13)",
+               "invalid (14)",
+               "invalid (15)",
+       };
+       static const char * const ctype_xor_str[] = {
+               "No data",
+               "Read data into Dbuf",
+               "Write data from Dbuf",
+               "Reserved"
+       };
+
+       static const char * const rtype_xor_str[] = {
+               "No response",
+               "R1, 48 bits",
+               "R2, 136 bits",
+               "R3, 48 bits",
+               "R4, 48 bits",
+               "R5, 48 bits",
+               "Reserved 6",
+               "Reserved 7"
+       };
+
+       if (!host && mmc)
+               host = mmc_to_host(mmc);
+
+       if (mmc)
+               printf("%s: bus id: %u\n", __func__, slot->bus_id);
+       emm_dma_cfg.u = readq(host->base_addr + MIO_EMM_DMA_CFG());
+       printf("MIO_EMM_DMA_CFG:                0x%016llx\n",
+              emm_dma_cfg.u);
+       printf("    63:    en:                  %s\n",
+              emm_dma_cfg.s.en ? "enabled" : "disabled");
+       printf("    62:    rw:                  %s\n",
+              emm_dma_cfg.s.rw ? "write" : "read");
+       printf("    61:    clr:                 %s\n",
+              emm_dma_cfg.s.clr ? "clear" : "not clear");
+       printf("    59:    swap32:              %s\n",
+              emm_dma_cfg.s.swap32 ? "yes" : "no");
+       printf("    58:    swap16:              %s\n",
+              emm_dma_cfg.s.swap16 ? "yes" : "no");
+       printf("    57:    swap8:               %s\n",
+              emm_dma_cfg.s.swap8 ? "yes" : "no");
+       printf("    56:    endian:              %s\n",
+              emm_dma_cfg.s.endian ? "little" : "big");
+       printf("    36-55: size:                %u\n",
+              emm_dma_cfg.s.size);
+
+       emm_dma_adr.u = readq(host->base_addr + MIO_EMM_DMA_ADR());
+       printf("MIO_EMM_DMA_ADR:              0x%016llx\n", emm_dma_adr.u);
+       printf("    0-49:  adr:                 0x%llx\n",
+              (u64)emm_dma_adr.s.adr);
+
+       emm_dma_int.u = readq(host->base_addr + MIO_EMM_DMA_INT());
+       printf("\nMIO_EMM_DMA_INT:              0x%016llx\n",
+              emm_dma_int.u);
+       printf("    1:     FIFO:                %s\n",
+              emm_dma_int.s.fifo ? "yes" : "no");
+       printf("    0:     Done:                %s\n",
+              emm_dma_int.s.done ? "yes" : "no");
+               emm_cfg.u = readq(host->base_addr + MIO_EMM_CFG());
+
+       printf("\nMIO_EMM_CFG:                  0x%016llx\n",
+              emm_cfg.u);
+       printf("    3:     bus_ena3:            %s\n",
+              emm_cfg.s.bus_ena & 0x08 ? "yes" : "no");
+       printf("    2:     bus_ena2:            %s\n",
+              emm_cfg.s.bus_ena & 0x04 ? "yes" : "no");
+       printf("    1:     bus_ena1:            %s\n",
+              emm_cfg.s.bus_ena & 0x02 ? "yes" : "no");
+       printf("    0:     bus_ena0:            %s\n",
+              emm_cfg.s.bus_ena & 0x01 ? "yes" : "no");
+       for (bus = 0; bus < 4; bus++) {
+               emm_mode.u = readq(host->base_addr + MIO_EMM_MODEX(bus));
+               printf("\nMIO_EMM_MODE%u:               0x%016llx\n",
+                      bus, emm_mode.u);
+               if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+                       printf("    50:    hs400_timing:        %s\n",
+                              emm_mode.s.hs400_timing ? "yes" : "no");
+                       printf("    49:    hs200_timing:        %s\n",
+                              emm_mode.s.hs200_timing ? "yes" : "no");
+               }
+               printf("    48:    hs_timing:           %s\n",
+                      emm_mode.s.hs_timing ? "yes" : "no");
+               printf("    40-42: bus_width:           %s\n",
+                      bus_width_str[emm_mode.s.bus_width]);
+               printf("    32-35: power_class          %u\n",
+                      emm_mode.s.power_class);
+               printf("    16-31: clk_hi:              %u\n",
+                      emm_mode.s.clk_hi);
+               printf("    0-15:  clk_lo:              %u\n",
+                      emm_mode.s.clk_lo);
+       }
+
+       emm_switch.u = readq(host->base_addr + MIO_EMM_SWITCH());
+       printf("\nMIO_EMM_SWITCH:               0x%016llx\n", emm_switch.u);
+       printf("    60-61: bus_id:              %u\n", emm_switch.s.bus_id);
+       printf("    59:    switch_exe:          %s\n",
+              emm_switch.s.switch_exe ? "yes" : "no");
+       printf("    58:    switch_err0:         %s\n",
+              emm_switch.s.switch_err0 ? "yes" : "no");
+       printf("    57:    switch_err1:         %s\n",
+              emm_switch.s.switch_err1 ? "yes" : "no");
+       printf("    56:    switch_err2:         %s\n",
+              emm_switch.s.switch_err2 ? "yes" : "no");
+       printf("    48:    hs_timing:           %s\n",
+              emm_switch.s.hs_timing ? "yes" : "no");
+       printf("    42-40: bus_width:           %s\n",
+              bus_width_str[emm_switch.s.bus_width]);
+       printf("    32-35: power_class:         %u\n",
+              emm_switch.s.power_class);
+       printf("    16-31: clk_hi:              %u\n",
+              emm_switch.s.clk_hi);
+       printf("    0-15:  clk_lo:              %u\n", emm_switch.s.clk_lo);
+
+       emm_dma.u = readq(host->base_addr + MIO_EMM_DMA());
+       printf("\nMIO_EMM_DMA:                  0x%016llx\n", emm_dma.u);
+       printf("    60-61: bus_id:              %u\n", emm_dma.s.bus_id);
+       printf("    59:    dma_val:             %s\n",
+              emm_dma.s.dma_val ? "yes" : "no");
+       printf("    58:    sector:              %s mode\n",
+              emm_dma.s.sector ? "sector" : "byte");
+       printf("    57:    dat_null:            %s\n",
+              emm_dma.s.dat_null ? "yes" : "no");
+       printf("    51-56: thres:               %u\n", emm_dma.s.thres);
+       printf("    50:    rel_wr:              %s\n",
+              emm_dma.s.rel_wr ? "yes" : "no");
+       printf("    49:    rw:                  %s\n",
+              emm_dma.s.rw ? "write" : "read");
+       printf("    48:    multi:               %s\n",
+              emm_dma.s.multi ? "yes" : "no");
+       printf("    32-47: block_cnt:           %u\n",
+              emm_dma.s.block_cnt);
+       printf("    0-31:  card_addr:           0x%x\n",
+              emm_dma.s.card_addr);
+
+       emm_cmd.u = readq(host->base_addr + MIO_EMM_CMD());
+       printf("\nMIO_EMM_CMD:                  0x%016llx\n", emm_cmd.u);
+       printf("\n  62:    skip_busy:           %s\n",
+              emm_cmd.s.skip_busy ? "yes" : "no");
+       printf("    60-61: bus_id:              %u\n", emm_cmd.s.bus_id);
+       printf("    59:    cmd_val:             %s\n",
+              emm_cmd.s.cmd_val ? "yes" : "no");
+       printf("    55:    dbuf:                %u\n", emm_cmd.s.dbuf);
+       printf("    49-54: offset:              %u\n", emm_cmd.s.offset);
+       printf("    41-42: ctype_xor:           %s\n",
+              ctype_xor_str[emm_cmd.s.ctype_xor]);
+       printf("    38-40: rtype_xor:           %s\n",
+              rtype_xor_str[emm_cmd.s.rtype_xor]);
+       printf("    32-37: cmd_idx:             %u\n", emm_cmd.s.cmd_idx);
+       printf("    0-31:  arg:                 0x%x\n", emm_cmd.s.arg);
+
+       emm_rsp_sts.u = readq(host->base_addr + MIO_EMM_RSP_STS());
+       printf("\nMIO_EMM_RSP_STS:              0x%016llx\n", emm_rsp_sts.u);
+       printf("    60-61: bus_id:              %u\n", emm_rsp_sts.s.bus_id);
+       printf("    59:    cmd_val:             %s\n",
+              emm_rsp_sts.s.cmd_val ? "yes" : "no");
+       printf("    58:    switch_val:          %s\n",
+              emm_rsp_sts.s.switch_val ? "yes" : "no");
+       printf("    57:    dma_val:             %s\n",
+              emm_rsp_sts.s.dma_val ? "yes" : "no");
+       printf("    56:    dma_pend:            %s\n",
+              emm_rsp_sts.s.dma_pend ? "yes" : "no");
+       printf("    28:    dbuf_err:            %s\n",
+              emm_rsp_sts.s.dbuf_err ? "yes" : "no");
+       printf("    23:    dbuf:                %u\n", emm_rsp_sts.s.dbuf);
+       printf("    22:    blk_timeout:         %s\n",
+              emm_rsp_sts.s.blk_timeout ? "yes" : "no");
+       printf("    21:    blk_crc_err:         %s\n",
+              emm_rsp_sts.s.blk_crc_err ? "yes" : "no");
+       printf("    20:    rsp_busybit:         %s\n",
+              emm_rsp_sts.s.rsp_busybit ? "yes" : "no");
+       printf("    19:    stp_timeout:         %s\n",
+              emm_rsp_sts.s.stp_timeout ? "yes" : "no");
+       printf("    18:    stp_crc_err:         %s\n",
+              emm_rsp_sts.s.stp_crc_err ? "yes" : "no");
+       printf("    17:    stp_bad_sts:         %s\n",
+              emm_rsp_sts.s.stp_bad_sts ? "yes" : "no");
+       printf("    16:    stp_val:             %s\n",
+              emm_rsp_sts.s.stp_val ? "yes" : "no");
+       printf("    15:    rsp_timeout:         %s\n",
+              emm_rsp_sts.s.rsp_timeout ? "yes" : "no");
+       printf("    14:    rsp_crc_err:         %s\n",
+              emm_rsp_sts.s.rsp_crc_err ? "yes" : "no");
+       printf("    13:    rsp_bad_sts:         %s\n",
+              emm_rsp_sts.s.rsp_bad_sts ? "yes" : "no");
+       printf("    12:    rsp_val:             %s\n",
+              emm_rsp_sts.s.rsp_val ? "yes" : "no");
+       printf("    9-11:  rsp_type:            %s\n",
+              rtype_xor_str[emm_rsp_sts.s.rsp_type]);
+       printf("    7-8:   cmd_type:            %s\n",
+              ctype_xor_str[emm_rsp_sts.s.cmd_type]);
+       printf("    1-6:   cmd_idx:             %u\n",
+              emm_rsp_sts.s.cmd_idx);
+       printf("    0:     cmd_done:            %s\n",
+              emm_rsp_sts.s.cmd_done ? "yes" : "no");
+
+       emm_rsp_lo.u = readq(host->base_addr + MIO_EMM_RSP_LO());
+       printf("\nMIO_EMM_RSP_STS_LO:           0x%016llx\n", emm_rsp_lo.u);
+
+       emm_rsp_hi.u = readq(host->base_addr + MIO_EMM_RSP_HI());
+       printf("\nMIO_EMM_RSP_STS_HI:           0x%016llx\n", emm_rsp_hi.u);
+
+       emm_int.u = readq(host->base_addr + MIO_EMM_INT());
+       printf("\nMIO_EMM_INT:                  0x%016llx\n", emm_int.u);
+       printf("    6:    switch_err:           %s\n",
+              emm_int.s.switch_err ? "yes" : "no");
+       printf("    5:    switch_done:          %s\n",
+              emm_int.s.switch_done ? "yes" : "no");
+       printf("    4:    dma_err:              %s\n",
+              emm_int.s.dma_err ? "yes" : "no");
+       printf("    3:    cmd_err:              %s\n",
+              emm_int.s.cmd_err ? "yes" : "no");
+       printf("    2:    dma_done:             %s\n",
+              emm_int.s.dma_done ? "yes" : "no");
+       printf("    1:    cmd_done:             %s\n",
+              emm_int.s.cmd_done ? "yes" : "no");
+       printf("    0:    buf_done:             %s\n",
+              emm_int.s.buf_done ? "yes" : "no");
+
+       emm_wdog.u = readq(host->base_addr + MIO_EMM_WDOG());
+       printf("\nMIO_EMM_WDOG:                 0x%016llx (%u)\n",
+              emm_wdog.u, emm_wdog.s.clk_cnt);
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               emm_sample.u = readq(host->base_addr + MIO_EMM_SAMPLE());
+               printf("\nMIO_EMM_SAMPLE:               0x%016llx\n",
+                      emm_sample.u);
+               printf("    16-25: cmd_cnt:             %u\n",
+                      emm_sample.s.cmd_cnt);
+               printf("    0-9:   dat_cnt:             %u\n",
+                      emm_sample.s.dat_cnt);
+       }
+
+       emm_sts_mask.u = readq(host->base_addr + MIO_EMM_STS_MASK());
+       printf("\nMIO_EMM_STS_MASK:             0x%016llx\n", emm_sts_mask.u);
+
+       emm_rca.u = readq(host->base_addr + MIO_EMM_RCA());
+       printf("\nMIO_EMM_RCA:                  0x%016llx\n", emm_rca.u);
+       printf("    0-15:  card_rca:            0x%04x\n",
+              emm_rca.s.card_rca);
+       if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               emm_calb.u = readq(host->base_addr + MIO_EMM_CALB());
+               printf("\nMIO_EMM_CALB:                 0x%016llx\n",
+                      emm_calb.u);
+               printf("       0:  start:               %u\n",
+                      emm_calb.s.start);
+               emm_tap.u = readq(host->base_addr + MIO_EMM_TAP());
+               printf("\nMIO_EMM_TAP:                  0x%016llx\n",
+                      emm_tap.u);
+               printf("     7-0:  delay:               %u\n", emm_tap.s.delay);
+               emm_timing.u = readq(host->base_addr + MIO_EMM_TIMING());
+               printf("\nMIO_EMM_TIMING:               0x%016llx\n",
+                      emm_timing.u);
+               printf("   53-48:  cmd_in_tap:          %u\n",
+                      emm_timing.s.cmd_in_tap);
+               printf("   37-32:  cmd_out_tap:         %u\n",
+                      emm_timing.s.cmd_out_tap);
+               printf("   21-16:  data_in_tap:         %u\n",
+                      emm_timing.s.data_in_tap);
+               printf("     5-0:  data_out_tap:        %u\n",
+                      emm_timing.s.data_out_tap);
+               io_ctl.u = readq(host->base_addr + MIO_EMM_IO_CTL());
+               printf("\nMIO_IO_CTL:                   0x%016llx\n", io_ctl.u);
+               printf("     3-2:  drive:               %u (%u mA)\n",
+                      io_ctl.s.drive, 2 << io_ctl.s.drive);
+               printf("       0:  slew:                %u %s\n", io_ctl.s.slew,
+                      io_ctl.s.slew ? "high" : "low");
+               emm_debug.u = readq(host->base_addr + MIO_EMM_DEBUG());
+               printf("\nMIO_EMM_DEBUG:                0x%016llx\n",
+                      emm_debug.u);
+               printf("      21: rdsync_rst            0x%x\n",
+                      emm_debug.s.rdsync_rst);
+               printf("      20: emmc_clk_disable      0x%x\n",
+                      emm_debug.s.emmc_clk_disable);
+               printf("   19-16: dma_sm:               0x%x\n",
+                      emm_debug.s.dma_sm);
+               printf("   15-12: data_sm:              0x%x\n",
+                      emm_debug.s.data_sm);
+               printf("    11-8: cmd_sm:               0x%x\n",
+                      emm_debug.s.cmd_sm);
+               printf("       0: clk_on:               0x%x\n",
+                      emm_debug.s.clk_on);
+       }
+
+       puts("\n");
+}
+
+/**
+ * Print out all of the register values
+ *
+ * @param mmc  MMC device
+ */
+static void octeontx_mmc_print_registers(struct mmc *mmc)
+{
+#ifdef DEBUG_REGISTERS
+       const int print = 1;
+#else
+       const int print = 0;
+#endif
+       if (print)
+               octeontx_mmc_print_registers2(mmc, mmc_to_host(mmc));
+}
+
+static const struct octeontx_sd_mods octeontx_cr_types[] = {
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD0 */
+{ {0, 3}, {0, 3}, {0, 0} },    /* CMD1 */
+{ {0, 2}, {0, 2}, {0, 0} },    /* CMD2 */
+{ {0, 1}, {0, 3}, {0, 0} },    /* CMD3 SD_CMD_SEND_RELATIVE_ADDR 0, 2 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD4 */
+{ {0, 1}, {0, 1}, {0, 0} },    /* CMD5 */
+{ {0, 1}, {1, 1}, {0, 1} },    /*
+                                * CMD6 SD_CMD_SWITCH_FUNC 1,0
+                                * (ACMD) SD_APP_SET_BUS_WIDTH
+                                */
+{ {0, 1}, {0, 1}, {0, 0} },    /* CMD7 */
+{ {1, 1}, {0, 3}, {0, 0} },    /* CMD8 SD_CMD_SEND_IF_COND 1,2 */
+{ {0, 2}, {0, 2}, {0, 0} },    /* CMD9 */
+{ {0, 2}, {0, 2}, {0, 0} },    /* CMD10 */
+{ {1, 1}, {0, 1}, {1, 1} },    /* CMD11 SD_CMD_SWITCH_UHS18V 1,0 */
+{ {0, 1}, {0, 1}, {0, 0} },    /* CMD12 */
+{ {0, 1}, {0, 1}, {1, 3} },    /* CMD13 (ACMD)) SD_CMD_APP_SD_STATUS 1,2 */
+{ {1, 1}, {1, 1}, {0, 0} },    /* CMD14 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD15 */
+{ {0, 1}, {0, 1}, {0, 0} },    /* CMD16 */
+{ {1, 1}, {1, 1}, {0, 0} },    /* CMD17 */
+{ {1, 1}, {1, 1}, {0, 0} },    /* CMD18 */
+{ {3, 1}, {3, 1}, {0, 0} },    /* CMD19 */
+{ {2, 1}, {0, 0}, {0, 0} },    /* CMD20 */     /* SD 2,0 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD21 */
+{ {0, 0}, {0, 0}, {1, 1} },    /* CMD22 (ACMD) SD_APP_SEND_NUM_WR_BLKS 1,0 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD23 */     /* SD ACMD 1,0 */
+{ {2, 1}, {2, 1}, {2, 1} },    /* CMD24 */
+{ {2, 1}, {2, 1}, {2, 1} },    /* CMD25 */
+{ {2, 1}, {2, 1}, {2, 1} },    /* CMD26 */
+{ {2, 1}, {2, 1}, {2, 1} },    /* CMD27 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD28 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD29 */
+{ {1, 1}, {1, 1}, {1, 1} },    /* CMD30 */
+{ {1, 1}, {1, 1}, {1, 1} },    /* CMD31 */
+{ {0, 0}, {0, 1}, {0, 0} },    /* CMD32 SD_CMD_ERASE_WR_BLK_START 0,1 */
+{ {0, 0}, {0, 1}, {0, 0} },    /* CMD33 SD_CMD_ERASE_WR_BLK_END 0,1 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD34 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD35 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD36 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD37 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD38 */
+{ {0, 4}, {0, 4}, {0, 4} },    /* CMD39 */
+{ {0, 5}, {0, 5}, {0, 5} },    /* CMD40 */
+{ {0, 0}, {0, 0}, {0, 3} },    /* CMD41 (ACMD) SD_CMD_APP_SEND_OP_COND 0,3 */
+{ {2, 1}, {2, 1}, {2, 1} },    /* CMD42 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD43 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD44 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD45 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD46 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD47 */
+{ {0, 0}, {1, 0}, {0, 0} },    /* CMD48 SD_CMD_READ_EXTR_SINGLE */
+{ {0, 0}, {2, 0}, {0, 0} },    /* CMD49 SD_CMD_WRITE_EXTR_SINGLE */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD50 */
+{ {0, 0}, {0, 0}, {1, 1} },    /* CMD51 (ACMD) SD_CMD_APP_SEND_SCR 1,1 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD52 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD53 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD54 */
+{ {0, 1}, {0, 1}, {0, 1} },    /* CMD55 */
+{ {0xff, 0xff}, {0xff, 0xff}, {0xff, 0xff} },  /* CMD56 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD57 */
+{ {0, 0}, {0, 3}, {0, 3} },    /* CMD58 SD_CMD_SPI_READ_OCR 0,3 */
+{ {0, 0}, {0, 1}, {0, 0} },    /* CMD59 SD_CMD_SPI_CRC_ON_OFF 0,1 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD60 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD61 */
+{ {0, 0}, {0, 0}, {0, 0} },    /* CMD62 */
+{ {0, 0}, {0, 0}, {0, 0} }     /* CMD63 */
+};
+
+/**
+ * Returns XOR values needed for SD commands and other quirks
+ *
+ * @param      mmc     mmc device
+ * @param      cmd     command information
+ *
+ * @return octeontx_mmc_cr_mods data structure with various quirks and flags
+ */
+static struct octeontx_mmc_cr_mods
+octeontx_mmc_get_cr_mods(struct mmc *mmc, const struct mmc_cmd *cmd,
+                        const struct mmc_data *data)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_cr_mods cr = {0, 0};
+       const struct octeontx_sd_mods *sdm =
+                                       &octeontx_cr_types[cmd->cmdidx & 0x3f];
+       u8 c = sdm->mmc.c, r = sdm->mmc.r;
+       u8 desired_ctype = 0;
+
+       if (IS_MMC(mmc)) {
+#ifdef MMC_SUPPORTS_TUNING
+               if (cmd->cmdidx == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+                       if (cmd->resp_type == MMC_RSP_R1)
+                               cr.rtype_xor = 1;
+                       if (data && data->flags & MMC_DATA_READ)
+                               cr.ctype_xor = 1;
+               }
+#endif
+               return cr;
+       }
+
+       if (cmd->cmdidx == 56)
+               c = (cmd->cmdarg & 1) ? 1 : 2;
+
+       if (data) {
+               if (data->flags & MMC_DATA_READ)
+                       desired_ctype = 1;
+               else if (data->flags & MMC_DATA_WRITE)
+                       desired_ctype = 2;
+       }
+
+       cr.ctype_xor = c ^ desired_ctype;
+       if (slot->is_acmd)
+               cr.rtype_xor = r ^ sdm->sdacmd.r;
+       else
+               cr.rtype_xor = r ^ sdm->sd.r;
+
+       debug("%s(%s): mmc c: %d, mmc r: %d, desired c: %d, xor c: %d, xor r: %d\n",
+             __func__, mmc->dev->name, c, r, desired_ctype,
+             cr.ctype_xor, cr.rtype_xor);
+       return cr;
+}
+
+/**
+ * Keep track of switch commands internally
+ */
+static void octeontx_mmc_track_switch(struct mmc *mmc, u32 cmd_arg)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       u8 how = (cmd_arg >> 24) & 3;
+       u8 where = (u8)(cmd_arg >> 16);
+       u8 val = (u8)(cmd_arg >> 8);
+
+       slot->want_switch = slot->cached_switch;
+
+       if (slot->is_acmd)
+               return;
+
+       if (how != 3)
+               return;
+
+       switch (where) {
+       case EXT_CSD_BUS_WIDTH:
+               slot->want_switch.s.bus_width = val;
+               break;
+       case EXT_CSD_POWER_CLASS:
+               slot->want_switch.s.power_class = val;
+               break;
+       case EXT_CSD_HS_TIMING:
+               slot->want_switch.s.hs_timing = 0;
+               slot->want_switch.s.hs200_timing = 0;
+               slot->want_switch.s.hs400_timing = 0;
+               switch (val & 0xf) {
+               case 0:
+                       break;
+               case 1:
+                       slot->want_switch.s.hs_timing = 1;
+                       break;
+               case 2:
+                       if (!slot->is_asim && !slot->is_emul)
+                               slot->want_switch.s.hs200_timing = 1;
+                       break;
+               case 3:
+                       if (!slot->is_asim && !slot->is_emul)
+                               slot->want_switch.s.hs400_timing = 1;
+                       break;
+               default:
+                       pr_err("%s(%s): Unsupported timing mode 0x%x\n",
+                              __func__, mmc->dev->name, val & 0xf);
+                       break;
+               }
+               break;
+       default:
+               break;
+       }
+}
+
+static int octeontx_mmc_print_rsp_errors(struct mmc *mmc,
+                                        union mio_emm_rsp_sts rsp_sts)
+{
+       bool err = false;
+       const char *name = mmc->dev->name;
+
+       if (rsp_sts.s.acc_timeout) {
+               pr_warn("%s(%s): acc_timeout\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.dbuf_err) {
+               pr_warn("%s(%s): dbuf_err\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.blk_timeout) {
+               pr_warn("%s(%s): blk_timeout\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.blk_crc_err) {
+               pr_warn("%s(%s): blk_crc_err\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.stp_timeout) {
+               pr_warn("%s(%s): stp_timeout\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.stp_crc_err) {
+               pr_warn("%s(%s): stp_crc_err\n", __func__, name);
+               err = true;
+       }
+       if (rsp_sts.s.stp_bad_sts) {
+               pr_warn("%s(%s): stp_bad_sts\n", __func__, name);
+               err = true;
+       }
+       if (err)
+               pr_warn("  rsp_sts: 0x%llx\n", rsp_sts.u);
+
+       return err ? -1 : 0;
+}
+
+/**
+ * Starts a DMA operation for block read/write
+ *
+ * @param      mmc     mmc device
+ * @param      write   true if write operation
+ * @param      clear   true to clear DMA operation
+ * @param      adr     source or destination DMA address
+ * @param      size    size in blocks
+ * @param      timeout timeout in ms
+ */
+static void octeontx_mmc_start_dma(struct mmc *mmc, bool write,
+                                  bool clear, u32 block, dma_addr_t adr,
+                                  u32 size, int timeout)
+{
+       const struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       union mio_emm_dma_cfg emm_dma_cfg;
+       union mio_emm_dma_adr emm_dma_adr;
+       union mio_emm_dma emm_dma;
+
+       /* Clear any interrupts */
+       write_csr(mmc, MIO_EMM_DMA_INT(),
+                 read_csr(mmc, MIO_EMM_DMA_INT()));
+
+       emm_dma_cfg.u = 0;
+       emm_dma_cfg.s.en = 1;
+       emm_dma_cfg.s.rw = !!write;
+       emm_dma_cfg.s.clr = !!clear;
+       emm_dma_cfg.s.size = ((u64)(size * mmc->read_bl_len) / 8) - 1;
+#if __BYTE_ORDER != __BIG_ENDIAN
+       emm_dma_cfg.s.endian = 1;
+#endif
+       emm_dma_adr.u = 0;
+       emm_dma_adr.s.adr = adr;
+       write_csr(mmc, MIO_EMM_DMA_ADR(), emm_dma_adr.u);
+       write_csr(mmc, MIO_EMM_DMA_CFG(), emm_dma_cfg.u);
+
+       emm_dma.u = 0;
+       emm_dma.s.bus_id = slot->bus_id;
+       emm_dma.s.dma_val = 1;
+       emm_dma.s.rw = !!write;
+       emm_dma.s.sector = mmc->high_capacity ? 1 : 0;
+
+       if (size > 1 && ((IS_SD(mmc) && (mmc->scr[0] & 2)) || !IS_SD(mmc)))
+               emm_dma.s.multi = 1;
+       else
+               emm_dma.s.multi = 0;
+
+       emm_dma.s.block_cnt = size;
+       if (!mmc->high_capacity)
+               block *= mmc->read_bl_len;
+       emm_dma.s.card_addr = block;
+       debug("%s(%s): card address: 0x%x, size: %d, multi: %d\n",
+             __func__, mmc->dev->name, block, size, emm_dma.s.multi);
+
+       if (timeout > 0)
+               timeout = (timeout * 1000) - 1000;
+       set_wdog(mmc, timeout);
+
+       debug("  Writing 0x%llx to mio_emm_dma\n", emm_dma.u);
+       write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+}
+
+/**
+ * Waits for a DMA operation to complete
+ *
+ * @param      mmc     mmc device
+ * @param      timeout timeout in ms
+ *
+ * @return     0 for success (could be DMA errors), -ETIMEDOUT on timeout
+ */
+
+/**
+ * Cleanup DMA engine after a failure
+ *
+ * @param      mmc     mmc device
+ * @param      rsp_sts rsp status
+ */
+static void octeontx_mmc_cleanup_dma(struct mmc *mmc,
+                                    union mio_emm_rsp_sts rsp_sts)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       union mio_emm_dma emm_dma;
+       ulong start;
+       int retries = 3;
+
+       do {
+               debug("%s(%s): rsp_sts: 0x%llx, rsp_lo: 0x%llx, dma_int: 0x%llx\n",
+                     __func__, mmc->dev->name, rsp_sts.u,
+                     read_csr(mmc, MIO_EMM_RSP_LO()),
+                     read_csr(mmc, MIO_EMM_DMA_INT()));
+               emm_dma.u = read_csr(mmc, MIO_EMM_DMA());
+               emm_dma.s.dma_val = 1;
+               emm_dma.s.dat_null = 1;
+               emm_dma.s.bus_id = slot->bus_id;
+               write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+               start = get_timer(0);
+               do {
+                       rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+                       WATCHDOG_RESET();
+               } while (get_timer(start) < 100 &&
+                        (rsp_sts.s.dma_val || rsp_sts.s.dma_pend));
+       } while (retries-- >= 0 && rsp_sts.s.dma_pend);
+       if (rsp_sts.s.dma_val)
+               pr_err("%s(%s): Error: could not clean up DMA.  RSP_STS: 0x%llx, RSP_LO: 0x%llx\n",
+                      __func__, mmc->dev->name, rsp_sts.u,
+                      read_csr(mmc, MIO_EMM_RSP_LO()));
+       debug("  rsp_sts after clearing up DMA: 0x%llx\n",
+             read_csr(mmc, MIO_EMM_RSP_STS()));
+}
+
+/**
+ * Waits for a DMA operation to complete
+ *
+ * @param      mmc     mmc device
+ * @param      timeout timeout in ms
+ * @param      verbose true to print out error information
+ *
+ * @return     0 for success (could be DMA errors), -ETIMEDOUT on timeout
+ *             or -EIO if IO error.
+ */
+static int octeontx_mmc_wait_dma(struct mmc *mmc, bool write, ulong timeout,
+                                bool verbose)
+{
+       struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       ulong start_time = get_timer(0);
+       union mio_emm_dma_int emm_dma_int;
+       union mio_emm_rsp_sts rsp_sts;
+       union mio_emm_dma emm_dma;
+       bool timed_out = false;
+       bool err = false;
+
+       debug("%s(%s, %lu, %d), delay: %uus\n", __func__, mmc->dev->name,
+             timeout, verbose, host->dma_wait_delay);
+
+       udelay(host->dma_wait_delay);
+       do {
+               emm_dma_int.u = read_csr(mmc, MIO_EMM_DMA_INT());
+               rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+               if (write) {
+                       if ((rsp_sts.s.dma_pend && !rsp_sts.s.dma_val) ||
+                           rsp_sts.s.blk_timeout ||
+                           rsp_sts.s.stp_timeout ||
+                           rsp_sts.s.rsp_timeout) {
+                               err = true;
+#ifdef DEBUG
+                               debug("%s: f1\n", __func__);
+                               octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+#endif
+                               break;
+                       }
+               } else {
+                       if (rsp_sts.s.blk_crc_err ||
+                           (rsp_sts.s.dma_pend && !rsp_sts.s.dma_val)) {
+                               err = true;
+#if defined(DEBUG)
+                               octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+#endif
+                               break;
+                       }
+               }
+               if (rsp_sts.s.dma_pend) {
+                       /*
+                        * If this is set then an error has occurred.
+                        * Try and restart the DMA operation.
+                        */
+                       emm_dma.u = read_csr(mmc, MIO_EMM_DMA());
+                       if (verbose) {
+                               pr_err("%s(%s): DMA pending error: rsp_sts: 0x%llx, dma_int: 0x%llx, emm_dma: 0x%llx\n",
+                                      __func__, mmc->dev->name, rsp_sts.u,
+                                      emm_dma_int.u, emm_dma.u);
+                               octeontx_print_rsp_sts(mmc);
+                               debug("  MIO_EMM_DEBUG: 0x%llx\n",
+                                     read_csr(mmc, MIO_EMM_DEBUG()));
+                               pr_err("%s: Trying DMA resume...\n", __func__);
+                       }
+                       emm_dma.s.dma_val = 1;
+                       emm_dma.s.dat_null = 1;
+                       write_csr(mmc, MIO_EMM_DMA(), emm_dma.u);
+                       udelay(10);
+               } else if (!rsp_sts.s.dma_val && emm_dma_int.s.done) {
+                       break;
+               }
+               WATCHDOG_RESET();
+               timed_out = (get_timer(start_time) > timeout);
+       } while (!timed_out);
+
+       if (timed_out || err) {
+               if (verbose) {
+                       pr_err("%s(%s): MMC DMA %s after %lu ms, rsp_sts: 0x%llx, dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+                              __func__, mmc->dev->name,
+                              timed_out ? "timed out" : "error",
+                              get_timer(start_time), rsp_sts.u,
+                              emm_dma_int.u,
+                              read_csr(mmc, MIO_EMM_RSP_LO()),
+                              read_csr(mmc, MIO_EMM_DMA()));
+                       octeontx_print_rsp_sts(mmc);
+               }
+               if (rsp_sts.s.dma_pend)
+                       octeontx_mmc_cleanup_dma(mmc, rsp_sts);
+       } else {
+               write_csr(mmc, MIO_EMM_DMA_INT(),
+                         read_csr(mmc, MIO_EMM_DMA_INT()));
+       }
+
+       return timed_out ? -ETIMEDOUT : (err ? -EIO : 0);
+}
+
+/**
+ * Read blocks from the MMC/SD device
+ *
+ * @param      mmc     mmc device
+ * @param      cmd     command
+ * @param      data    data for read
+ * @param      verbose true to print out error information
+ *
+ * @return     number of blocks read or 0 if error
+ */
+static int octeontx_mmc_read_blocks(struct mmc *mmc, struct mmc_cmd *cmd,
+                                   struct mmc_data *data, bool verbose)
+{
+       struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       union mio_emm_rsp_sts rsp_sts;
+       dma_addr_t dma_addr = (dma_addr_t)dm_pci_virt_to_mem(host->dev,
+                                                            data->dest);
+       ulong count;
+       ulong blkcnt = data->blocks;
+       ulong start = cmd->cmdarg;
+       int timeout = 1000 + blkcnt * 20;
+       bool timed_out = false;
+       bool multi_xfer = cmd->cmdidx == MMC_CMD_READ_MULTIPLE_BLOCK;
+
+       debug("%s(%s): dest: %p, dma address: 0x%llx, blkcnt: %lu, start: %lu\n",
+             __func__, mmc->dev->name, data->dest, dma_addr, blkcnt, start);
+       debug("%s: rsp_sts: 0x%llx\n", __func__,
+             read_csr(mmc, MIO_EMM_RSP_STS()));
+       /* use max timeout for multi-block transfers */
+       /* timeout = 0; */
+
+       /*
+        * If we have a valid SD card in the slot, we set the response bit
+        * mask to check for CRC errors and timeouts only.
+        * Otherwise, use the default power on reset value.
+        */
+       write_csr(mmc, MIO_EMM_STS_MASK(),
+                 IS_SD(mmc) ? 0x00b00000ull : 0xe4390080ull);
+       invalidate_dcache_range((u64)data->dest,
+                               (u64)data->dest + blkcnt * data->blocksize);
+
+       if (multi_xfer) {
+               octeontx_mmc_start_dma(mmc, false, false, start, dma_addr,
+                                      blkcnt, timeout);
+               timed_out = !!octeontx_mmc_wait_dma(mmc, false, timeout,
+                                                   verbose);
+               rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+               if (timed_out || rsp_sts.s.dma_val || rsp_sts.s.dma_pend) {
+                       if (!verbose)
+                               return 0;
+
+                       pr_err("%s(%s): Error: DMA timed out.  rsp_sts: 0x%llx, emm_int: 0x%llx, dma_int: 0x%llx, rsp_lo: 0x%llx\n",
+                              __func__, mmc->dev->name, rsp_sts.u,
+                              read_csr(mmc, MIO_EMM_INT()),
+                              read_csr(mmc, MIO_EMM_DMA_INT()),
+                              read_csr(mmc, MIO_EMM_RSP_LO()));
+                       pr_err("%s: block count: %lu, start: 0x%lx\n",
+                              __func__, blkcnt, start);
+                       octeontx_mmc_print_registers(mmc);
+                       return 0;
+               }
+       } else {
+               count = blkcnt;
+               timeout = 1000;
+               do {
+                       octeontx_mmc_start_dma(mmc, false, false, start,
+                                              dma_addr, 1, timeout);
+                       dma_addr += mmc->read_bl_len;
+                       start++;
+
+                       timed_out = !!octeontx_mmc_wait_dma(mmc, false,
+                                                           timeout, verbose);
+                       rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+                       if (timed_out || rsp_sts.s.dma_val ||
+                           rsp_sts.s.dma_pend) {
+                               if (verbose) {
+                                       pr_err("%s: Error: DMA timed out.  rsp_sts: 0x%llx, emm_int: 0x%llx, dma_int: 0x%llx, rsp_lo: 0x%llx\n",
+                                              __func__, rsp_sts.u,
+                                              read_csr(mmc, MIO_EMM_INT()),
+                                              read_csr(mmc, MIO_EMM_DMA_INT()),
+                                              read_csr(mmc, MIO_EMM_RSP_LO()));
+                                       pr_err("%s: block count: 1, start: 0x%lx\n",
+                                              __func__, start);
+                                       octeontx_mmc_print_registers(mmc);
+                               }
+                               return blkcnt - count;
+                       }
+                       WATCHDOG_RESET();
+               } while (--count);
+       }
+#ifdef DEBUG
+       debug("%s(%s): Read %lu (0x%lx) blocks starting at block %u (0x%x) to address %p (dma address 0x%llx)\n",
+             __func__, mmc->dev->name, blkcnt, blkcnt,
+             cmd->cmdarg, cmd->cmdarg, data->dest,
+             dm_pci_virt_to_mem(host->dev, data->dest));
+       print_buffer(0, data->dest, 1, 0x200, 0);
+#endif
+       return blkcnt;
+}
+
+static int octeontx_mmc_poll_ready(struct mmc *mmc, ulong timeout)
+{
+       ulong start;
+       struct mmc_cmd cmd;
+       int err;
+       bool not_ready = false;
+
+       memset(&cmd, 0, sizeof(cmd));
+       cmd.cmdidx = MMC_CMD_SEND_STATUS;
+       cmd.cmdarg = mmc->rca << 16;
+       cmd.resp_type = MMC_RSP_R1;
+       start = get_timer(0);
+       do {
+               err = octeontx_mmc_send_cmd(mmc, &cmd, NULL);
+               if (err) {
+                       pr_err("%s(%s): MMC command error: %d; Retry...\n",
+                              __func__, mmc->dev->name, err);
+                       not_ready = true;
+               } else if (cmd.response[0] & R1_READY_FOR_DATA) {
+                       return 0;
+               }
+               WATCHDOG_RESET();
+       } while (get_timer(start) < timeout);
+
+       if (not_ready)
+               pr_err("%s(%s): MMC command error; Retry timeout\n",
+                      __func__, mmc->dev->name);
+       return -ETIMEDOUT;
+}
+
+static ulong octeontx_mmc_write_blocks(struct mmc *mmc, struct mmc_cmd *cmd,
+                                      struct mmc_data *data)
+{
+       struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       ulong start = cmd->cmdarg;
+       ulong blkcnt = data->blocks;
+       dma_addr_t dma_addr;
+       union mio_emm_rsp_sts rsp_sts;
+       union mio_emm_sts_mask emm_sts_mask;
+       ulong timeout;
+       int count;
+       bool timed_out = false;
+       bool multi_xfer = (blkcnt > 1) &&
+                       ((IS_SD(mmc) && mmc->scr[0] & 2) || !IS_SD(mmc));
+
+       octeontx_mmc_switch_to(mmc);
+       emm_sts_mask.u = 0;
+       emm_sts_mask.s.sts_msk = R1_BLOCK_WRITE_MASK;
+       write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+
+       if (octeontx_mmc_poll_ready(mmc, 10000)) {
+               pr_err("%s(%s): Ready timed out\n", __func__, mmc->dev->name);
+               return 0;
+       }
+       flush_dcache_range((u64)data->src,
+                          (u64)data->src + blkcnt * mmc->write_bl_len);
+       dma_addr = (u64)dm_pci_virt_to_mem(host->dev, (void *)data->src);
+       if (multi_xfer) {
+               timeout = 5000 + 100 * blkcnt;
+               octeontx_mmc_start_dma(mmc, true, false, start, dma_addr,
+                                      blkcnt, timeout);
+               timed_out = !!octeontx_mmc_wait_dma(mmc, true, timeout, true);
+               rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+               if (timed_out || rsp_sts.s.dma_val || rsp_sts.s.dma_pend) {
+                       pr_err("%s(%s): Error: multi-DMA timed out after %lums.  rsp_sts: 0x%llx, emm_int: 0x%llx, emm_dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+                              __func__, mmc->dev->name, timeout,
+                              rsp_sts.u,
+                              read_csr(mmc, MIO_EMM_INT()),
+                              read_csr(mmc, MIO_EMM_DMA_INT()),
+                              read_csr(mmc, MIO_EMM_RSP_LO()),
+                              read_csr(mmc, MIO_EMM_DMA()));
+                       return 0;
+               }
+       } else {
+               timeout = 5000;
+               count = blkcnt;
+               do {
+                       octeontx_mmc_start_dma(mmc, true, false, start,
+                                              dma_addr, 1, timeout);
+                       dma_addr += mmc->read_bl_len;
+                       start++;
+
+                       timed_out = !!octeontx_mmc_wait_dma(mmc, true, timeout,
+                                                           true);
+                       rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+                       if (timed_out || rsp_sts.s.dma_val ||
+                           rsp_sts.s.dma_pend) {
+                               pr_err("%s(%s): Error: single-DMA timed out after %lums.  rsp_sts: 0x%llx, emm_int: 0x%llx, emm_dma_int: 0x%llx, rsp_sts_lo: 0x%llx, emm_dma: 0x%llx\n",
+                                      __func__, mmc->dev->name, timeout,
+                                      rsp_sts.u,
+                                      read_csr(mmc, MIO_EMM_RSP_STS()),
+                                      read_csr(mmc, MIO_EMM_DMA_INT()),
+                                      read_csr(mmc, MIO_EMM_RSP_LO()),
+                                      read_csr(mmc, MIO_EMM_DMA()));
+                               return blkcnt - count;
+                       }
+                       WATCHDOG_RESET();
+               } while (--count);
+       }
+
+       return blkcnt;
+}
+
+/**
+ * Send a command to the eMMC/SD device
+ *
+ * @param mmc  mmc device
+ * @param cmd  cmd to send and response
+ * @param data additional data
+ * @param flags
+ * @return     0 for success, otherwise error
+ */
+static int octeontx_mmc_send_cmd(struct mmc *mmc, struct mmc_cmd *cmd,
+                                struct mmc_data *data)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       const char *name = slot->dev->name;
+       struct octeontx_mmc_cr_mods mods = {0, 0};
+       union mio_emm_rsp_sts rsp_sts;
+       union mio_emm_cmd emm_cmd;
+       union mio_emm_rsp_lo rsp_lo;
+       union mio_emm_buf_idx emm_buf_idx;
+       union mio_emm_buf_dat emm_buf_dat;
+       ulong start;
+       int i;
+       ulong blkcnt;
+
+       /**
+        * This constant has a 1 bit for each command which should have a short
+        * timeout and a 0 for each bit with a long timeout.  Currently the
+        * following commands have a long timeout:
+        *   CMD6, CMD17, CMD18, CMD24, CMD25, CMD32, CMD33, CMD35, CMD36 and
+        *   CMD38.
+        */
+       static const u64 timeout_short = 0xFFFFFFA4FCF9FFDFull;
+       uint timeout;
+
+       if (cmd->cmdidx == MMC_CMD_SEND_EXT_CSD) {
+               union mio_emm_rca emm_rca;
+
+               emm_rca.u = 0;
+               emm_rca.s.card_rca = mmc->rca;
+               write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+       }
+
+       if (timeout_short & (1ull << cmd->cmdidx))
+               timeout = MMC_TIMEOUT_SHORT;
+       else if (cmd->cmdidx == MMC_CMD_SWITCH && IS_SD(mmc))
+               timeout = 2560;
+       else if (cmd->cmdidx == MMC_CMD_ERASE)
+               timeout = MMC_TIMEOUT_ERASE;
+       else
+               timeout = MMC_TIMEOUT_LONG;
+
+       debug("%s(%s): cmd idx: %u, arg: 0x%x, resp type: 0x%x, timeout: %u\n",
+             __func__, name, cmd->cmdidx, cmd->cmdarg, cmd->resp_type,
+             timeout);
+       if (data)
+               debug("  data: addr: %p, flags: 0x%x, blocks: %u, blocksize: %u\n",
+                     data->dest, data->flags, data->blocks, data->blocksize);
+
+       octeontx_mmc_switch_to(mmc);
+
+       /* Clear any interrupts */
+       write_csr(mmc, MIO_EMM_INT(), read_csr(mmc, MIO_EMM_INT()));
+
+       /*
+        * We need to override the default command types and response types
+        * when dealing with SD cards.
+        */
+       mods = octeontx_mmc_get_cr_mods(mmc, cmd, data);
+
+       /* Handle block read/write/stop operations */
+       switch (cmd->cmdidx) {
+       case MMC_CMD_GO_IDLE_STATE:
+               slot->tuned = false;
+               slot->hs200_tuned = false;
+               slot->hs400_tuned = false;
+               break;
+       case MMC_CMD_STOP_TRANSMISSION:
+               return 0;
+       case MMC_CMD_READ_MULTIPLE_BLOCK:
+       case MMC_CMD_READ_SINGLE_BLOCK:
+               pr_debug("%s(%s): Reading blocks\n", __func__, name);
+               blkcnt = octeontx_mmc_read_blocks(mmc, cmd, data, true);
+               return (blkcnt > 0) ? 0 : -1;
+       case MMC_CMD_WRITE_MULTIPLE_BLOCK:
+       case MMC_CMD_WRITE_SINGLE_BLOCK:
+               blkcnt = octeontx_mmc_write_blocks(mmc, cmd, data);
+               return (blkcnt > 0) ? 0 : -1;
+       case MMC_CMD_SELECT_CARD:
+               /* Set the RCA register (is it set automatically?) */
+               if (IS_SD(mmc)) {
+                       union mio_emm_rca emm_rca;
+
+                       emm_rca.u = 0;
+                       emm_rca.s.card_rca = (cmd->cmdarg >> 16);
+                       write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+                       debug("%s: Set SD relative address (RCA) to 0x%x\n",
+                             __func__, emm_rca.s.card_rca);
+               }
+               break;
+
+       case MMC_CMD_SWITCH:
+               if (!data && !slot->is_acmd)
+                       octeontx_mmc_track_switch(mmc, cmd->cmdarg);
+               break;
+       }
+
+       emm_cmd.u = 0;
+       emm_cmd.s.cmd_val = 1;
+       emm_cmd.s.bus_id = slot->bus_id;
+       emm_cmd.s.cmd_idx = cmd->cmdidx;
+       emm_cmd.s.arg = cmd->cmdarg;
+       emm_cmd.s.ctype_xor = mods.ctype_xor;
+       emm_cmd.s.rtype_xor = mods.rtype_xor;
+       if (data && data->blocks == 1 && data->blocksize != 512) {
+               emm_cmd.s.offset =
+                       64 - ((data->blocks * data->blocksize) / 8);
+               debug("%s: offset set to %u\n", __func__, emm_cmd.s.offset);
+       }
+
+       if (data && data->flags & MMC_DATA_WRITE) {
+               u8 *src = (u8 *)data->src;
+
+               if (!src) {
+                       pr_err("%s(%s): Error: data source for cmd 0x%x is NULL!\n",
+                              __func__, name, cmd->cmdidx);
+                       return -1;
+               }
+               if (data->blocksize > 512) {
+                       pr_err("%s(%s): Error: data for cmd 0x%x exceeds 512 bytes\n",
+                              __func__, name, cmd->cmdidx);
+                       return -1;
+               }
+#ifdef DEBUG
+               debug("%s: Sending %d bytes data\n", __func__, data->blocksize);
+               print_buffer(0, src, 1, data->blocksize, 0);
+#endif
+               emm_buf_idx.u = 0;
+               emm_buf_idx.s.inc = 1;
+               write_csr(mmc, MIO_EMM_BUF_IDX(), emm_buf_idx.u);
+               for (i = 0; i < (data->blocksize + 7) / 8; i++) {
+                       memcpy(&emm_buf_dat.u, src, sizeof(emm_buf_dat.u));
+                       write_csr(mmc, MIO_EMM_BUF_DAT(),
+                                 cpu_to_be64(emm_buf_dat.u));
+                       src += sizeof(emm_buf_dat.u);
+               }
+               write_csr(mmc, MIO_EMM_BUF_IDX(), 0);
+       }
+       debug("%s(%s): Sending command %u (emm_cmd: 0x%llx)\n", __func__,
+             name, cmd->cmdidx, emm_cmd.u);
+       set_wdog(mmc, timeout * 1000);
+       write_csr(mmc, MIO_EMM_CMD(), emm_cmd.u);
+
+       /* Wait for command to finish or time out */
+       start = get_timer(0);
+       do {
+               rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+               WATCHDOG_RESET();
+       } while (!rsp_sts.s.cmd_done && !rsp_sts.s.rsp_timeout &&
+                (get_timer(start) < timeout + 10));
+       octeontx_mmc_print_rsp_errors(mmc, rsp_sts);
+       if (rsp_sts.s.rsp_timeout || !rsp_sts.s.cmd_done) {
+               debug("%s(%s): Error: command %u(0x%x) timed out.  rsp_sts: 0x%llx\n",
+                     __func__, name, cmd->cmdidx, cmd->cmdarg, rsp_sts.u);
+               octeontx_mmc_print_registers(mmc);
+               return -ETIMEDOUT;
+       }
+       if (rsp_sts.s.rsp_crc_err) {
+               debug("%s(%s): RSP CRC error, rsp_sts: 0x%llx, cmdidx: %u, arg: 0x%08x\n",
+                     __func__, name, rsp_sts.u, cmd->cmdidx, cmd->cmdarg);
+               octeontx_mmc_print_registers(mmc);
+               return -1;
+       }
+       if (slot->bus_id != rsp_sts.s.bus_id) {
+               pr_warn("%s(%s): bus id mismatch, got %d, expected %d for command 0x%x(0x%x)\n",
+                       __func__, name,
+                       rsp_sts.s.bus_id, slot->bus_id,
+                       cmd->cmdidx, cmd->cmdarg);
+               goto error;
+       }
+       if (rsp_sts.s.rsp_bad_sts) {
+               rsp_lo.u = read_csr(mmc, MIO_EMM_RSP_LO());
+               debug("%s: Bad response for bus id %d, cmd id %d:\n"
+                     "    rsp_timeout: %d\n"
+                     "    rsp_bad_sts: %d\n"
+                     "    rsp_crc_err: %d\n",
+                     __func__, slot->bus_id, cmd->cmdidx,
+                     rsp_sts.s.rsp_timeout,
+                     rsp_sts.s.rsp_bad_sts,
+                     rsp_sts.s.rsp_crc_err);
+               if (rsp_sts.s.rsp_type == 1 && rsp_sts.s.rsp_bad_sts) {
+                       debug("    Response status: 0x%llx\n",
+                             (rsp_lo.u >> 8) & 0xffffffff);
+#ifdef DEBUG
+                       mmc_print_status((rsp_lo.u >> 8) & 0xffffffff);
+#endif
+               }
+               goto error;
+       }
+       if (rsp_sts.s.cmd_idx != cmd->cmdidx) {
+               debug("%s(%s): Command response index %d does not match command index %d\n",
+                     __func__, name, rsp_sts.s.cmd_idx, cmd->cmdidx);
+               octeontx_print_rsp_sts(mmc);
+               debug("%s: rsp_lo: 0x%llx\n", __func__,
+                     read_csr(mmc, MIO_EMM_RSP_LO()));
+
+               goto error;
+       }
+
+       slot->is_acmd = (cmd->cmdidx == MMC_CMD_APP_CMD);
+
+       if (!cmd->resp_type & MMC_RSP_PRESENT)
+               debug("  Response type: 0x%x, no response expected\n",
+                     cmd->resp_type);
+       /* Get the response if present */
+       if (rsp_sts.s.rsp_val && (cmd->resp_type & MMC_RSP_PRESENT)) {
+               union mio_emm_rsp_hi rsp_hi;
+
+               rsp_lo.u = read_csr(mmc, MIO_EMM_RSP_LO());
+
+               switch (rsp_sts.s.rsp_type) {
+               case 1:
+               case 3:
+               case 4:
+               case 5:
+                       cmd->response[0] = (rsp_lo.u >> 8) & 0xffffffffull;
+                       debug("  response: 0x%08x\n",
+                             cmd->response[0]);
+                       cmd->response[1] = 0;
+                       cmd->response[2] = 0;
+                       cmd->response[3] = 0;
+                       break;
+               case 2:
+                       cmd->response[3] = rsp_lo.u & 0xffffffff;
+                       cmd->response[2] = (rsp_lo.u >> 32) & 0xffffffff;
+                       rsp_hi.u = read_csr(mmc, MIO_EMM_RSP_HI());
+                       cmd->response[1] = rsp_hi.u & 0xffffffff;
+                       cmd->response[0] = (rsp_hi.u >> 32) & 0xffffffff;
+                       debug("  response: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+                             cmd->response[0], cmd->response[1],
+                             cmd->response[2], cmd->response[3]);
+                       break;
+               default:
+                       pr_err("%s(%s): Unknown response type 0x%x for command %d, arg: 0x%x, rsp_sts: 0x%llx\n",
+                              __func__, name, rsp_sts.s.rsp_type, cmd->cmdidx,
+                              cmd->cmdarg, rsp_sts.u);
+                       return -1;
+               }
+       } else {
+               debug("  Response not expected\n");
+       }
+
+       if (data && data->flags & MMC_DATA_READ) {
+               u8 *dest = (u8 *)data->dest;
+
+               if (!dest) {
+                       pr_err("%s(%s): Error, destination buffer NULL!\n",
+                              __func__, mmc->dev->name);
+                       goto error;
+               }
+               if (data->blocksize > 512) {
+                       printf("%s(%s): Error: data size %u exceeds 512\n",
+                              __func__, mmc->dev->name,
+                              data->blocksize);
+                       goto error;
+               }
+               emm_buf_idx.u = 0;
+               emm_buf_idx.s.inc = 1;
+               write_csr(mmc, MIO_EMM_BUF_IDX(), emm_buf_idx.u);
+               for (i = 0; i < (data->blocksize + 7) / 8; i++) {
+                       emm_buf_dat.u = read_csr(mmc, MIO_EMM_BUF_DAT());
+                       emm_buf_dat.u = be64_to_cpu(emm_buf_dat.u);
+                       memcpy(dest, &emm_buf_dat.u, sizeof(emm_buf_dat.u));
+                       dest += sizeof(emm_buf_dat.u);
+               }
+               write_csr(mmc, MIO_EMM_BUF_IDX(), 0);
+#ifdef DEBUG
+               debug("%s: Received %d bytes data\n", __func__,
+                     data->blocksize);
+               print_buffer(0, data->dest, 1, data->blocksize, 0);
+#endif
+       }
+
+       return 0;
+error:
+#ifdef DEBUG
+       octeontx_mmc_print_registers(mmc);
+#endif
+       return -1;
+}
+
+static int octeontx_mmc_dev_send_cmd(struct udevice *dev, struct mmc_cmd *cmd,
+                                    struct mmc_data *data)
+{
+       return octeontx_mmc_send_cmd(dev_to_mmc(dev), cmd, data);
+}
+
+#ifdef MMC_SUPPORTS_TUNING
+static int octeontx_mmc_test_cmd(struct mmc *mmc, u32 opcode, int *statp)
+{
+       struct mmc_cmd cmd;
+       int err;
+
+       memset(&cmd, 0, sizeof(cmd));
+
+       debug("%s(%s, %u, %p)\n", __func__, mmc->dev->name, opcode, statp);
+       cmd.cmdidx = opcode;
+       cmd.resp_type = MMC_RSP_R1;
+       cmd.cmdarg = mmc->rca << 16;
+
+       err = octeontx_mmc_send_cmd(mmc, &cmd, NULL);
+       if (err)
+               debug("%s(%s, %u) returned %d\n", __func__,
+                     mmc->dev->name, opcode, err);
+       if (statp)
+               *statp = cmd.response[0];
+       return err;
+}
+
+static int octeontx_mmc_test_get_ext_csd(struct mmc *mmc, u32 opcode,
+                                        int *statp)
+{
+       struct mmc_cmd cmd;
+       struct mmc_data data;
+       int err;
+       u8 ext_csd[MMC_MAX_BLOCK_LEN];
+
+       debug("%s(%s, %u, %p)\n",  __func__, mmc->dev->name, opcode, statp);
+       memset(&cmd, 0, sizeof(cmd));
+
+       cmd.cmdidx = MMC_CMD_SEND_EXT_CSD;
+       cmd.resp_type = MMC_RSP_R1;
+       cmd.cmdarg = 0;
+
+       data.dest = (char *)ext_csd;
+       data.blocks = 1;
+       data.blocksize = MMC_MAX_BLOCK_LEN;
+       data.flags = MMC_DATA_READ;
+
+       err = octeontx_mmc_send_cmd(mmc, &cmd, &data);
+       if (statp)
+               *statp = cmd.response[0];
+
+       return err;
+}
+
+/**
+ * Wrapper to set the MIO_EMM_TIMING register
+ *
+ * @param      mmc             pointer to mmc data structure
+ * @param      emm_timing      New emm_timing register value
+ *
+ * On some devices it is possible that changing the data out value can
+ * cause a glitch on an internal fifo.  This works around this problem
+ * by performing a soft-reset immediately before setting the timing register.
+ *
+ * Note: this function should not be called from any function that
+ * performs DMA or block operations since not all registers are
+ * preserved.
+ */
+static void octeontx_mmc_set_emm_timing(struct mmc *mmc,
+                                       union mio_emm_timing emm_timing)
+{
+       union mio_emm_cfg emm_cfg;
+       struct octeontx_mmc_slot *slot = mmc->priv;
+       union mio_emm_debug emm_debug;
+
+       debug("%s(%s, 0x%llx) din: %u\n", __func__, mmc->dev->name,
+             emm_timing.u, emm_timing.s.data_in_tap);
+
+       udelay(1);
+       if (slot->host->tap_requires_noclk) {
+               /* Turn off the clock */
+               emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+               emm_debug.s.emmc_clk_disable = 1;
+               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+               udelay(1);
+               emm_debug.s.rdsync_rst = 1;
+               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+       }
+       emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+       emm_cfg.s.bus_ena = 1 << 3;
+       write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+
+       udelay(1);
+       write_csr(mmc, MIO_EMM_TIMING(), emm_timing.u);
+       udelay(1);
+
+       if (slot->host->tap_requires_noclk) {
+               /* Turn on the clock */
+               emm_debug.s.rdsync_rst = 0;
+               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+               udelay(1);
+               emm_debug.s.emmc_clk_disable = 0;
+               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+               udelay(1);
+       }
+       emm_cfg.s.bus_ena = 1 << mmc_to_slot(mmc)->bus_id;
+       write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+}
+
+static const u8 octeontx_hs400_tuning_block[512] = {
+       0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+       0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+       0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+       0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+       0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+       0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+       0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+       0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+       0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+       0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+       0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+       0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+       0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+       0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+       0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+       0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+       0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+       0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+       0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+       0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+       0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+       0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+       0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+       0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+       0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+       0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+       0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+       0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+       0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+       0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+       0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+       0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+       0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00, 0x00,
+       0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc, 0xcc,
+       0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+       0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+       0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+       0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+       0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+       0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+       0xff, 0xff, 0xff, 0x00, 0xff, 0xff, 0xff, 0x00,
+       0x00, 0xff, 0xff, 0xcc, 0xcc, 0xcc, 0x33, 0xcc,
+       0xcc, 0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff,
+       0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+       0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+       0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+       0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+       0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+       0xff, 0x00, 0x00, 0xff, 0xff, 0x00, 0xff, 0x00,
+       0x00, 0xff, 0x00, 0xff, 0x55, 0xaa, 0x55, 0xaa,
+       0xcc, 0x33, 0x33, 0xcc, 0xcc, 0xcc, 0xff, 0xff,
+       0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee, 0xff,
+       0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd, 0xdd,
+       0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff, 0xbb,
+       0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff, 0xff,
+       0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee, 0xff,
+       0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00,
+       0x00, 0xff, 0x00, 0xff, 0x00, 0xff, 0x00, 0xff,
+       0x01, 0xfe, 0x01, 0xfe, 0xcc, 0xcc, 0xcc, 0xff,
+       0xff, 0xff, 0xee, 0xff, 0xff, 0xff, 0xee, 0xee,
+       0xff, 0xff, 0xff, 0xdd, 0xff, 0xff, 0xff, 0xdd,
+       0xdd, 0xff, 0xff, 0xff, 0xbb, 0xff, 0xff, 0xff,
+       0xbb, 0xbb, 0xff, 0xff, 0xff, 0x77, 0xff, 0xff,
+       0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
+
+};
+
+/**
+ * Perform tuning in HS400 mode
+ *
+ * @param[in]  mmc     mmc data structure
+ *
+ * @ret                0 for success, otherwise error
+ */
+static int octeontx_tune_hs400(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct mmc_cmd cmd;
+       struct mmc_data data;
+       union mio_emm_timing emm_timing;
+       u8 buffer[mmc->read_bl_len];
+       int tap_adj;
+       int err = -1;
+       int tap;
+       int run = 0;
+       int start_run = -1;
+       int best_run = 0;
+       int best_start = -1;
+       bool prev_ok = false;
+       char env_name[64];
+       char how[MAX_NO_OF_TAPS + 1] = "";
+
+       if (slot->hs400_tuning_block == -1)
+               return 0;
+
+       /* The eMMC standard disables all tuning support when operating in
+        * DDR modes like HS400.  The problem with this is that there are
+        * many cases where the HS200 tuning does not work for HS400 mode.
+        * In order to perform this tuning, while in HS200 a block is written
+        * to a block specified in the device tree (marvell,hs400-tuning-block)
+        * which is used for tuning in this function by repeatedly reading
+        * this block and comparing the data and return code.  This function
+        * chooses the data input tap in the middle of the longest run of
+        * successful read operations.
+        */
+
+       emm_timing = slot->hs200_taps;
+       debug("%s(%s): Start ci: %d, co: %d, di: %d, do: %d\n",
+             __func__, mmc->dev->name, emm_timing.s.cmd_in_tap,
+             emm_timing.s.cmd_out_tap, emm_timing.s.data_in_tap,
+             emm_timing.s.data_out_tap);
+       memset(buffer, 0xdb, sizeof(buffer));
+
+       snprintf(env_name, sizeof(env_name), "emmc%d_data_in_tap_hs400",
+                slot->bus_id);
+       tap = env_get_ulong(env_name, 10, -1L);
+       if (tap >= 0 && tap < MAX_NO_OF_TAPS) {
+               printf("Overriding data input tap for HS400 mode to %d\n", tap);
+               emm_timing.s.data_in_tap = tap;
+               octeontx_mmc_set_emm_timing(mmc, emm_timing);
+               return 0;
+       }
+
+       for (tap = 0; tap <= MAX_NO_OF_TAPS; tap++, prev_ok = !err) {
+               if (tap < MAX_NO_OF_TAPS) {
+                       debug("%s: Testing data in tap %d\n", __func__, tap);
+                       emm_timing.s.data_in_tap = tap;
+                       octeontx_mmc_set_emm_timing(mmc, emm_timing);
+
+                       cmd.cmdidx = MMC_CMD_READ_SINGLE_BLOCK;
+                       cmd.cmdarg = slot->hs400_tuning_block;
+                       cmd.resp_type = MMC_RSP_R1;
+                       data.dest = (void *)buffer;
+                       data.blocks = 1;
+                       data.blocksize = mmc->read_bl_len;
+                       data.flags = MMC_DATA_READ;
+                       err = !octeontx_mmc_read_blocks(mmc, &cmd, &data,
+                                                       false);
+                       if (err || memcmp(buffer, octeontx_hs400_tuning_block,
+                                         sizeof(buffer))) {
+#ifdef DEBUG
+                               if (!err) {
+                                       debug("%s: data mismatch.  Read:\n",
+                                             __func__);
+                                       print_buffer(0, buffer, 1,
+                                                    sizeof(buffer), 0);
+                                       debug("\nExpected:\n");
+                                       print_buffer(0,
+                                           octeontx_hs400_tuning_block, 1,
+                                           sizeof(octeontx_hs400_tuning_block),
+                                           0);
+                               } else {
+                                       debug("%s: Error %d reading block\n",
+                                             __func__, err);
+                               }
+#endif
+                               err = -EINVAL;
+                       } else {
+                               debug("%s: tap %d good\n", __func__, tap);
+                       }
+                       how[tap] = "-+"[!err];
+               } else {
+                       err = -EINVAL;
+               }
+
+               if (!err) {
+                       if (!prev_ok)
+                               start_run = tap;
+               } else if (prev_ok) {
+                       run = tap - 1 - start_run;
+                       if (start_run >= 0 && run > best_run) {
+                               best_start = start_run;
+                               best_run = run;
+                       }
+               }
+       }
+
+       how[tap - 1] = '\0';
+       if (best_start < 0) {
+               printf("%s(%s): %lldMHz tuning failed for HS400\n",
+                      __func__, mmc->dev->name, slot->clock / 1000000);
+               return -EINVAL;
+       }
+       tap = best_start + best_run / 2;
+
+       snprintf(env_name, sizeof(env_name), "emmc%d_data_in_tap_adj_hs400",
+                slot->bus_id);
+       tap_adj = env_get_ulong(env_name, 10, slot->hs400_tap_adj);
+       /*
+        * Keep it in range and if out of range force it back in with a small
+        * buffer.
+        */
+       if (best_run > 3) {
+               tap = tap + tap_adj;
+               if (tap >= best_start + best_run)
+                       tap = best_start + best_run - 2;
+               if (tap <= best_start)
+                       tap = best_start + 2;
+       }
+       how[tap] = '@';
+       debug("Tuning: %s\n", how);
+       debug("%s(%s): HS400 tap: best run start: %d, length: %d, tap: %d\n",
+             __func__, mmc->dev->name, best_start, best_run, tap);
+       slot->hs400_taps = slot->hs200_taps;
+       slot->hs400_taps.s.data_in_tap = tap;
+       slot->hs400_tuned = true;
+       if (env_get_yesno("emmc_export_hs400_taps") > 0) {
+               debug("%s(%s): Exporting HS400 taps\n",
+                     __func__, mmc->dev->name);
+               env_set_ulong("emmc_timing_tap", slot->host->timing_taps);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_in_tap_debug",
+                        slot->bus_id);
+               env_set(env_name, how);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_in_tap_val",
+                        slot->bus_id);
+               env_set_ulong(env_name, tap);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_in_tap_start",
+                        slot->bus_id);
+               env_set_ulong(env_name, best_start);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_in_tap_end",
+                        slot->bus_id);
+               env_set_ulong(env_name, best_start + best_run);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_cmd_in_tap",
+                        slot->bus_id);
+               env_set_ulong(env_name, slot->hs400_taps.s.cmd_in_tap);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_cmd_out_tap",
+                        slot->bus_id);
+               env_set_ulong(env_name, slot->hs400_taps.s.cmd_out_tap);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_cmd_out_delay",
+                        slot->bus_id);
+               env_set_ulong(env_name, slot->cmd_out_hs400_delay);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_out_tap",
+                        slot->bus_id);
+               env_set_ulong(env_name, slot->hs400_taps.s.data_out_tap);
+               snprintf(env_name, sizeof(env_name),
+                        "emmc%d_hs400_data_out_delay",
+                        slot->bus_id);
+               env_set_ulong(env_name, slot->data_out_hs400_delay);
+       } else {
+               debug("%s(%s): HS400 environment export disabled\n",
+                     __func__, mmc->dev->name);
+       }
+       octeontx_mmc_set_timing(mmc);
+
+       return 0;
+}
+
+struct adj {
+       const char *name;
+       u8 mask_shift;
+       int (*test)(struct mmc *mmc, u32 opcode, int *error);
+       u32 opcode;
+       bool ddr_only;
+       bool hs200_only;
+       bool not_hs200_only;
+       u8 num_runs;
+};
+
+struct adj adj[] = {
+       { "CMD_IN", 48, octeontx_mmc_test_cmd, MMC_CMD_SEND_STATUS,
+         false, false, false, 2, },
+/*     { "CMD_OUT", 32, octeontx_mmc_test_cmd, MMC_CMD_SEND_STATUS, },*/
+       { "DATA_IN(HS200)", 16, mmc_send_tuning,
+               MMC_CMD_SEND_TUNING_BLOCK_HS200, false, true, false, 2, },
+       { "DATA_IN", 16, octeontx_mmc_test_get_ext_csd, 0, false, false,
+         true, 2, },
+/*     { "DATA_OUT", 0, octeontx_mmc_test_cmd, 0, true, false},*/
+       { NULL, },
+};
+
+/**
+ * Perform tuning tests to find optimal timing
+ *
+ * @param      mmc     mmc device
+ * @param      adj     parameter to tune
+ * @param      opcode  command opcode to use
+ *
+ * @return     0 for success, -1 if tuning failed
+ */
+static int octeontx_mmc_adjust_tuning(struct mmc *mmc, struct adj *adj,
+                                     u32 opcode)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       union mio_emm_timing timing;
+       union mio_emm_debug emm_debug;
+       int tap;
+       int err = -1;
+       int run = 0;
+       int count;
+       int start_run = -1;
+       int best_run = 0;
+       int best_start = -1;
+       bool prev_ok = false;
+       u64 tap_status = 0;
+       const int tap_adj = slot->hs200_tap_adj;
+       char how[MAX_NO_OF_TAPS + 1] = "";
+       bool is_hs200 = mmc->selected_mode == MMC_HS_200;
+
+       debug("%s(%s, %s, %d), hs200: %d\n", __func__, mmc->dev->name,
+             adj->name, opcode, is_hs200);
+       octeontx_mmc_set_emm_timing(mmc,
+                                   is_hs200 ? slot->hs200_taps : slot->taps);
+
+#ifdef DEBUG
+       if (opcode == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+               printf("%s(%s): Before tuning %s, opcode: %d\n",
+                      __func__, mmc->dev->name, adj->name, opcode);
+               octeontx_mmc_print_registers2(mmc, NULL);
+       }
+#endif
+
+       /*
+        * The algorithm to find the optimal timing is to start
+        * at the end and work backwards and select the second
+        * value that passes.  Each test is repeated twice.
+        */
+       for (tap = 0; tap <= MAX_NO_OF_TAPS; tap++, prev_ok = !err) {
+               if (tap < MAX_NO_OF_TAPS) {
+                       if (slot->host->tap_requires_noclk) {
+                               /* Turn off the clock */
+                               emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+                               emm_debug.s.emmc_clk_disable = 1;
+                               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                               udelay(1);
+                               emm_debug.s.rdsync_rst = 1;
+                               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                               udelay(1);
+                       }
+
+                       timing.u = read_csr(mmc, MIO_EMM_TIMING());
+                       timing.u &= ~(0x3full << adj->mask_shift);
+                       timing.u |= (u64)tap << adj->mask_shift;
+                       write_csr(mmc, MIO_EMM_TIMING(), timing.u);
+                       debug("%s(%s): Testing ci: %d, co: %d, di: %d, do: %d\n",
+                             __func__, mmc->dev->name, timing.s.cmd_in_tap,
+                             timing.s.cmd_out_tap, timing.s.data_in_tap,
+                             timing.s.data_out_tap);
+
+                       if (slot->host->tap_requires_noclk) {
+                               /* Turn off the clock */
+                               emm_debug.s.rdsync_rst = 0;
+                               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                               udelay(1);
+                               emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+                               emm_debug.s.emmc_clk_disable = 0;
+                               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                               udelay(1);
+                       }
+                       for (count = 0; count < 2; count++) {
+                               err = adj->test(mmc, opcode, NULL);
+                               if (err) {
+                                       debug("%s(%s, %s): tap %d failed, count: %d, rsp_sts: 0x%llx, rsp_lo: 0x%llx\n",
+                                             __func__, mmc->dev->name,
+                                             adj->name, tap, count,
+                                             read_csr(mmc,
+                                                      MIO_EMM_RSP_STS()),
+                                             read_csr(mmc,
+                                                      MIO_EMM_RSP_LO()));
+                                       debug("%s(%s, %s): tap: %d, do: %d, di: %d, co: %d, ci: %d\n",
+                                             __func__, mmc->dev->name,
+                                             adj->name, tap,
+                                             timing.s.data_out_tap,
+                                             timing.s.data_in_tap,
+                                             timing.s.cmd_out_tap,
+                                             timing.s.cmd_in_tap);
+                                       break;
+                               }
+                               debug("%s(%s, %s): tap %d passed, count: %d, rsp_sts: 0x%llx, rsp_lo: 0x%llx\n",
+                                     __func__, mmc->dev->name, adj->name, tap,
+                                     count,
+                                     read_csr(mmc, MIO_EMM_RSP_STS()),
+                                     read_csr(mmc, MIO_EMM_RSP_LO()));
+                       }
+                       tap_status |= (u64)(!err) << tap;
+                       how[tap] = "-+"[!err];
+               } else {
+                       /*
+                        * Putting the end+1 case in the loop simplifies
+                        * logic, allowing 'prev_ok' to process a sweet
+                        * spot in tuning which extends to the wall.
+                        */
+                       err = -EINVAL;
+               }
+               if (!err) {
+                       /*
+                        * If no CRC/etc errors in the response, but previous
+                        * failed, note the start of a new run.
+                        */
+                       debug("  prev_ok: %d\n", prev_ok);
+                       if (!prev_ok)
+                               start_run = tap;
+               } else if (prev_ok) {
+                       run = tap - 1 - start_run;
+                       /* did we just exit a wider sweet spot? */
+                       if (start_run >= 0 && run > best_run) {
+                               best_start = start_run;
+                               best_run = run;
+                       }
+               }
+       }
+       how[tap - 1] = '\0';
+       if (best_start < 0) {
+               printf("%s(%s, %s): %lldMHz tuning %s failed\n", __func__,
+                      mmc->dev->name, adj->name, slot->clock / 1000000,
+                      adj->name);
+               return -EINVAL;
+       }
+
+       tap = best_start + best_run / 2;
+       debug("  tap %d is center, start: %d, run: %d\n", tap,
+             best_start, best_run);
+       if (is_hs200) {
+               slot->hs200_taps.u &= ~(0x3full << adj->mask_shift);
+               slot->hs200_taps.u |= (u64)tap << adj->mask_shift;
+       } else {
+               slot->taps.u &= ~(0x3full << adj->mask_shift);
+               slot->taps.u |= (u64)tap << adj->mask_shift;
+       }
+       if (best_start < 0) {
+               printf("%s(%s, %s): %lldMHz tuning %s failed\n", __func__,
+                      mmc->dev->name, adj->name, slot->clock / 1000000,
+                      adj->name);
+               return -EINVAL;
+       }
+
+       tap = best_start + best_run / 2;
+       if (is_hs200 && (tap + tap_adj >= 0) && (tap + tap_adj < 64) &&
+           tap_status & (1ULL << (tap + tap_adj))) {
+               debug("Adjusting tap from %d by %d to %d\n",
+                     tap, tap_adj, tap + tap_adj);
+               tap += tap_adj;
+       }
+       how[tap] = '@';
+       debug("%s/%s %d/%d/%d %s\n", mmc->dev->name,
+             adj->name, best_start, tap, best_start + best_run, how);
+
+       if (is_hs200) {
+               slot->hs200_taps.u &= ~(0x3full << adj->mask_shift);
+               slot->hs200_taps.u |= (u64)tap << adj->mask_shift;
+       } else {
+               slot->taps.u &= ~(0x3full << adj->mask_shift);
+               slot->taps.u |= (u64)tap << adj->mask_shift;
+       }
+
+#ifdef DEBUG
+       if (opcode == MMC_CMD_SEND_TUNING_BLOCK_HS200) {
+               debug("%s(%s, %s): After successful tuning\n",
+                     __func__, mmc->dev->name, adj->name);
+               debug("%s(%s, %s): tap: %d, new do: %d, di: %d, co: %d, ci: %d\n",
+                     __func__, mmc->dev->name, adj->name, tap,
+                     slot->taps.s.data_out_tap,
+                     slot->taps.s.data_in_tap,
+                     slot->taps.s.cmd_out_tap,
+                     slot->taps.s.cmd_in_tap);
+               debug("%s(%s, %s): tap: %d, new do HS200: %d, di: %d, co: %d, ci: %d\n",
+                     __func__, mmc->dev->name, adj->name, tap,
+                     slot->hs200_taps.s.data_out_tap,
+                     slot->hs200_taps.s.data_in_tap,
+                     slot->hs200_taps.s.cmd_out_tap,
+                     slot->hs200_taps.s.cmd_in_tap);
+       }
+#endif
+       octeontx_mmc_set_timing(mmc);
+
+       if (is_hs200 && env_get_yesno("emmc_export_hs200_taps")) {
+               char env_name[64];
+
+               env_set_ulong("emmc_timing_tap", slot->host->timing_taps);
+               switch (opcode) {
+               case MMC_CMD_SEND_TUNING_BLOCK:
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_data_in_tap_debug",
+                                slot->bus_id);
+                       env_set(env_name, how);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_data_in_tap_val", slot->bus_id);
+                       env_set_ulong(env_name, tap);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_data_in_tap_start",
+                                slot->bus_id);
+                       env_set_ulong(env_name, best_start);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_data_in_tap_end",
+                                slot->bus_id);
+                       env_set_ulong(env_name, best_start + best_run);
+                       break;
+               case MMC_CMD_SEND_STATUS:
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_cmd_in_tap_debug",
+                                slot->bus_id);
+                       env_set(env_name, how);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_cmd_in_tap_val", slot->bus_id);
+                       env_set_ulong(env_name, tap);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_cmd_in_tap_start",
+                                slot->bus_id);
+                       env_set_ulong(env_name, best_start);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_cmd_in_tap_end",
+                                slot->bus_id);
+                       env_set_ulong(env_name, best_start + best_run);
+                       break;
+               default:
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_data_out_tap", slot->bus_id);
+                       env_set_ulong(env_name, slot->data_out_hs200_delay);
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_hs200_cmd_out_tap", slot->bus_id);
+                       env_set_ulong(env_name, slot->cmd_out_hs200_delay);
+                       break;
+               }
+       }
+
+       return 0;
+}
+
+static int octeontx_mmc_execute_tuning(struct udevice *dev, u32 opcode)
+{
+       struct mmc *mmc = dev_to_mmc(dev);
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       union mio_emm_timing emm_timing;
+       int err;
+       struct adj *a;
+       bool is_hs200;
+       char env_name[64];
+
+       pr_info("%s re-tuning, opcode 0x%x\n", dev->name, opcode);
+
+       if (slot->is_asim || slot->is_emul)
+               return 0;
+
+       is_hs200 = (mmc->selected_mode == MMC_HS_200);
+       if (is_hs200) {
+               slot->hs200_tuned = false;
+               slot->hs400_tuned = false;
+       } else {
+               slot->tuned = false;
+       }
+       octeontx_mmc_set_output_bus_timing(mmc);
+       octeontx_mmc_set_input_bus_timing(mmc);
+       emm_timing.u = read_csr(mmc, MIO_EMM_TIMING());
+       if (mmc->selected_mode == MMC_HS_200) {
+               slot->hs200_taps.s.cmd_out_tap = emm_timing.s.cmd_out_tap;
+               slot->hs200_taps.s.data_out_tap = emm_timing.s.data_out_tap;
+       } else {
+               slot->taps.s.cmd_out_tap = emm_timing.s.cmd_out_tap;
+               slot->taps.s.data_out_tap = emm_timing.s.data_out_tap;
+       }
+       octeontx_mmc_set_input_bus_timing(mmc);
+       octeontx_mmc_set_output_bus_timing(mmc);
+
+       for (a = adj; a->name; a++) {
+               ulong in_tap;
+
+               if (!strcmp(a->name, "CMD_IN")) {
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_cmd_in_tap", slot->bus_id);
+                       in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+                       if (in_tap != (ulong)-1) {
+                               if (mmc->selected_mode == MMC_HS_200 ||
+                                   a->hs200_only) {
+                                       slot->hs200_taps.s.cmd_in_tap = in_tap;
+                                       slot->hs400_taps.s.cmd_in_tap = in_tap;
+                               } else {
+                                       slot->taps.s.cmd_in_tap = in_tap;
+                               }
+                               continue;
+                       }
+               } else if (a->hs200_only &&
+                          !strcmp(a->name, "DATA_IN(HS200)")) {
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_data_in_tap_hs200", slot->bus_id);
+                       in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+                       if (in_tap != (ulong)-1) {
+                               debug("%s(%s): Overriding HS200 data in tap to %d\n",
+                                     __func__, dev->name, (int)in_tap);
+                               slot->hs200_taps.s.data_in_tap = in_tap;
+                               continue;
+                       }
+               } else if (!a->hs200_only && !strcmp(a->name, "DATA_IN")) {
+                       snprintf(env_name, sizeof(env_name),
+                                "emmc%d_data_in_tap", slot->bus_id);
+                       in_tap = env_get_ulong(env_name, 10, (ulong)-1);
+                       if (in_tap != (ulong)-1) {
+                               debug("%s(%s): Overriding non-HS200 data in tap to %d\n",
+                                     __func__, dev->name, (int)in_tap);
+                               slot->taps.s.data_in_tap = in_tap;
+                               continue;
+                       }
+               }
+
+               debug("%s(%s): Testing: %s, mode: %s, opcode: %u\n", __func__,
+                     dev->name, a->name, mmc_mode_name(mmc->selected_mode),
+                     opcode);
+
+               /* Skip DDR only test when not in DDR mode */
+               if (a->ddr_only && !mmc->ddr_mode) {
+                       debug("%s(%s): Skipping %s due to non-DDR mode\n",
+                             __func__, dev->name, a->name);
+                       continue;
+               }
+               /* Skip hs200 tests in non-hs200 mode and
+                * non-hs200 tests in hs200 mode
+                */
+               if (is_hs200) {
+                       if (a->not_hs200_only) {
+                               debug("%s(%s): Skipping %s\n", __func__,
+                                     dev->name, a->name);
+                               continue;
+                       }
+               } else {
+                       if (a->hs200_only) {
+                               debug("%s(%s): Skipping %s\n", __func__,
+                                     dev->name, a->name);
+                               continue;
+                       }
+               }
+
+               err = octeontx_mmc_adjust_tuning(mmc, a, a->opcode ?
+                                                a->opcode : opcode);
+               if (err) {
+                       pr_err("%s(%s, %u): tuning %s failed\n", __func__,
+                              dev->name, opcode, a->name);
+                       return err;
+               }
+       }
+
+       octeontx_mmc_set_timing(mmc);
+       if (is_hs200)
+               slot->hs200_tuned = true;
+       else
+               slot->tuned = true;
+
+       if (slot->hs400_tuning_block != -1) {
+               struct mmc_cmd cmd;
+               struct mmc_data data;
+               u8 buffer[mmc->read_bl_len];
+
+               cmd.cmdidx = MMC_CMD_READ_SINGLE_BLOCK;
+               cmd.cmdarg = slot->hs400_tuning_block;
+               cmd.resp_type = MMC_RSP_R1;
+               data.dest = (void *)buffer;
+               data.blocks = 1;
+               data.blocksize = mmc->read_bl_len;
+               data.flags = MMC_DATA_READ;
+               err = octeontx_mmc_read_blocks(mmc, &cmd, &data, true) != 1;
+
+               if (err) {
+                       printf("%s: Cannot read HS400 tuning block %u\n",
+                              dev->name, slot->hs400_tuning_block);
+                       return err;
+               }
+               if (memcmp(buffer, octeontx_hs400_tuning_block,
+                          sizeof(buffer))) {
+                       debug("%s(%s): Writing new HS400 tuning block to block %d\n",
+                             __func__, dev->name, slot->hs400_tuning_block);
+                       cmd.cmdidx = MMC_CMD_WRITE_SINGLE_BLOCK;
+                       data.src = (void *)octeontx_hs400_tuning_block;
+                       data.flags = MMC_DATA_WRITE;
+                       err = !octeontx_mmc_write_blocks(mmc, &cmd, &data);
+                       if (err) {
+                               printf("%s: Cannot write HS400 tuning block %u\n",
+                                      dev->name, slot->hs400_tuning_block);
+                               return -EINVAL;
+                       }
+               }
+       }
+
+       return 0;
+}
+#else /* MMC_SUPPORTS_TUNING */
+static void octeontx_mmc_set_emm_timing(struct mmc *mmc,
+                                       union mio_emm_timing emm_timing)
+{
+}
+#endif /* MMC_SUPPORTS_TUNING */
+
+/**
+ * Calculate the clock period with rounding up
+ *
+ * @param      mmc     mmc device
+ * @return     clock period in system clocks for clk_lo + clk_hi
+ */
+static u32 octeontx_mmc_calc_clk_period(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_host *host = slot->host;
+
+       return DIV_ROUND_UP(host->sys_freq, mmc->clock);
+}
+
+static int octeontx_mmc_set_ios(struct udevice *dev)
+{
+       struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+       struct mmc *mmc = &slot->mmc;
+       struct octeontx_mmc_host *host = slot->host;
+       union mio_emm_switch emm_switch;
+       union mio_emm_modex mode;
+       uint clock;
+       int bus_width = 0;
+       int clk_period = 0;
+       int power_class = 10;
+       int err = 0;
+       bool is_hs200 = false;
+       bool is_hs400 = false;
+
+       debug("%s(%s): Entry\n", __func__, dev->name);
+       debug("  clock: %u, bus width: %u, mode: %u\n", mmc->clock,
+             mmc->bus_width, mmc->selected_mode);
+       debug("  host caps: 0x%x, card caps: 0x%x\n", mmc->host_caps,
+             mmc->card_caps);
+       octeontx_mmc_switch_to(mmc);
+
+       clock = mmc->clock;
+       if (!clock)
+               clock = mmc->cfg->f_min;
+
+       switch (mmc->bus_width) {
+       case 8:
+               bus_width = 2;
+               break;
+       case 4:
+               bus_width = 1;
+               break;
+       case 1:
+               bus_width = 0;
+               break;
+       default:
+               pr_warn("%s(%s): Invalid bus width %d, defaulting to 1\n",
+                       __func__, dev->name, mmc->bus_width);
+               bus_width = 0;
+       }
+
+       /* DDR is available for 4/8 bit bus width */
+       if (mmc->ddr_mode && bus_width)
+               bus_width |= 4;
+
+       debug("%s: sys_freq: %llu\n", __func__, host->sys_freq);
+       clk_period = octeontx_mmc_calc_clk_period(mmc);
+
+       emm_switch.u = 0;
+       emm_switch.s.bus_width = bus_width;
+       emm_switch.s.power_class = power_class;
+       emm_switch.s.clk_hi = clk_period / 2;
+       emm_switch.s.clk_lo = clk_period / 2;
+
+       debug("%s: last mode: %d, mode: %d, last clock: %u, clock: %u, ddr: %d\n",
+             __func__, slot->last_mode, mmc->selected_mode,
+             slot->last_clock, mmc->clock, mmc->ddr_mode);
+       switch (mmc->selected_mode) {
+       case MMC_LEGACY:
+               break;
+       case MMC_HS:
+       case SD_HS:
+       case MMC_HS_52:
+               emm_switch.s.hs_timing = 1;
+               break;
+       case MMC_HS_200:
+               is_hs200 = true;
+               fallthrough;
+       case UHS_SDR12:
+       case UHS_SDR25:
+       case UHS_SDR50:
+       case UHS_SDR104:
+               emm_switch.s.hs200_timing = 1;
+               break;
+       case MMC_HS_400:
+               is_hs400 = true;
+               fallthrough;
+       case UHS_DDR50:
+       case MMC_DDR_52:
+               emm_switch.s.hs400_timing = 1;
+               break;
+       default:
+               pr_err("%s(%s): Unsupported mode 0x%x\n", __func__, dev->name,
+                      mmc->selected_mode);
+               return -1;
+       }
+       emm_switch.s.bus_id = slot->bus_id;
+
+       if (!is_hs200 && !is_hs400 &&
+           (mmc->selected_mode != slot->last_mode ||
+            mmc->clock != slot->last_clock) &&
+           !mmc->ddr_mode) {
+               slot->tuned = false;
+               slot->last_mode = mmc->selected_mode;
+               slot->last_clock = mmc->clock;
+       }
+
+       if (CONFIG_IS_ENABLED(MMC_VERBOSE)) {
+               debug("%s(%s): Setting bus mode to %s\n", __func__, dev->name,
+                     mmc_mode_name(mmc->selected_mode));
+       } else {
+               debug("%s(%s): Setting bus mode to 0x%x\n", __func__, dev->name,
+                     mmc->selected_mode);
+       }
+
+       debug(" Trying switch 0x%llx w%d hs:%d hs200:%d hs400:%d\n",
+             emm_switch.u, emm_switch.s.bus_width, emm_switch.s.hs_timing,
+             emm_switch.s.hs200_timing, emm_switch.s.hs400_timing);
+
+       set_wdog(mmc, 1000);
+       do_switch(mmc, emm_switch);
+       mdelay(100);
+       mode.u = read_csr(mmc, MIO_EMM_MODEX(slot->bus_id));
+       debug("%s(%s): mode: 0x%llx w:%d, hs:%d, hs200:%d, hs400:%d\n",
+             __func__, dev->name, mode.u, mode.s.bus_width,
+             mode.s.hs_timing, mode.s.hs200_timing, mode.s.hs400_timing);
+
+       err = octeontx_mmc_configure_delay(mmc);
+
+#ifdef MMC_SUPPORTS_TUNING
+       if (!err && mmc->selected_mode == MMC_HS_400 && !slot->hs400_tuned) {
+               debug("%s: Tuning HS400 mode\n", __func__);
+               err = octeontx_tune_hs400(mmc);
+       }
+#endif
+
+       return err;
+}
+
+/**
+ * Gets the status of the card detect pin
+ */
+static int octeontx_mmc_get_cd(struct udevice *dev)
+{
+       struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+       int val = 1;
+
+       if (dm_gpio_is_valid(&slot->cd_gpio)) {
+               val = dm_gpio_get_value(&slot->cd_gpio);
+               val ^= slot->cd_inverted;
+       }
+       debug("%s(%s): cd: %d\n", __func__, dev->name, val);
+       return val;
+}
+
+/**
+ * Gets the status of the write protect pin
+ */
+static int octeontx_mmc_get_wp(struct udevice *dev)
+{
+       struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+       int val = 0;
+
+       if (dm_gpio_is_valid(&slot->wp_gpio)) {
+               val = dm_gpio_get_value(&slot->wp_gpio);
+               val ^= slot->wp_inverted;
+       }
+       debug("%s(%s): wp: %d\n", __func__, dev->name, val);
+       return val;
+}
+
+static void octeontx_mmc_set_timing(struct mmc *mmc)
+{
+       union mio_emm_timing timing;
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+
+       switch (mmc->selected_mode) {
+       case MMC_HS_200:
+               timing = slot->hs200_taps;
+               break;
+       case MMC_HS_400:
+               timing = slot->hs400_tuned ?
+                               slot->hs400_taps : slot->hs200_taps;
+               break;
+       default:
+               timing = slot->taps;
+               break;
+       }
+
+       debug("%s(%s):\n  cmd_in_tap: %u\n  cmd_out_tap: %u\n  data_in_tap: %u\n  data_out_tap: %u\n",
+             __func__, mmc->dev->name, timing.s.cmd_in_tap,
+             timing.s.cmd_out_tap, timing.s.data_in_tap,
+             timing.s.data_out_tap);
+
+       octeontx_mmc_set_emm_timing(mmc, timing);
+}
+
+static int octeontx_mmc_configure_delay(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_host *host __maybe_unused = slot->host;
+       bool __maybe_unused is_hs200;
+       bool __maybe_unused is_hs400;
+
+       debug("%s(%s)\n", __func__, mmc->dev->name);
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               union mio_emm_sample emm_sample;
+
+               emm_sample.u = 0;
+               emm_sample.s.cmd_cnt = slot->cmd_cnt;
+               emm_sample.s.dat_cnt = slot->dat_cnt;
+               write_csr(mmc, MIO_EMM_SAMPLE(), emm_sample.u);
+       } else {
+               is_hs200 = (mmc->selected_mode == MMC_HS_200);
+               is_hs400 = (mmc->selected_mode == MMC_HS_400);
+
+               if ((is_hs200 && slot->hs200_tuned) ||
+                   (is_hs400 && slot->hs400_tuned) ||
+                   (!is_hs200 && !is_hs400 && slot->tuned)) {
+                       octeontx_mmc_set_output_bus_timing(mmc);
+               } else {
+                       int half = MAX_NO_OF_TAPS / 2;
+                       int dout, cout;
+
+                       switch (mmc->selected_mode) {
+                       case MMC_LEGACY:
+                               if (IS_SD(mmc)) {
+                                       cout = MMC_SD_LEGACY_DEFAULT_CMD_OUT_TAP;
+                                       dout = MMC_SD_LEGACY_DEFAULT_DATA_OUT_TAP;
+                               } else {
+                                       cout = MMC_LEGACY_DEFAULT_CMD_OUT_TAP;
+                                       dout = MMC_LEGACY_DEFAULT_DATA_OUT_TAP;
+                               }
+                               break;
+                       case MMC_HS:
+                               cout = MMC_HS_CMD_OUT_TAP;
+                               dout = MMC_HS_DATA_OUT_TAP;
+                               break;
+                       case SD_HS:
+                       case UHS_SDR12:
+                       case UHS_SDR25:
+                       case UHS_SDR50:
+                               cout = MMC_SD_HS_CMD_OUT_TAP;
+                               dout = MMC_SD_HS_DATA_OUT_TAP;
+                               break;
+                       case UHS_SDR104:
+                       case UHS_DDR50:
+                       case MMC_HS_52:
+                       case MMC_DDR_52:
+                               cout = MMC_DEFAULT_CMD_OUT_TAP;
+                               dout = MMC_DEFAULT_DATA_OUT_TAP;
+                               break;
+                       case MMC_HS_200:
+                               cout = -1;
+                               dout = -1;
+                               if (host->timing_calibrated) {
+                                       cout = octeontx2_mmc_calc_delay(
+                                               mmc, slot->cmd_out_hs200_delay);
+                                       dout = octeontx2_mmc_calc_delay(
+                                               mmc,
+                                               slot->data_out_hs200_delay);
+                                       debug("%s(%s): Calibrated HS200/HS400 cmd out delay: %dps tap: %d, data out delay: %d, tap: %d\n",
+                                             __func__, mmc->dev->name,
+                                             slot->cmd_out_hs200_delay, cout,
+                                             slot->data_out_hs200_delay, dout);
+                               } else {
+                                       cout = MMC_DEFAULT_HS200_CMD_OUT_TAP;
+                                       dout = MMC_DEFAULT_HS200_DATA_OUT_TAP;
+                               }
+                               is_hs200 = true;
+                               break;
+                       case MMC_HS_400:
+                               cout = -1;
+                               dout = -1;
+                               if (host->timing_calibrated) {
+                                       if (slot->cmd_out_hs400_delay)
+                                               cout = octeontx2_mmc_calc_delay(
+                                                       mmc,
+                                                       slot->cmd_out_hs400_delay);
+                                       if (slot->data_out_hs400_delay)
+                                               dout = octeontx2_mmc_calc_delay(
+                                                       mmc,
+                                                       slot->data_out_hs400_delay);
+                                       debug("%s(%s): Calibrated HS200/HS400 cmd out delay: %dps tap: %d, data out delay: %d, tap: %d\n",
+                                             __func__, mmc->dev->name,
+                                             slot->cmd_out_hs400_delay, cout,
+                                             slot->data_out_hs400_delay, dout);
+                               } else {
+                                       cout = MMC_DEFAULT_HS400_CMD_OUT_TAP;
+                                       dout = MMC_DEFAULT_HS400_DATA_OUT_TAP;
+                               }
+                               is_hs400 = true;
+                               break;
+                       default:
+                               pr_err("%s(%s): Invalid mode %d\n", __func__,
+                                      mmc->dev->name, mmc->selected_mode);
+                               return -1;
+                       }
+                       debug("%s(%s): Not tuned, hs200: %d, hs200 tuned: %d, hs400: %d, hs400 tuned: %d, tuned: %d\n",
+                             __func__, mmc->dev->name, is_hs200,
+                             slot->hs200_tuned,
+                             is_hs400, slot->hs400_tuned, slot->tuned);
+                       /* Set some defaults */
+                       if (is_hs200) {
+                               slot->hs200_taps.u = 0;
+                               slot->hs200_taps.s.cmd_out_tap = cout;
+                               slot->hs200_taps.s.data_out_tap = dout;
+                               slot->hs200_taps.s.cmd_in_tap = half;
+                               slot->hs200_taps.s.data_in_tap = half;
+                       } else if (is_hs400) {
+                               slot->hs400_taps.u = 0;
+                               slot->hs400_taps.s.cmd_out_tap = cout;
+                               slot->hs400_taps.s.data_out_tap = dout;
+                               slot->hs400_taps.s.cmd_in_tap = half;
+                               slot->hs400_taps.s.data_in_tap = half;
+                       } else {
+                               slot->taps.u = 0;
+                               slot->taps.s.cmd_out_tap = cout;
+                               slot->taps.s.data_out_tap = dout;
+                               slot->taps.s.cmd_in_tap = half;
+                               slot->taps.s.data_in_tap = half;
+                       }
+               }
+
+               if (is_hs200)
+                       debug("%s(%s): hs200 taps: ci: %u, co: %u, di: %u, do: %u\n",
+                             __func__, mmc->dev->name,
+                             slot->hs200_taps.s.cmd_in_tap,
+                             slot->hs200_taps.s.cmd_out_tap,
+                             slot->hs200_taps.s.data_in_tap,
+                             slot->hs200_taps.s.data_out_tap);
+               else if (is_hs400)
+                       debug("%s(%s): hs400 taps: ci: %u, co: %u, di: %u, do: %u\n",
+                             __func__, mmc->dev->name,
+                             slot->hs400_taps.s.cmd_in_tap,
+                             slot->hs400_taps.s.cmd_out_tap,
+                             slot->hs400_taps.s.data_in_tap,
+                             slot->hs400_taps.s.data_out_tap);
+               else
+                       debug("%s(%s): taps: ci: %u, co: %u, di: %u, do: %u\n",
+                             __func__, mmc->dev->name, slot->taps.s.cmd_in_tap,
+                             slot->taps.s.cmd_out_tap,
+                             slot->taps.s.data_in_tap,
+                             slot->taps.s.data_out_tap);
+               octeontx_mmc_set_timing(mmc);
+               debug("%s: Done\n", __func__);
+       }
+
+       return 0;
+}
+
+/**
+ * Sets the MMC watchdog timer in microseconds
+ *
+ * @param      mmc     mmc device
+ * @param      us      timeout in microseconds, 0 for maximum timeout
+ */
+static void set_wdog(struct mmc *mmc, u64 us)
+{
+       union mio_emm_wdog wdog;
+       u64 val;
+
+       val = (us * mmc->clock) / 1000000;
+       if (val >= (1 << 26) || !us) {
+               if (us)
+                       pr_debug("%s: warning: timeout %llu exceeds max value %llu, truncating\n",
+                                __func__, us,
+                                (u64)(((1ULL << 26) - 1) * 1000000ULL) /
+                                       mmc->clock);
+               val = (1 << 26) - 1;
+       }
+       wdog.u = 0;
+       wdog.s.clk_cnt = val;
+       write_csr(mmc, MIO_EMM_WDOG(), wdog.u);
+}
+
+/**
+ * Set the IO drive strength and slew
+ *
+ * @param      mmc     mmc device
+ */
+static void octeontx_mmc_io_drive_setup(struct mmc *mmc)
+{
+       if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+               union mio_emm_io_ctl io_ctl;
+
+               if (slot->drive < 0 || slot->slew < 0)
+                       return;
+
+               io_ctl.u = 0;
+               io_ctl.s.drive = slot->drive;
+               io_ctl.s.slew = slot->slew;
+               write_csr(mmc, MIO_EMM_IO_CTL(), io_ctl.u);
+       }
+}
+
+/**
+ * Print switch errors
+ *
+ * @param      mmc     mmc device
+ */
+static void check_switch_errors(struct mmc *mmc)
+{
+       union mio_emm_switch emm_switch;
+
+       emm_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+       if (emm_switch.s.switch_err0)
+               pr_err("%s: Switch power class error\n", mmc->cfg->name);
+       if (emm_switch.s.switch_err1)
+               pr_err("%s: Switch HS timing error\n", mmc->cfg->name);
+       if (emm_switch.s.switch_err2)
+               pr_err("%s: Switch bus width error\n", mmc->cfg->name);
+}
+
+static void do_switch(struct mmc *mmc, union mio_emm_switch emm_switch)
+{
+       union mio_emm_rsp_sts rsp_sts;
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       int bus_id = emm_switch.s.bus_id;
+       ulong start;
+
+       if (emm_switch.s.bus_id != 0) {
+               emm_switch.s.bus_id = 0;
+               write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+               udelay(100);
+               emm_switch.s.bus_id = bus_id;
+       }
+       debug("%s(%s, 0x%llx)\n", __func__, mmc->dev->name, emm_switch.u);
+       write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+
+       start = get_timer(0);
+       do {
+               rsp_sts.u = read_csr(mmc, MIO_EMM_RSP_STS());
+               if (!rsp_sts.s.switch_val)
+                       break;
+               udelay(100);
+       } while (get_timer(start) < 10);
+       if (rsp_sts.s.switch_val) {
+               pr_warn("%s(%s): Warning: writing 0x%llx to emm_switch timed out, status: 0x%llx\n",
+                       __func__, mmc->dev->name, emm_switch.u, rsp_sts.u);
+       }
+       slot->cached_switch = emm_switch;
+       check_switch_errors(mmc);
+       slot->cached_switch.u = emm_switch.u;
+       debug("%s: emm_switch: 0x%llx, rsp_lo: 0x%llx\n",
+             __func__, read_csr(mmc, MIO_EMM_SWITCH()),
+                                read_csr(mmc, MIO_EMM_RSP_LO()));
+}
+
+/**
+ * Given a delay in ps, return the tap delay count
+ *
+ * @param      mmc     mmc data structure
+ * @param      delay   delay in picoseconds
+ *
+ * @return     Number of tap cycles or error if -1
+ */
+static int octeontx2_mmc_calc_delay(struct mmc *mmc, int delay)
+{
+       struct octeontx_mmc_host *host = mmc_to_host(mmc);
+
+       if (host->is_asim || host->is_emul)
+               return 63;
+
+       if (!host->timing_taps) {
+               pr_err("%s(%s): Error: host timing not calibrated\n",
+                      __func__, mmc->dev->name);
+               return -1;
+       }
+       debug("%s(%s, %d) timing taps: %llu\n", __func__, mmc->dev->name,
+             delay, host->timing_taps);
+       return min_t(int, DIV_ROUND_UP(delay, host->timing_taps), 63);
+}
+
+/**
+ * Calibrates the delay based on the internal clock
+ *
+ * @param      mmc     Pointer to mmc data structure
+ *
+ * @return     0 for success or -ETIMEDOUT on error
+ *
+ * NOTE: On error a default value will be calculated.
+ */
+static int octeontx_mmc_calibrate_delay(struct mmc *mmc)
+{
+       union mio_emm_calb emm_calb;
+       union mio_emm_tap emm_tap;
+       union mio_emm_cfg emm_cfg;
+       union mio_emm_io_ctl emm_io_ctl;
+       union mio_emm_switch emm_switch;
+       union mio_emm_wdog emm_wdog;
+       union mio_emm_sts_mask emm_sts_mask;
+       union mio_emm_debug emm_debug;
+       union mio_emm_timing emm_timing;
+       struct octeontx_mmc_host *host = mmc_to_host(mmc);
+       ulong start;
+       u8 bus_id, bus_ena;
+
+       debug("%s: Calibrating delay\n", __func__);
+       if (host->is_asim || host->is_emul) {
+               debug("  No calibration for ASIM\n");
+               return 0;
+       }
+       emm_tap.u = 0;
+       if (host->calibrate_glitch) {
+               emm_tap.s.delay = MMC_DEFAULT_TAP_DELAY;
+       } else {
+               /* Save registers */
+               emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+               emm_io_ctl.u = read_csr(mmc, MIO_EMM_IO_CTL());
+               emm_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+               emm_wdog.u = read_csr(mmc, MIO_EMM_WDOG());
+               emm_sts_mask.u = read_csr(mmc, MIO_EMM_STS_MASK());
+               emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+               emm_timing.u = read_csr(mmc, MIO_EMM_TIMING());
+               bus_ena = emm_cfg.s.bus_ena;
+               bus_id = emm_switch.s.bus_id;
+               emm_cfg.s.bus_ena = 0;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+               udelay(1);
+               emm_cfg.s.bus_ena = 1ULL << 3;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+               mdelay(1);
+               emm_calb.u = 0;
+               write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+               emm_calb.s.start = 1;
+               write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+               start = get_timer(0);
+               /* This should only take 3 microseconds */
+               do {
+                       udelay(5);
+                       emm_tap.u = read_csr(mmc, MIO_EMM_TAP());
+               } while (!emm_tap.s.delay && get_timer(start) < 10);
+
+               emm_calb.s.start = 0;
+               write_csr(mmc, MIO_EMM_CALB(), emm_calb.u);
+
+               emm_cfg.s.bus_ena = 0;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+               udelay(1);
+               /* Restore registers */
+               emm_cfg.s.bus_ena = bus_ena;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+               if (host->tap_requires_noclk) {
+                       /* Turn off the clock */
+                       emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+                       emm_debug.s.emmc_clk_disable = 1;
+                       write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                       udelay(1);
+                       emm_debug.s.rdsync_rst = 1;
+                       write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                       udelay(1);
+               }
+
+               write_csr(mmc, MIO_EMM_TIMING(), emm_timing.u);
+               if (host->tap_requires_noclk) {
+                       /* Turn the clock back on */
+                       udelay(1);
+                       emm_debug.s.rdsync_rst = 0;
+                       write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+                       udelay(1);
+                       emm_debug.s.emmc_clk_disable = 0;
+                       write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+               }
+               udelay(1);
+               write_csr(mmc, MIO_EMM_IO_CTL(), emm_io_ctl.u);
+               bus_id = emm_switch.s.bus_id;
+               emm_switch.s.bus_id = 0;
+               write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+               emm_switch.s.bus_id = bus_id;
+               write_csr(mmc, MIO_EMM_SWITCH(), emm_switch.u);
+               write_csr(mmc, MIO_EMM_WDOG(), emm_wdog.u);
+               write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+               write_csr(mmc, MIO_EMM_RCA(), mmc->rca);
+               write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+
+               if (!emm_tap.s.delay) {
+                       pr_err("%s: Error: delay calibration failed, timed out.\n",
+                              __func__);
+                       /* Set to default value if timed out */
+                       emm_tap.s.delay = MMC_DEFAULT_TAP_DELAY;
+                       return -ETIMEDOUT;
+               }
+       }
+       /* Round up */
+       host->timing_taps = (10 * 1000 * emm_tap.s.delay) / TOTAL_NO_OF_TAPS;
+       debug("%s(%s): timing taps: %llu, delay: %u\n",
+             __func__, mmc->dev->name, host->timing_taps, emm_tap.s.delay);
+       host->timing_calibrated = true;
+       return 0;
+}
+
+static int octeontx_mmc_set_input_bus_timing(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX)) {
+               union mio_emm_sample sample;
+
+               sample.u = 0;
+               sample.s.cmd_cnt = slot->cmd_clk_skew;
+               sample.s.dat_cnt = slot->dat_clk_skew;
+               write_csr(mmc, MIO_EMM_SAMPLE(), sample.u);
+       } else {
+               union mio_emm_timing timing;
+
+               timing.u = read_csr(mmc, MIO_EMM_TIMING());
+               if (mmc->selected_mode == MMC_HS_200) {
+                       if (slot->hs200_tuned) {
+                               timing.s.cmd_in_tap =
+                                       slot->hs200_taps.s.cmd_in_tap;
+                               timing.s.data_in_tap =
+                                       slot->hs200_taps.s.data_in_tap;
+                       } else {
+                               pr_warn("%s(%s): Warning: hs200 timing not tuned\n",
+                                       __func__, mmc->dev->name);
+                               timing.s.cmd_in_tap =
+                                       MMC_DEFAULT_HS200_CMD_IN_TAP;
+                               timing.s.data_in_tap =
+                                       MMC_DEFAULT_HS200_DATA_IN_TAP;
+                       }
+               } else if (mmc->selected_mode == MMC_HS_400) {
+                       if (slot->hs400_tuned) {
+                               timing.s.cmd_in_tap =
+                                       slot->hs400_taps.s.cmd_in_tap;
+                               timing.s.data_in_tap =
+                                       slot->hs400_taps.s.data_in_tap;
+                       } else if (slot->hs200_tuned) {
+                               timing.s.cmd_in_tap =
+                                       slot->hs200_taps.s.cmd_in_tap;
+                               timing.s.data_in_tap =
+                                       slot->hs200_taps.s.data_in_tap;
+                       } else {
+                               pr_warn("%s(%s): Warning: hs400 timing not tuned\n",
+                                       __func__, mmc->dev->name);
+                               timing.s.cmd_in_tap =
+                                       MMC_DEFAULT_HS200_CMD_IN_TAP;
+                               timing.s.data_in_tap =
+                                       MMC_DEFAULT_HS200_DATA_IN_TAP;
+                       }
+               } else if (slot->tuned) {
+                       timing.s.cmd_in_tap = slot->taps.s.cmd_in_tap;
+                       timing.s.data_in_tap = slot->taps.s.data_in_tap;
+               } else {
+                       timing.s.cmd_in_tap = MMC_DEFAULT_CMD_IN_TAP;
+                       timing.s.data_in_tap = MMC_DEFAULT_DATA_IN_TAP;
+               }
+               octeontx_mmc_set_emm_timing(mmc, timing);
+       }
+
+       return 0;
+}
+
+/**
+ * Sets the default bus timing for the current mode.
+ *
+ * @param      mmc     mmc data structure
+ *
+ * @return     0 for success, error otherwise
+ */
+static int octeontx_mmc_set_output_bus_timing(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       union mio_emm_timing timing;
+       int cout_bdelay, dout_bdelay;
+       unsigned int cout_delay, dout_delay;
+       char env_name[32];
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX))
+               return 0;
+
+       debug("%s(%s)\n", __func__, mmc->dev->name);
+       if (slot->is_asim || slot->is_emul)
+               return 0;
+
+       octeontx_mmc_calibrate_delay(mmc);
+
+       if (mmc->clock < 26000000) {
+               cout_delay = 5000;
+               dout_delay = 5000;
+       } else if (mmc->clock <= 52000000) {
+               cout_delay = 2500;
+               dout_delay = 2500;
+       } else if (!mmc_is_mode_ddr(mmc->selected_mode)) {
+               cout_delay = slot->cmd_out_hs200_delay;
+               dout_delay = slot->data_out_hs200_delay;
+       } else {
+               cout_delay = slot->cmd_out_hs400_delay;
+               dout_delay = slot->data_out_hs400_delay;
+       }
+
+       snprintf(env_name, sizeof(env_name), "mmc%d_hs200_dout_delay_ps",
+                slot->bus_id);
+       dout_delay = env_get_ulong(env_name, 10, dout_delay);
+       debug("%s: dout_delay: %u\n", __func__, dout_delay);
+
+       cout_bdelay = octeontx2_mmc_calc_delay(mmc, cout_delay);
+       dout_bdelay = octeontx2_mmc_calc_delay(mmc, dout_delay);
+
+       debug("%s: cmd output delay: %u, data output delay: %u, cmd bdelay: %d, data bdelay: %d, clock: %d\n",
+             __func__, cout_delay, dout_delay, cout_bdelay, dout_bdelay,
+             mmc->clock);
+       if (cout_bdelay < 0 || dout_bdelay < 0) {
+               pr_err("%s: Error: could not calculate command and/or data clock skew\n",
+                      __func__);
+               return -1;
+       }
+       timing.u = read_csr(mmc, MIO_EMM_TIMING());
+       timing.s.cmd_out_tap = cout_bdelay;
+       timing.s.data_out_tap = dout_bdelay;
+       if (mmc->selected_mode == MMC_HS_200) {
+               slot->hs200_taps.s.cmd_out_tap = cout_bdelay;
+               slot->hs200_taps.s.data_out_tap = dout_bdelay;
+       } else if (mmc->selected_mode == MMC_HS_400) {
+               slot->hs400_taps.s.cmd_out_tap = cout_bdelay;
+               slot->hs400_taps.s.data_out_tap = dout_bdelay;
+       } else {
+               slot->taps.s.cmd_out_tap = cout_bdelay;
+               slot->taps.s.data_out_tap = dout_bdelay;
+       }
+       octeontx_mmc_set_emm_timing(mmc, timing);
+       debug("%s(%s): bdelay: %d/%d, clock: %d, ddr: %s, timing taps: %llu, do: %d, di: %d, co: %d, ci: %d\n",
+             __func__, mmc->dev->name, cout_bdelay, dout_bdelay, mmc->clock,
+             mmc->ddr_mode ? "yes" : "no",
+             mmc_to_host(mmc)->timing_taps,
+             timing.s.data_out_tap,
+             timing.s.data_in_tap,
+             timing.s.cmd_out_tap,
+             timing.s.cmd_in_tap);
+
+       return 0;
+}
+
+static void octeontx_mmc_set_clock(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       uint clock;
+
+       clock = min(mmc->cfg->f_max, (uint)slot->clock);
+       clock = max(mmc->cfg->f_min, clock);
+       debug("%s(%s): f_min: %u, f_max: %u, clock: %u\n", __func__,
+             mmc->dev->name, mmc->cfg->f_min, mmc->cfg->f_max, clock);
+       slot->clock = clock;
+       mmc->clock = clock;
+}
+
+/**
+ * This switches I/O power as needed when switching between slots.
+ *
+ * @param      mmc     mmc data structure
+ */
+static void octeontx_mmc_switch_io(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_host *host = slot->host;
+       struct mmc *last_mmc = host->last_mmc;
+       static struct udevice *last_reg;
+       union mio_emm_cfg emm_cfg;
+       int bus;
+       static bool initialized;
+
+       /* First time? */
+       if (!initialized || mmc != host->last_mmc) {
+               struct mmc *ommc;
+
+               /* Switch to bus 3 which is unused */
+               emm_cfg.u = read_csr(mmc, MIO_EMM_CFG());
+               emm_cfg.s.bus_ena = 1 << 3;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+
+               /* Turn off all other I/O interfaces with first initialization
+                * if at least one supply was found.
+                */
+               for (bus = 0; bus <= OCTEONTX_MAX_MMC_SLOT; bus++) {
+                       ommc = &host->slots[bus].mmc;
+
+                       /* Handle self case later */
+                       if (ommc == mmc || !ommc->vqmmc_supply)
+                               continue;
+
+                       /* Skip if we're not switching regulators */
+                       if (last_reg == mmc->vqmmc_supply)
+                               continue;
+
+                       /* Turn off other regulators */
+                       if (ommc->vqmmc_supply != mmc->vqmmc_supply)
+                               regulator_set_enable(ommc->vqmmc_supply, false);
+               }
+               /* Turn ourself on */
+               if (mmc->vqmmc_supply && last_reg != mmc->vqmmc_supply)
+                       regulator_set_enable(mmc->vqmmc_supply, true);
+               mdelay(1);      /* Settle time */
+               /* Switch to new bus */
+               emm_cfg.s.bus_ena = 1 << slot->bus_id;
+               write_csr(mmc, MIO_EMM_CFG(), emm_cfg.u);
+               last_reg = mmc->vqmmc_supply;
+               initialized = true;
+               return;
+       }
+
+       /* No change in device */
+       if (last_mmc == mmc)
+               return;
+
+       if (!last_mmc) {
+               pr_warn("%s(%s): No previous slot detected in IO slot switch!\n",
+                       __func__, mmc->dev->name);
+               return;
+       }
+
+       debug("%s(%s): last: %s, supply: %p\n", __func__, mmc->dev->name,
+             last_mmc->dev->name, mmc->vqmmc_supply);
+
+       /* The supply is the same so we do nothing */
+       if (last_mmc->vqmmc_supply == mmc->vqmmc_supply)
+               return;
+
+       /* Turn off the old slot I/O supply */
+       if (last_mmc->vqmmc_supply) {
+               debug("%s(%s): Turning off IO to %s, supply: %s\n",
+                     __func__, mmc->dev->name, last_mmc->dev->name,
+                     last_mmc->vqmmc_supply->name);
+               regulator_set_enable(last_mmc->vqmmc_supply, false);
+       }
+       /* Turn on the new slot I/O supply */
+       if (mmc->vqmmc_supply)  {
+               debug("%s(%s): Turning on IO to slot %d, supply: %s\n",
+                     __func__, mmc->dev->name, slot->bus_id,
+                     mmc->vqmmc_supply->name);
+               regulator_set_enable(mmc->vqmmc_supply, true);
+       }
+       /* Allow power to settle */
+       mdelay(1);
+}
+
+/**
+ * Called to switch between mmc devices
+ *
+ * @param      mmc     new mmc device
+ */
+static void octeontx_mmc_switch_to(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_slot *old_slot;
+       struct octeontx_mmc_host *host = slot->host;
+       union mio_emm_switch emm_switch;
+       union mio_emm_sts_mask emm_sts_mask;
+       union mio_emm_rca emm_rca;
+
+       if (slot->bus_id == host->last_slotid)
+               return;
+
+       debug("%s(%s) switching from slot %d to slot %d\n", __func__,
+             mmc->dev->name, host->last_slotid, slot->bus_id);
+       octeontx_mmc_switch_io(mmc);
+
+       if (host->last_slotid >= 0 && slot->valid) {
+               old_slot = &host->slots[host->last_slotid];
+               old_slot->cached_switch.u = read_csr(mmc, MIO_EMM_SWITCH());
+               old_slot->cached_rca.u = read_csr(mmc, MIO_EMM_RCA());
+       }
+       if (mmc->rca)
+               write_csr(mmc, MIO_EMM_RCA(), mmc->rca);
+       emm_switch = slot->cached_switch;
+       do_switch(mmc, emm_switch);
+       emm_rca.u = 0;
+       emm_rca.s.card_rca = mmc->rca;
+       write_csr(mmc, MIO_EMM_RCA(), emm_rca.u);
+       mdelay(100);
+
+       set_wdog(mmc, 100000);
+       if (octeontx_mmc_set_output_bus_timing(mmc) ||
+           octeontx_mmc_set_input_bus_timing(mmc))
+               pr_err("%s(%s): Error setting bus timing\n", __func__,
+                      mmc->dev->name);
+       octeontx_mmc_io_drive_setup(mmc);
+
+       emm_sts_mask.u = 0;
+       emm_sts_mask.s.sts_msk = 1 << 7 | 1 << 22 | 1 << 23 | 1 << 19;
+       write_csr(mmc, MIO_EMM_STS_MASK(), emm_sts_mask.u);
+       host->last_slotid = slot->bus_id;
+       host->last_mmc = mmc;
+       mdelay(10);
+}
+
+/**
+ * Perform initial timing configuration
+ *
+ * @param mmc  mmc device
+ *
+ * @return 0 for success
+ *
+ * NOTE: This will need to be updated when new silicon comes out
+ */
+static int octeontx_mmc_init_timing(struct mmc *mmc)
+{
+       union mio_emm_timing timing;
+
+       if (mmc_to_slot(mmc)->is_asim || mmc_to_slot(mmc)->is_emul)
+               return 0;
+
+       debug("%s(%s)\n", __func__, mmc->dev->name);
+       timing.u = 0;
+       timing.s.cmd_out_tap = MMC_DEFAULT_CMD_OUT_TAP;
+       timing.s.data_out_tap = MMC_DEFAULT_DATA_OUT_TAP;
+       timing.s.cmd_in_tap = MMC_DEFAULT_CMD_IN_TAP;
+       timing.s.data_in_tap = MMC_DEFAULT_DATA_IN_TAP;
+       octeontx_mmc_set_emm_timing(mmc, timing);
+       return 0;
+}
+
+/**
+ * Perform low-level initialization
+ *
+ * @param      mmc     mmc device
+ *
+ * @return     0 for success, error otherwise
+ */
+static int octeontx_mmc_init_lowlevel(struct mmc *mmc)
+{
+       struct octeontx_mmc_slot *slot = mmc_to_slot(mmc);
+       struct octeontx_mmc_host *host = slot->host;
+       union mio_emm_switch emm_switch;
+       u32 clk_period;
+
+       debug("%s(%s): lowlevel init for slot %d\n", __func__,
+             mmc->dev->name, slot->bus_id);
+       host->emm_cfg.s.bus_ena &= ~(1 << slot->bus_id);
+       write_csr(mmc, MIO_EMM_CFG(), host->emm_cfg.u);
+       udelay(100);
+       host->emm_cfg.s.bus_ena |= 1 << slot->bus_id;
+       write_csr(mmc, MIO_EMM_CFG(), host->emm_cfg.u);
+       udelay(10);
+       slot->clock = mmc->cfg->f_min;
+       octeontx_mmc_set_clock(&slot->mmc);
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+               if (host->cond_clock_glitch) {
+                       union mio_emm_debug emm_debug;
+
+                       emm_debug.u = read_csr(mmc, MIO_EMM_DEBUG());
+                       emm_debug.s.clk_on = 1;
+                       write_csr(mmc, MIO_EMM_DEBUG(), emm_debug.u);
+               }
+               octeontx_mmc_calibrate_delay(&slot->mmc);
+       }
+
+       clk_period = octeontx_mmc_calc_clk_period(mmc);
+       emm_switch.u = 0;
+       emm_switch.s.power_class = 10;
+       emm_switch.s.clk_lo = clk_period / 2;
+       emm_switch.s.clk_hi = clk_period / 2;
+
+       emm_switch.s.bus_id = slot->bus_id;
+       debug("%s: Performing switch\n", __func__);
+       do_switch(mmc, emm_switch);
+       slot->cached_switch.u = emm_switch.u;
+
+       if (!IS_ENABLED(CONFIG_ARCH_OCTEONTX))
+               octeontx_mmc_init_timing(mmc);
+
+       set_wdog(mmc, 1000000); /* Set to 1 second */
+       write_csr(mmc, MIO_EMM_STS_MASK(), 0xe4390080ull);
+       write_csr(mmc, MIO_EMM_RCA(), 1);
+       mdelay(10);
+       debug("%s: done\n", __func__);
+       return 0;
+}
+
+/**
+ * Translates a voltage number to bits in MMC register
+ *
+ * @param      voltage voltage in microvolts
+ *
+ * @return     MMC register value for voltage
+ */
+static u32 xlate_voltage(u32 voltage)
+{
+       u32 volt = 0;
+
+       /* Convert to millivolts */
+       voltage /= 1000;
+       if (voltage >= 1650 && voltage <= 1950)
+               volt |= MMC_VDD_165_195;
+       if (voltage >= 2000 && voltage <= 2100)
+               volt |= MMC_VDD_20_21;
+       if (voltage >= 2100 && voltage <= 2200)
+               volt |= MMC_VDD_21_22;
+       if (voltage >= 2200 && voltage <= 2300)
+               volt |= MMC_VDD_22_23;
+       if (voltage >= 2300 && voltage <= 2400)
+               volt |= MMC_VDD_23_24;
+       if (voltage >= 2400 && voltage <= 2500)
+               volt |= MMC_VDD_24_25;
+       if (voltage >= 2500 && voltage <= 2600)
+               volt |= MMC_VDD_25_26;
+       if (voltage >= 2600 && voltage <= 2700)
+               volt |= MMC_VDD_26_27;
+       if (voltage >= 2700 && voltage <= 2800)
+               volt |= MMC_VDD_27_28;
+       if (voltage >= 2800 && voltage <= 2900)
+               volt |= MMC_VDD_28_29;
+       if (voltage >= 2900 && voltage <= 3000)
+               volt |= MMC_VDD_29_30;
+       if (voltage >= 3000 && voltage <= 3100)
+               volt |= MMC_VDD_30_31;
+       if (voltage >= 3100 && voltage <= 3200)
+               volt |= MMC_VDD_31_32;
+       if (voltage >= 3200 && voltage <= 3300)
+               volt |= MMC_VDD_32_33;
+       if (voltage >= 3300 && voltage <= 3400)
+               volt |= MMC_VDD_33_34;
+       if (voltage >= 3400 && voltage <= 3500)
+               volt |= MMC_VDD_34_35;
+       if (voltage >= 3500 && voltage <= 3600)
+               volt |= MMC_VDD_35_36;
+
+       return volt;
+}
+
+/**
+ * Check if a slot is valid in the device tree
+ *
+ * @param      dev     slot device to check
+ *
+ * @return     true if status reports "ok" or "okay" or if no status,
+ *             false otherwise.
+ */
+static bool octeontx_mmc_get_valid(struct udevice *dev)
+{
+       const char *stat = ofnode_read_string(dev->node, "status");
+
+       if (!stat || !strncmp(stat, "ok", 2))
+               return true;
+       else
+               return false;
+}
+
+/**
+ * Reads slot configuration from the device tree
+ *
+ * @param      dev     slot device
+ *
+ * @return     0 on success, otherwise error
+ */
+static int octeontx_mmc_get_config(struct udevice *dev)
+{
+       struct octeontx_mmc_slot *slot = dev_to_mmc_slot(dev);
+       uint voltages[2];
+       uint low, high;
+       char env_name[32];
+       int err;
+       ofnode node = dev->node;
+       int bus_width = 1;
+       ulong new_max_freq;
+
+       debug("%s(%s)", __func__, dev->name);
+       slot->cfg.name = dev->name;
+
+       slot->cfg.f_max = ofnode_read_s32_default(dev->node, "max-frequency",
+                                                 26000000);
+       snprintf(env_name, sizeof(env_name), "mmc_max_frequency%d",
+                slot->bus_id);
+
+       new_max_freq = env_get_ulong(env_name, 10, slot->cfg.f_max);
+       debug("Reading %s, got %lu\n", env_name, new_max_freq);
+
+       if (new_max_freq != slot->cfg.f_max) {
+               printf("Overriding device tree MMC maximum frequency %u to %lu\n",
+                      slot->cfg.f_max, new_max_freq);
+               slot->cfg.f_max = new_max_freq;
+       }
+       slot->cfg.f_min = 400000;
+       slot->cfg.b_max = CONFIG_SYS_MMC_MAX_BLK_COUNT;
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+               slot->hs400_tuning_block =
+                       ofnode_read_s32_default(dev->node,
+                                               "marvell,hs400-tuning-block",
+                                               -1);
+               debug("%s(%s): mmc HS400 tuning block: %d\n", __func__,
+                     dev->name, slot->hs400_tuning_block);
+
+               slot->hs200_tap_adj =
+                       ofnode_read_s32_default(dev->node,
+                                               "marvell,hs200-tap-adjust", 0);
+               debug("%s(%s): hs200-tap-adjust: %d\n", __func__, dev->name,
+                     slot->hs200_tap_adj);
+               slot->hs400_tap_adj =
+                       ofnode_read_s32_default(dev->node,
+                                               "marvell,hs400-tap-adjust", 0);
+               debug("%s(%s): hs400-tap-adjust: %d\n", __func__, dev->name,
+                     slot->hs400_tap_adj);
+       }
+
+       err = ofnode_read_u32_array(dev->node, "voltage-ranges", voltages, 2);
+       if (err) {
+               slot->cfg.voltages = MMC_VDD_32_33 | MMC_VDD_33_34;
+       } else {
+               low = xlate_voltage(voltages[0]);
+               high = xlate_voltage(voltages[1]);
+               debug("  low voltage: 0x%x (%u), high: 0x%x (%u)\n",
+                     low, voltages[0], high, voltages[1]);
+               if (low > high || !low || !high) {
+                       pr_err("Invalid MMC voltage range [%u-%u] specified for %s\n",
+                              low, high, dev->name);
+                       return -1;
+               }
+               slot->cfg.voltages = 0;
+               do {
+                       slot->cfg.voltages |= low;
+                       low <<= 1;
+               } while (low <= high);
+       }
+       debug("%s: config voltages: 0x%x\n", __func__, slot->cfg.voltages);
+       slot->slew = ofnode_read_s32_default(node, "cavium,clk-slew", -1);
+       slot->drive = ofnode_read_s32_default(node, "cavium,drv-strength", -1);
+       gpio_request_by_name(dev, "cd-gpios", 0, &slot->cd_gpio, GPIOD_IS_IN);
+       slot->cd_inverted = ofnode_read_bool(node, "cd-inverted");
+       gpio_request_by_name(dev, "wp-gpios", 0, &slot->wp_gpio, GPIOD_IS_IN);
+       slot->wp_inverted = ofnode_read_bool(node, "wp-inverted");
+       if (slot->cfg.voltages & MMC_VDD_165_195) {
+               slot->is_1_8v = true;
+               slot->is_3_3v = false;
+       } else if (slot->cfg.voltages & (MMC_VDD_30_31 | MMC_VDD_31_32 |
+                                        MMC_VDD_33_34 | MMC_VDD_34_35 |
+                                        MMC_VDD_35_36)) {
+               slot->is_1_8v = false;
+               slot->is_3_3v = true;
+       }
+
+       bus_width = ofnode_read_u32_default(node, "bus-width", 1);
+       /* Note fall-through */
+       switch (bus_width) {
+       case 8:
+               slot->cfg.host_caps |= MMC_MODE_8BIT;
+       case 4:
+               slot->cfg.host_caps |= MMC_MODE_4BIT;
+       case 1:
+               slot->cfg.host_caps |= MMC_MODE_1BIT;
+               break;
+       }
+       if (ofnode_read_bool(node, "no-1-8-v")) {
+               slot->is_3_3v = true;
+               slot->is_1_8v = false;
+               if (!(slot->cfg.voltages & (MMC_VDD_32_33 | MMC_VDD_33_34)))
+                       pr_warn("%s(%s): voltages indicate 3.3v but 3.3v not supported\n",
+                               __func__, dev->name);
+       }
+       if (ofnode_read_bool(node, "mmc-ddr-3-3v")) {
+               slot->is_3_3v = true;
+               slot->is_1_8v = false;
+               if (!(slot->cfg.voltages & (MMC_VDD_32_33 | MMC_VDD_33_34)))
+                       pr_warn("%s(%s): voltages indicate 3.3v but 3.3v not supported\n",
+                               __func__, dev->name);
+       }
+       if (ofnode_read_bool(node, "cap-sd-highspeed") ||
+           ofnode_read_bool(node, "cap-mmc-highspeed") ||
+           ofnode_read_bool(node, "sd-uhs-sdr25"))
+               slot->cfg.host_caps |= MMC_MODE_HS;
+       if (slot->cfg.f_max >= 50000000 &&
+           slot->cfg.host_caps & MMC_MODE_HS)
+               slot->cfg.host_caps |= MMC_MODE_HS_52MHz | MMC_MODE_HS;
+       if (ofnode_read_bool(node, "sd-uhs-sdr50"))
+               slot->cfg.host_caps |= MMC_MODE_HS_52MHz | MMC_MODE_HS;
+       if (ofnode_read_bool(node, "sd-uhs-ddr50"))
+               slot->cfg.host_caps |= MMC_MODE_HS | MMC_MODE_HS_52MHz |
+                                      MMC_MODE_DDR_52MHz;
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+               if (!slot->is_asim && !slot->is_emul) {
+                       if (ofnode_read_bool(node, "mmc-hs200-1_8v"))
+                               slot->cfg.host_caps |= MMC_MODE_HS200 |
+                                       MMC_MODE_HS_52MHz;
+                       if (ofnode_read_bool(node, "mmc-hs400-1_8v"))
+                               slot->cfg.host_caps |= MMC_MODE_HS400 |
+                                       MMC_MODE_HS_52MHz |
+                                       MMC_MODE_HS200 |
+                                       MMC_MODE_DDR_52MHz;
+                       slot->cmd_out_hs200_delay =
+                               ofnode_read_u32_default(node,
+                                       "marvell,cmd-out-hs200-dly",
+                                       MMC_DEFAULT_HS200_CMD_OUT_DLY);
+                       debug("%s(%s): HS200 cmd out delay: %d\n",
+                             __func__, dev->name, slot->cmd_out_hs200_delay);
+                       slot->data_out_hs200_delay =
+                               ofnode_read_u32_default(node,
+                                       "marvell,data-out-hs200-dly",
+                                       MMC_DEFAULT_HS200_DATA_OUT_DLY);
+                       debug("%s(%s): HS200 data out delay: %d\n",
+                             __func__, dev->name, slot->data_out_hs200_delay);
+                       slot->cmd_out_hs400_delay =
+                               ofnode_read_u32_default(node,
+                                       "marvell,cmd-out-hs400-dly",
+                                       MMC_DEFAULT_HS400_CMD_OUT_DLY);
+                       debug("%s(%s): HS400 cmd out delay: %d\n",
+                             __func__, dev->name, slot->cmd_out_hs400_delay);
+                       slot->data_out_hs400_delay =
+                               ofnode_read_u32_default(node,
+                                       "marvell,data-out-hs400-dly",
+                                       MMC_DEFAULT_HS400_DATA_OUT_DLY);
+                       debug("%s(%s): HS400 data out delay: %d\n",
+                             __func__, dev->name, slot->data_out_hs400_delay);
+               }
+       }
+
+       slot->disable_ddr = ofnode_read_bool(node, "marvell,disable-ddr");
+       slot->non_removable = ofnode_read_bool(node, "non-removable");
+       slot->cmd_clk_skew = ofnode_read_u32_default(node,
+                                                    "cavium,cmd-clk-skew", 0);
+       slot->dat_clk_skew = ofnode_read_u32_default(node,
+                                                    "cavium,dat-clk-skew", 0);
+       debug("%s(%s): host caps: 0x%x\n", __func__,
+             dev->name, slot->cfg.host_caps);
+       return 0;
+}
+
+/**
+ * Probes a MMC slot
+ *
+ * @param      dev     mmc device
+ *
+ * @return     0 for success, error otherwise
+ */
+static int octeontx_mmc_slot_probe(struct udevice *dev)
+{
+       struct octeontx_mmc_slot *slot;
+       struct mmc *mmc;
+       int err;
+
+       printk("%s (%d)\n", __func__, __LINE__); // test-only
+       debug("%s(%s)\n", __func__, dev->name);
+       if (!host_probed) {
+               pr_err("%s(%s): Error: host not probed yet\n",
+                      __func__, dev->name);
+       }
+       slot = dev_to_mmc_slot(dev);
+       mmc = &slot->mmc;
+       mmc->dev = dev;
+
+       slot->valid = false;
+       if (!octeontx_mmc_get_valid(dev)) {
+               debug("%s(%s): slot is invalid\n", __func__, dev->name);
+               return -ENODEV;
+       }
+
+       debug("%s(%s): Getting config\n", __func__, dev->name);
+       err = octeontx_mmc_get_config(dev);
+       if (err) {
+               pr_err("probe(%s): Error getting config\n", dev->name);
+               return err;
+       }
+
+       debug("%s(%s): mmc bind, mmc: %p\n", __func__, dev->name, &slot->mmc);
+       err = mmc_bind(dev, &slot->mmc, &slot->cfg);
+       if (err) {
+               pr_err("%s(%s): Error binding mmc\n", __func__, dev->name);
+               return -1;
+       }
+
+       /* For some reason, mmc_bind always assigns priv to the device */
+       slot->mmc.priv = slot;
+
+       debug("%s(%s): lowlevel init\n", __func__, dev->name);
+       err = octeontx_mmc_init_lowlevel(mmc);
+       if (err) {
+               pr_err("probe(%s): Low-level init failed\n", dev->name);
+               return err;
+       }
+
+       slot->valid = true;
+
+       debug("%s(%s):\n"
+             "  base address : %p\n"
+             "  bus id       : %d\n", __func__, dev->name,
+               slot->base_addr, slot->bus_id);
+
+       return err;
+}
+
+/**
+ * MMC slot driver operations
+ */
+static const struct dm_mmc_ops octeontx_hsmmc_ops = {
+       .send_cmd = octeontx_mmc_dev_send_cmd,
+       .set_ios = octeontx_mmc_set_ios,
+       .get_cd = octeontx_mmc_get_cd,
+       .get_wp = octeontx_mmc_get_wp,
+#ifdef MMC_SUPPORTS_TUNING
+       .execute_tuning = octeontx_mmc_execute_tuning,
+#endif
+};
+
+static const struct udevice_id octeontx_hsmmc_ids[] = {
+       { .compatible = "mmc-slot" },
+       { }
+};
+
+U_BOOT_DRIVER(octeontx_hsmmc_slot) = {
+       .name   = "octeontx_hsmmc_slot",
+       .id     = UCLASS_MMC,
+       .of_match = of_match_ptr(octeontx_hsmmc_ids),
+       .probe = octeontx_mmc_slot_probe,
+       .ops = &octeontx_hsmmc_ops,
+};
+
+/*****************************************************************
+ * PCI host driver
+ *
+ * The PCI host driver contains the resources used by all of the
+ * slot drivers.
+ *
+ * The slot drivers are pseudo drivers.
+ */
+
+/**
+ * Probe the MMC host controller
+ *
+ * @param      dev     mmc host controller device
+ *
+ * @return     0 for success, -1 on error
+ */
+static int octeontx_mmc_host_probe(struct udevice *dev)
+{
+       pci_dev_t bdf = dm_pci_get_bdf(dev);
+       struct octeontx_mmc_host *host = dev_get_priv(dev);
+       union mio_emm_int emm_int;
+       u8 rev;
+
+       debug("%s(%s): Entry host: %p\n", __func__, dev->name, host);
+
+       if (!octeontx_mmc_get_valid(dev)) {
+               debug("%s(%s): mmc host not valid\n", __func__, dev->name);
+               return -ENODEV;
+       }
+       memset(host, 0, sizeof(*host));
+       host->base_addr = dm_pci_map_bar(dev, PCI_BASE_ADDRESS_0,
+                                        PCI_REGION_MEM);
+       if (!host->base_addr) {
+               pr_err("%s: Error: MMC base address not found\n", __func__);
+               return -1;
+       }
+       host->dev = dev;
+       debug("%s(%s): Base address: %p\n", __func__, dev->name,
+             host->base_addr);
+       if (!dev_has_of_node(dev)) {
+               pr_err("%s: No device tree information found\n", __func__);
+               return -1;
+       }
+       host->node = dev->node;
+       dev->req_seq = PCI_FUNC(bdf);
+       host->last_slotid = -1;
+       if (otx_is_platform(PLATFORM_ASIM))
+               host->is_asim = true;
+       if (otx_is_platform(PLATFORM_EMULATOR))
+               host->is_emul = true;
+       host->dma_wait_delay =
+               ofnode_read_u32_default(dev->node, "marvell,dma-wait-delay", 1);
+       /* Force reset of eMMC */
+       writeq(0, host->base_addr + MIO_EMM_CFG());
+       debug("%s: Clearing MIO_EMM_CFG\n", __func__);
+       udelay(100);
+       emm_int.u = readq(host->base_addr + MIO_EMM_INT());
+       debug("%s: Writing 0x%llx to MIO_EMM_INT\n", __func__, emm_int.u);
+       writeq(emm_int.u, host->base_addr + MIO_EMM_INT());
+
+       debug("%s(%s): Getting I/O clock\n", __func__, dev->name);
+       host->sys_freq = octeontx_get_io_clock();
+       debug("%s(%s): I/O clock %llu\n", __func__, dev->name, host->sys_freq);
+
+       if (IS_ENABLED(CONFIG_ARCH_OCTEONTX2)) {
+               /* Flags for issues to work around */
+               dm_pci_read_config8(dev, PCI_REVISION_ID, &rev);
+               if (otx_is_soc(CN96XX)) {
+                       debug("%s: CN96XX revision %d\n", __func__, rev);
+                       switch (rev) {
+                       case 0:
+                               host->calibrate_glitch = true;
+                               host->cond_clock_glitch = true;
+                               break;
+                       case 1:
+                               break;
+                       case 2:
+                               break;
+                       case 0x10:      /* C0 */
+                               host->hs400_skew_needed = true;
+                               debug("HS400 skew support enabled\n");
+                               fallthrough;
+                       default:
+                               debug("CN96XX rev C0+ detected\n");
+                               host->tap_requires_noclk = true;
+                               break;
+                       }
+               } else if (otx_is_soc(CN95XX)) {
+                       if (!rev)
+                               host->cond_clock_glitch = true;
+               }
+       }
+
+       host_probed = true;
+
+       return 0;
+}
+
+/**
+ * This performs some initial setup before a probe occurs.
+ *
+ * @param dev: MMC slot device
+ *
+ * @return 0 for success, -1 on failure
+ *
+ * Do some pre-initialization before probing a slot.
+ */
+static int octeontx_mmc_host_child_pre_probe(struct udevice *dev)
+{
+       struct octeontx_mmc_host *host = dev_get_priv(dev_get_parent(dev));
+       struct octeontx_mmc_slot *slot;
+       struct mmc_uclass_priv *upriv;
+       ofnode node = dev->node;
+       u32 bus_id;
+       char name[16];
+       int err;
+
+       debug("%s(%s) Pre-Probe\n", __func__, dev->name);
+       if (ofnode_read_u32(node, "reg", &bus_id)) {
+               pr_err("%s(%s): Error: \"reg\" not found in device tree\n",
+                      __func__, dev->name);
+               return -1;
+       }
+       if (bus_id > OCTEONTX_MAX_MMC_SLOT) {
+               pr_err("%s(%s): Error: \"reg\" out of range of 0..%d\n",
+                      __func__, dev->name, OCTEONTX_MAX_MMC_SLOT);
+               return -1;
+       }
+
+       slot = &host->slots[bus_id];
+       dev->priv = slot;
+       slot->host = host;
+       slot->bus_id = bus_id;
+       slot->dev = dev;
+       slot->base_addr = host->base_addr;
+       slot->is_asim = host->is_asim;
+       slot->is_emul = host->is_emul;
+
+       snprintf(name, sizeof(name), "octeontx-mmc%d", bus_id);
+       err = device_set_name(dev, name);
+
+       if (!dev->uclass_priv) {
+               debug("%s(%s): Allocating uclass priv\n", __func__,
+                     dev->name);
+               upriv = calloc(1, sizeof(struct mmc_uclass_priv));
+               if (!upriv)
+                       return -ENOMEM;
+               dev->uclass_priv = upriv;
+               dev->uclass->priv = upriv;
+       } else {
+               upriv = dev->uclass_priv;
+       }
+
+       upriv->mmc = &slot->mmc;
+       debug("%s: uclass priv: %p, mmc: %p\n", dev->name, upriv, upriv->mmc);
+
+       debug("%s: ret: %d\n", __func__, err);
+       return err;
+}
+
+static const struct udevice_id octeontx_hsmmc_host_ids[] = {
+       { .compatible = "cavium,thunder-8890-mmc" },
+       { }
+};
+
+U_BOOT_DRIVER(octeontx_hsmmc_host) = {
+       .name   = "octeontx_hsmmc_host",
+       .id     = UCLASS_MISC,
+       .of_match = of_match_ptr(octeontx_hsmmc_host_ids),
+       .probe  = octeontx_mmc_host_probe,
+       .priv_auto_alloc_size = sizeof(struct octeontx_mmc_host),
+       .child_pre_probe = octeontx_mmc_host_child_pre_probe,
+       .flags  = DM_FLAG_PRE_RELOC,
+};
+
+static struct pci_device_id octeontx_mmc_supported[] = {
+       { PCI_VDEVICE(CAVIUM, PCI_DEVICE_ID_CAVIUM_EMMC) },
+       { },
+};
+
+U_BOOT_PCI_DEVICE(octeontx_hsmmc_host, octeontx_mmc_supported);
diff --git a/drivers/mmc/octeontx_hsmmc.h b/drivers/mmc/octeontx_hsmmc.h
new file mode 100644 (file)
index 0000000..70844b1
--- /dev/null
@@ -0,0 +1,207 @@
+/* SPDX-License-Identifier:    GPL-2.0
+ *
+ * Copyright (C) 2019 Marvell International Ltd.
+ *
+ * https://spdx.org/licenses
+ */
+#ifndef __OCTEONTX_HSMMC_H__
+#define __OCTEONTX_HSMMC_H__
+#include <asm/gpio.h>
+
+/** Name of our driver */
+#define OCTEONTX_MMC_DRIVER_NAME       "octeontx-hsmmc"
+
+/** Maximum supported MMC slots */
+#define OCTEONTX_MAX_MMC_SLOT          3
+
+#define POWER_ON_TIME                  40 /** See SD 4.1 spec figure 6-5 */
+
+/**
+ * Timeout used when waiting for commands to complete.  We need to keep this
+ * above the hardware watchdog timeout which is usually limited to 1000ms
+ */
+#define WATCHDOG_COUNT                 (1100)  /* in msecs */
+
+/**
+ * Long timeout for commands which might take a while to complete.
+ */
+#define MMC_TIMEOUT_LONG               1000
+
+/**
+ * Short timeout used for most commands in msecs
+ */
+#define MMC_TIMEOUT_SHORT              20
+
+#define NSEC_PER_SEC                   1000000000L
+
+#define MAX_NO_OF_TAPS                 64
+
+#define EXT_CSD_POWER_CLASS            187     /* R/W */
+
+/* default HS400 tuning block number */
+#define DEFAULT_HS400_TUNING_BLOCK     1
+
+struct octeontx_mmc_host;
+
+/** MMC/SD slot data structure */
+struct octeontx_mmc_slot {
+       struct mmc              mmc;
+       struct mmc_config       cfg;
+       struct octeontx_mmc_host *host;
+       struct udevice          *dev;
+       void                    *base_addr;     /** Same as host base_addr */
+       u64                     clock;
+       int                     bus_id;         /** slot number */
+       uint                    bus_width;
+       uint                    max_width;
+       int                     hs200_tap_adj;
+       int                     hs400_tap_adj;
+       int                     hs400_tuning_block;
+       struct gpio_desc        cd_gpio;
+       struct gpio_desc        wp_gpio;
+       struct gpio_desc        power_gpio;
+       enum bus_mode           mode;
+       union mio_emm_switch    cached_switch;
+       union mio_emm_switch    want_switch;
+       union mio_emm_rca       cached_rca;
+       union mio_emm_timing    taps;   /* otx2: MIO_EMM_TIMING */
+       union mio_emm_timing    hs200_taps;
+       union mio_emm_timing    hs400_taps;
+       /* These are used to see if our tuning is still valid or not */
+       enum bus_mode           last_mode;
+       u32                     last_clock;
+       u32                     block_len;
+       u32                     block_count;
+       int                     cmd_clk_skew;
+       int                     dat_clk_skew;
+       uint                    cmd_cnt;        /* otx: sample cmd in delay */
+       uint                    dat_cnt;        /* otx: sample data in delay */
+       uint                    drive;          /* Current drive */
+       uint                    slew;           /* clock skew */
+       uint                    cmd_out_hs200_delay;
+       uint                    data_out_hs200_delay;
+       uint                    cmd_out_hs400_delay;
+       uint                    data_out_hs400_delay;
+       uint                    clk_period;
+       bool                    valid:1;
+       bool                    is_acmd:1;
+       bool                    tuned:1;
+       bool                    hs200_tuned:1;
+       bool                    hs400_tuned:1;
+       bool                    is_1_8v:1;
+       bool                    is_3_3v:1;
+       bool                    is_ddr:1;
+       bool                    is_asim:1;
+       bool                    is_emul:1;
+       bool                    cd_inverted:1;
+       bool                    wp_inverted:1;
+       bool                    disable_ddr:1;
+       bool                    non_removable:1;
+};
+
+struct octeontx_mmc_cr_mods {
+       u8 ctype_xor;
+       u8 rtype_xor;
+};
+
+struct octeontx_mmc_cr {
+       u8 c;
+       u8 r;
+};
+
+struct octeontx_sd_mods {
+       struct octeontx_mmc_cr mmc;
+       struct octeontx_mmc_cr sd;
+       struct octeontx_mmc_cr sdacmd;
+};
+
+/** Host controller data structure */
+struct octeontx_mmc_host {
+       struct          udevice *dev;
+       void            *base_addr;
+       struct octeontx_mmc_slot slots[OCTEONTX_MAX_MMC_SLOT + 1];
+       pci_dev_t       pdev;
+       u64             sys_freq;
+       union mio_emm_cfg emm_cfg;
+       u64             timing_taps;
+       struct mmc      *last_mmc;      /** Last mmc used */
+       ofnode          node;
+       int             cur_slotid;
+       int             last_slotid;
+       int             max_width;
+       uint            per_tap_delay;
+       uint            num_slots;
+       uint            dma_wait_delay; /* Delay before polling DMA in usecs */
+       bool            initialized:1;
+       bool            timing_calibrated:1;
+       bool            is_asim:1;
+       bool            is_emul:1;
+       bool            calibrate_glitch:1;
+       bool            cond_clock_glitch:1;
+       bool            tap_requires_noclk:1;
+       bool            hs400_skew_needed:1;
+};
+
+/*
+ * NOTE: This was copied from the Linux kernel.
+ *
+ * MMC status in R1, for native mode (SPI bits are different)
+ * Type
+ *     e:error bit
+ *     s:status bit
+ *     r:detected and set for the actual command response
+ *     x:detected and set during command execution. the host must poll
+ *         the card by sending status command in order to read these bits.
+ * Clear condition
+ *     a:according to the card state
+ *     b:always related to the previous command. Reception of
+ *         a valid command will clear it (with a delay of one command)
+ *     c:clear by read
+ */
+#define R1_OUT_OF_RANGE                BIT(31)         /* er, c */
+#define R1_ADDRESS_ERROR       BIT(30)         /* erx, c */
+#define R1_BLOCK_LEN_ERROR     BIT(29)         /* er, c */
+#define R1_ERASE_SEQ_ERROR     BIT(28)         /* er, c */
+#define R1_ERASE_PARAM          BIT(27)                /* ex, c */
+#define R1_WP_VIOLATION                BIT(26)         /* erx, c */
+#define R1_CARD_IS_LOCKED      BIT(25)         /* sx, a */
+#define R1_LOCK_UNLOCK_FAILED  BIT(24)         /* erx, c */
+#define R1_COM_CRC_ERROR       BIT(23)         /* er, b */
+/*#define R1_ILLEGAL_COMMAND   BIT(22)*/               /* er, b */
+#define R1_CARD_ECC_FAILED     BIT(21)         /* ex, c */
+#define R1_CC_ERROR            BIT(20)         /* erx, c */
+#define R1_ERROR               BIT(19)         /* erx, c */
+#define R1_UNDERRUN            BIT(18)         /* ex, c */
+#define R1_OVERRUN             BIT(17)         /* ex, c */
+#define R1_CID_CSD_OVERWRITE   BIT(16)         /* erx, c, CID/CSD overwrite */
+#define R1_WP_ERASE_SKIP       BIT(15)         /* sx, c */
+#define R1_CARD_ECC_DISABLED   BIT(14)         /* sx, a */
+#define R1_ERASE_RESET         BIT(13)         /* sr, c */
+#define R1_STATUS(x)           ((x) & 0xFFFFE000)
+#define R1_CURRENT_STATE(x)    (((x) & 0x00001E00) >> 9) /* sx, b (4 bits) */
+#define R1_READY_FOR_DATA      BIT(8)          /* sx, a */
+#define R1_SWITCH_ERROR                BIT(7)          /* sx, c */
+
+#define R1_BLOCK_READ_MASK     R1_OUT_OF_RANGE |       \
+                               R1_ADDRESS_ERROR |      \
+                               R1_BLOCK_LEN_ERROR |    \
+                               R1_CARD_IS_LOCKED |     \
+                               R1_COM_CRC_ERROR |      \
+                               R1_ILLEGAL_COMMAND |    \
+                               R1_CARD_ECC_FAILED |    \
+                               R1_CC_ERROR |           \
+                               R1_ERROR
+#define R1_BLOCK_WRITE_MASK    R1_OUT_OF_RANGE |       \
+                               R1_ADDRESS_ERROR |      \
+                               R1_BLOCK_LEN_ERROR |    \
+                               R1_WP_VIOLATION |       \
+                               R1_CARD_IS_LOCKED |     \
+                               R1_COM_CRC_ERROR |      \
+                               R1_ILLEGAL_COMMAND |    \
+                               R1_CARD_ECC_FAILED |    \
+                               R1_CC_ERROR |           \
+                               R1_ERROR |              \
+                               R1_UNDERRUN |           \
+                               R1_OVERRUN
+
+#endif /* __OCTEONTX_HSMMC_H__ */