From: Zhikang Zhang Date: Thu, 3 Aug 2017 09:30:57 +0000 (-0700) Subject: nvme: Add NVM Express driver support X-Git-Tag: v2025.01-rc5-pxa1908~6064 X-Git-Url: http://git.dujemihanovic.xyz/%22/img/sics.gif/%22/static/git-favicon.png?a=commitdiff_plain;h=982388eaa991d251290676f25868eecefa08c0be;p=u-boot.git nvme: Add NVM Express driver support NVM Express (NVMe) is a register level interface that allows host software to communicate with a non-volatile memory subsystem. This interface is optimized for enterprise and client solid state drives, typically attached to the PCI express interface. This adds a U-Boot driver support of devices that follow the NVMe standard [1] and supports basic read/write operations. Tested with a 400GB Intel SSD 750 series NVMe card with controller id 8086:0953. [1] http://www.nvmexpress.org/resources/specifications/ Signed-off-by: Zhikang Zhang Signed-off-by: Wenbin Song Signed-off-by: Bin Meng Reviewed-by: Tom Rini --- diff --git a/doc/README.nvme b/doc/README.nvme new file mode 100644 index 0000000000..d2b917d61b --- /dev/null +++ b/doc/README.nvme @@ -0,0 +1,42 @@ +# +# Copyright (C) 2017 NXP Semiconductors +# Copyright (C) 2017 Bin Meng +# +# SPDX-License-Identifier: GPL-2.0+ +# + +What is NVMe +============ + +NVM Express (NVMe) is a register level interface that allows host software to +communicate with a non-volatile memory subsystem. This interface is optimized +for enterprise and client solid state drives, typically attached to the PCI +express interface. It is a scalable host controller interface designed to +address the needs of enterprise and client systems that utilize PCI express +based solid state drives (SSD). The interface provides optimized command +submission and completion paths. It includes support for parallel operation by +supporting up to 64K I/O queues with up to 64K commands per I/O queue. + +The device is comprised of some number of controllers, where each controller +is comprised of some number of namespaces, where each namespace is comprised +of some number of logical blocks. A namespace is a quantity of non-volatile +memory that is formatted into logical blocks. An NVMe namespace is equivalent +to a SCSI LUN. Each namespace is operated as an independent "device". + +How it works +------------ +There is an NVMe uclass driver (driver name "nvme"), an NVMe host controller +driver (driver name "nvme") and an NVMe namespace block driver (driver name +"nvme-blk"). The host controller driver is supposed to probe the hardware and +do necessary initialization to put the controller into a ready state at which +it is able to scan all available namespaces attached to it. Scanning namespace +is triggered by the NVMe uclass driver and the actual work is done in the NVMe +namespace block driver. + +Status +------ +It only support basic block read/write functions in the NVMe driver. + +Config options +-------------- +CONFIG_NVME Enable NVMe device support diff --git a/drivers/Kconfig b/drivers/Kconfig index 2e03133c43..613e60235d 100644 --- a/drivers/Kconfig +++ b/drivers/Kconfig @@ -50,6 +50,8 @@ source "drivers/mtd/Kconfig" source "drivers/net/Kconfig" +source "drivers/nvme/Kconfig" + source "drivers/pci/Kconfig" source "drivers/pcmcia/Kconfig" diff --git a/drivers/Makefile b/drivers/Makefile index e4a9cb4195..0cbfa5d07b 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -78,6 +78,7 @@ obj-y += firmware/ obj-$(CONFIG_FPGA) += fpga/ obj-y += misc/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_NVME) += nvme/ obj-y += pcmcia/ obj-y += dfu/ obj-$(CONFIG_X86) += pch/ diff --git a/drivers/nvme/Kconfig b/drivers/nvme/Kconfig new file mode 100644 index 0000000000..cad8dbc97b --- /dev/null +++ b/drivers/nvme/Kconfig @@ -0,0 +1,12 @@ +# +# Copyright (C) 2017, Bin Meng +# +# SPDX-License-Identifier: GPL-2.0+ +# + +config NVME + bool "NVM Express device support" + depends on BLK && PCI + help + This option enables support for NVM Express devices. + It supports basic functions of NVMe (read/write). diff --git a/drivers/nvme/Makefile b/drivers/nvme/Makefile new file mode 100644 index 0000000000..7bd9fa41c0 --- /dev/null +++ b/drivers/nvme/Makefile @@ -0,0 +1,7 @@ +# +# Copyright (C) 2017, Bin Meng +# +# SPDX-License-Identifier: GPL-2.0+ +# + +obj-y += nvme-uclass.o nvme.o diff --git a/drivers/nvme/nvme-uclass.c b/drivers/nvme/nvme-uclass.c new file mode 100644 index 0000000000..0895bc9c24 --- /dev/null +++ b/drivers/nvme/nvme-uclass.c @@ -0,0 +1,62 @@ +/* + * Copyright (C) 2017 NXP Semiconductors + * Copyright (C) 2017 Bin Meng + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include +#include +#include +#include +#include "nvme.h" + +static int nvme_info_init(struct uclass *uc) +{ + struct nvme_info *info = (struct nvme_info *)uc->priv; + + info->ns_num = 0; + info->ndev_num = 0; + INIT_LIST_HEAD(&info->dev_list); + nvme_info = info; + + return 0; +} + +static int nvme_uclass_post_probe(struct udevice *udev) +{ + char name[20]; + char *str; + struct udevice *ns_udev; + int i, ret; + struct nvme_dev *ndev = dev_get_priv(udev); + + /* Create a blk device for each namespace */ + for (i = 0; i < ndev->nn; i++) { + sprintf(name, "nvme-blk#%d", nvme_info->ns_num); + str = strdup(name); + if (!str) + return -ENOMEM; + + /* The real blksz and size will be set by nvme_blk_probe() */ + ret = blk_create_device(udev, "nvme-blk", str, IF_TYPE_NVME, + nvme_info->ns_num++, 512, 0, &ns_udev); + if (ret) { + free(str); + nvme_info->ns_num--; + + return ret; + } + device_set_name_alloced(ns_udev); + } + + return 0; +} + +UCLASS_DRIVER(nvme) = { + .name = "nvme", + .id = UCLASS_NVME, + .init = nvme_info_init, + .post_probe = nvme_uclass_post_probe, + .priv_auto_alloc_size = sizeof(struct nvme_info), +}; diff --git a/drivers/nvme/nvme.c b/drivers/nvme/nvme.c new file mode 100644 index 0000000000..a60682af11 --- /dev/null +++ b/drivers/nvme/nvme.c @@ -0,0 +1,839 @@ +/* + * Copyright (C) 2017 NXP Semiconductors + * Copyright (C) 2017 Bin Meng + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#include +#include +#include +#include +#include +#include +#include "nvme.h" + +struct nvme_info *nvme_info; + +#define NVME_Q_DEPTH 2 +#define NVME_AQ_DEPTH 2 +#define NVME_SQ_SIZE(depth) (depth * sizeof(struct nvme_command)) +#define NVME_CQ_SIZE(depth) (depth * sizeof(struct nvme_completion)) +#define ADMIN_TIMEOUT 60 +#define IO_TIMEOUT 30 +#define MAX_PRP_POOL 512 + +/* + * An NVM Express queue. Each device has at least two (one for admin + * commands and one for I/O commands). + */ +struct nvme_queue { + struct nvme_dev *dev; + struct nvme_command *sq_cmds; + struct nvme_completion *cqes; + wait_queue_head_t sq_full; + u32 __iomem *q_db; + u16 q_depth; + s16 cq_vector; + u16 sq_head; + u16 sq_tail; + u16 cq_head; + u16 qid; + u8 cq_phase; + u8 cqe_seen; + unsigned long cmdid_data[]; +}; + +static int nvme_wait_ready(struct nvme_dev *dev, bool enabled) +{ + u32 bit = enabled ? NVME_CSTS_RDY : 0; + + while ((readl(&dev->bar->csts) & NVME_CSTS_RDY) != bit) + udelay(10000); + + return 0; +} + +static int nvme_setup_prps(struct nvme_dev *dev, u64 *prp2, + int total_len, u64 dma_addr) +{ + u32 page_size = dev->page_size; + int offset = dma_addr & (page_size - 1); + u64 *prp_pool; + int length = total_len; + int i, nprps; + length -= (page_size - offset); + + if (length <= 0) { + *prp2 = 0; + return 0; + } + + if (length) + dma_addr += (page_size - offset); + + if (length <= page_size) { + *prp2 = dma_addr; + return 0; + } + + nprps = DIV_ROUND_UP(length, page_size); + + if (nprps > dev->prp_entry_num) { + free(dev->prp_pool); + dev->prp_pool = malloc(nprps << 3); + if (!dev->prp_pool) { + printf("Error: malloc prp_pool fail\n"); + return -ENOMEM; + } + dev->prp_entry_num = nprps; + } + + prp_pool = dev->prp_pool; + i = 0; + while (nprps) { + if (i == ((page_size >> 3) - 1)) { + *(prp_pool + i) = cpu_to_le64((ulong)prp_pool + + page_size); + i = 0; + prp_pool += page_size; + } + *(prp_pool + i++) = cpu_to_le64(dma_addr); + dma_addr += page_size; + nprps--; + } + *prp2 = (ulong)dev->prp_pool; + + return 0; +} + +static __le16 nvme_get_cmd_id(void) +{ + static unsigned short cmdid; + + return cpu_to_le16((cmdid < USHRT_MAX) ? cmdid++ : 0); +} + +static u16 nvme_read_completion_status(struct nvme_queue *nvmeq, u16 index) +{ + u64 start = (ulong)&nvmeq->cqes[index]; + u64 stop = start + sizeof(struct nvme_completion); + + invalidate_dcache_range(start, stop); + + return le16_to_cpu(readw(&(nvmeq->cqes[index].status))); +} + +/** + * nvme_submit_cmd() - copy a command into a queue and ring the doorbell + * + * @nvmeq: The queue to use + * @cmd: The command to send + */ +static void nvme_submit_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd) +{ + u16 tail = nvmeq->sq_tail; + + memcpy(&nvmeq->sq_cmds[tail], cmd, sizeof(*cmd)); + flush_dcache_range((ulong)&nvmeq->sq_cmds[tail], + (ulong)&nvmeq->sq_cmds[tail] + sizeof(*cmd)); + + if (++tail == nvmeq->q_depth) + tail = 0; + writel(tail, nvmeq->q_db); + nvmeq->sq_tail = tail; +} + +static int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, + struct nvme_command *cmd, + u32 *result, unsigned timeout) +{ + u16 head = nvmeq->cq_head; + u16 phase = nvmeq->cq_phase; + u16 status; + ulong start_time; + ulong timeout_us = timeout * 100000; + + cmd->common.command_id = nvme_get_cmd_id(); + nvme_submit_cmd(nvmeq, cmd); + + start_time = timer_get_us(); + + for (;;) { + status = nvme_read_completion_status(nvmeq, head); + if ((status & 0x01) == phase) + break; + if (timeout_us > 0 && (timer_get_us() - start_time) + >= timeout_us) + return -ETIMEDOUT; + } + + status >>= 1; + if (status) { + printf("ERROR: status = %x, phase = %d, head = %d\n", + status, phase, head); + status = 0; + if (++head == nvmeq->q_depth) { + head = 0; + phase = !phase; + } + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + nvmeq->cq_head = head; + nvmeq->cq_phase = phase; + + return -EIO; + } + + if (result) + *result = le32_to_cpu(readl(&(nvmeq->cqes[head].result))); + + if (++head == nvmeq->q_depth) { + head = 0; + phase = !phase; + } + writel(head, nvmeq->q_db + nvmeq->dev->db_stride); + nvmeq->cq_head = head; + nvmeq->cq_phase = phase; + + return status; +} + +static int nvme_submit_admin_cmd(struct nvme_dev *dev, struct nvme_command *cmd, + u32 *result) +{ + return nvme_submit_sync_cmd(dev->queues[0], cmd, result, ADMIN_TIMEOUT); +} + +static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, + int qid, int depth) +{ + struct nvme_queue *nvmeq = malloc(sizeof(*nvmeq)); + if (!nvmeq) + return NULL; + memset(nvmeq, 0, sizeof(*nvmeq)); + + nvmeq->cqes = (void *)memalign(4096, NVME_CQ_SIZE(depth)); + if (!nvmeq->cqes) + goto free_nvmeq; + memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(depth)); + + nvmeq->sq_cmds = (void *)memalign(4096, NVME_SQ_SIZE(depth)); + if (!nvmeq->sq_cmds) + goto free_queue; + memset((void *)nvmeq->sq_cmds, 0, NVME_SQ_SIZE(depth)); + + nvmeq->dev = dev; + + nvmeq->cq_head = 0; + nvmeq->cq_phase = 1; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; + nvmeq->q_depth = depth; + nvmeq->qid = qid; + dev->queue_count++; + dev->queues[qid] = nvmeq; + + return nvmeq; + + free_queue: + free((void *)nvmeq->cqes); + free_nvmeq: + free(nvmeq); + + return NULL; +} + +static int nvme_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.delete_queue.opcode = opcode; + c.delete_queue.qid = cpu_to_le16(id); + + return nvme_submit_admin_cmd(dev, &c, NULL); +} + +static int nvme_delete_sq(struct nvme_dev *dev, u16 sqid) +{ + return nvme_delete_queue(dev, nvme_admin_delete_sq, sqid); +} + +static int nvme_delete_cq(struct nvme_dev *dev, u16 cqid) +{ + return nvme_delete_queue(dev, nvme_admin_delete_cq, cqid); +} + +static int nvme_enable_ctrl(struct nvme_dev *dev) +{ + dev->ctrl_config &= ~NVME_CC_SHN_MASK; + dev->ctrl_config |= NVME_CC_ENABLE; + writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc); + + return nvme_wait_ready(dev, true); +} + +static int nvme_disable_ctrl(struct nvme_dev *dev) +{ + dev->ctrl_config &= ~NVME_CC_SHN_MASK; + dev->ctrl_config &= ~NVME_CC_ENABLE; + writel(cpu_to_le32(dev->ctrl_config), &dev->bar->cc); + + return nvme_wait_ready(dev, false); +} + +static void nvme_free_queue(struct nvme_queue *nvmeq) +{ + free((void *)nvmeq->cqes); + free(nvmeq->sq_cmds); + free(nvmeq); +} + +static void nvme_free_queues(struct nvme_dev *dev, int lowest) +{ + int i; + + for (i = dev->queue_count - 1; i >= lowest; i--) { + struct nvme_queue *nvmeq = dev->queues[i]; + dev->queue_count--; + dev->queues[i] = NULL; + nvme_free_queue(nvmeq); + } +} + +static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) +{ + struct nvme_dev *dev = nvmeq->dev; + + nvmeq->sq_tail = 0; + nvmeq->cq_head = 0; + nvmeq->cq_phase = 1; + nvmeq->q_db = &dev->dbs[qid * 2 * dev->db_stride]; + memset((void *)nvmeq->cqes, 0, NVME_CQ_SIZE(nvmeq->q_depth)); + flush_dcache_range((ulong)nvmeq->cqes, + (ulong)nvmeq->cqes + NVME_CQ_SIZE(nvmeq->q_depth)); + dev->online_queues++; +} + +static int nvme_configure_admin_queue(struct nvme_dev *dev) +{ + int result; + u32 aqa; + u64 cap = nvme_readq(&dev->bar->cap); + struct nvme_queue *nvmeq; + /* most architectures use 4KB as the page size */ + unsigned page_shift = 12; + unsigned dev_page_min = NVME_CAP_MPSMIN(cap) + 12; + unsigned dev_page_max = NVME_CAP_MPSMAX(cap) + 12; + + if (page_shift < dev_page_min) { + debug("Device minimum page size (%u) too large for host (%u)\n", + 1 << dev_page_min, 1 << page_shift); + return -ENODEV; + } + + if (page_shift > dev_page_max) { + debug("Device maximum page size (%u) smaller than host (%u)\n", + 1 << dev_page_max, 1 << page_shift); + page_shift = dev_page_max; + } + + result = nvme_disable_ctrl(dev); + if (result < 0) + return result; + + nvmeq = dev->queues[0]; + if (!nvmeq) { + nvmeq = nvme_alloc_queue(dev, 0, NVME_AQ_DEPTH); + if (!nvmeq) + return -ENOMEM; + } + + aqa = nvmeq->q_depth - 1; + aqa |= aqa << 16; + aqa |= aqa << 16; + + dev->page_size = 1 << page_shift; + + dev->ctrl_config = NVME_CC_CSS_NVM; + dev->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; + dev->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; + dev->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; + + writel(aqa, &dev->bar->aqa); + nvme_writeq((ulong)nvmeq->sq_cmds, &dev->bar->asq); + nvme_writeq((ulong)nvmeq->cqes, &dev->bar->acq); + + result = nvme_enable_ctrl(dev); + if (result) + goto free_nvmeq; + + nvmeq->cq_vector = 0; + + nvme_init_queue(dev->queues[0], 0); + + return result; + + free_nvmeq: + nvme_free_queues(dev, 0); + + return result; +} + +static int nvme_alloc_cq(struct nvme_dev *dev, u16 qid, + struct nvme_queue *nvmeq) +{ + struct nvme_command c; + int flags = NVME_QUEUE_PHYS_CONTIG | NVME_CQ_IRQ_ENABLED; + + memset(&c, 0, sizeof(c)); + c.create_cq.opcode = nvme_admin_create_cq; + c.create_cq.prp1 = cpu_to_le64((ulong)nvmeq->cqes); + c.create_cq.cqid = cpu_to_le16(qid); + c.create_cq.qsize = cpu_to_le16(nvmeq->q_depth - 1); + c.create_cq.cq_flags = cpu_to_le16(flags); + c.create_cq.irq_vector = cpu_to_le16(nvmeq->cq_vector); + + return nvme_submit_admin_cmd(dev, &c, NULL); +} + +static int nvme_alloc_sq(struct nvme_dev *dev, u16 qid, + struct nvme_queue *nvmeq) +{ + struct nvme_command c; + int flags = NVME_QUEUE_PHYS_CONTIG | NVME_SQ_PRIO_MEDIUM; + + memset(&c, 0, sizeof(c)); + c.create_sq.opcode = nvme_admin_create_sq; + c.create_sq.prp1 = cpu_to_le64((ulong)nvmeq->sq_cmds); + c.create_sq.sqid = cpu_to_le16(qid); + c.create_sq.qsize = cpu_to_le16(nvmeq->q_depth - 1); + c.create_sq.sq_flags = cpu_to_le16(flags); + c.create_sq.cqid = cpu_to_le16(qid); + + return nvme_submit_admin_cmd(dev, &c, NULL); +} + +int nvme_identify(struct nvme_dev *dev, unsigned nsid, + unsigned cns, dma_addr_t dma_addr) +{ + struct nvme_command c; + u32 page_size = dev->page_size; + int offset = dma_addr & (page_size - 1); + int length = sizeof(struct nvme_id_ctrl); + + memset(&c, 0, sizeof(c)); + c.identify.opcode = nvme_admin_identify; + c.identify.nsid = cpu_to_le32(nsid); + c.identify.prp1 = cpu_to_le64(dma_addr); + + length -= (page_size - offset); + if (length <= 0) { + c.identify.prp2 = 0; + } else { + dma_addr += (page_size - offset); + c.identify.prp2 = dma_addr; + } + + c.identify.cns = cpu_to_le32(cns); + + return nvme_submit_admin_cmd(dev, &c, NULL); +} + +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.features.opcode = nvme_admin_get_features; + c.features.nsid = cpu_to_le32(nsid); + c.features.prp1 = cpu_to_le64(dma_addr); + c.features.fid = cpu_to_le32(fid); + + return nvme_submit_admin_cmd(dev, &c, result); +} + +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result) +{ + struct nvme_command c; + + memset(&c, 0, sizeof(c)); + c.features.opcode = nvme_admin_set_features; + c.features.prp1 = cpu_to_le64(dma_addr); + c.features.fid = cpu_to_le32(fid); + c.features.dword11 = cpu_to_le32(dword11); + + return nvme_submit_admin_cmd(dev, &c, result); +} + +static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) +{ + struct nvme_dev *dev = nvmeq->dev; + int result; + + nvmeq->cq_vector = qid - 1; + result = nvme_alloc_cq(dev, qid, nvmeq); + if (result < 0) + goto release_cq; + + result = nvme_alloc_sq(dev, qid, nvmeq); + if (result < 0) + goto release_sq; + + nvme_init_queue(nvmeq, qid); + + return result; + + release_sq: + nvme_delete_sq(dev, qid); + release_cq: + nvme_delete_cq(dev, qid); + + return result; +} + +static int nvme_set_queue_count(struct nvme_dev *dev, int count) +{ + int status; + u32 result; + u32 q_count = (count - 1) | ((count - 1) << 16); + + status = nvme_set_features(dev, NVME_FEAT_NUM_QUEUES, + q_count, 0, &result); + + if (status < 0) + return status; + if (status > 1) + return 0; + + return min(result & 0xffff, result >> 16) + 1; +} + +static void nvme_create_io_queues(struct nvme_dev *dev) +{ + unsigned int i; + + for (i = dev->queue_count; i <= dev->max_qid; i++) + if (!nvme_alloc_queue(dev, i, dev->q_depth)) + break; + + for (i = dev->online_queues; i <= dev->queue_count - 1; i++) + if (nvme_create_queue(dev->queues[i], i)) + break; +} + +static int nvme_setup_io_queues(struct nvme_dev *dev) +{ + int nr_io_queues; + int result; + + nr_io_queues = 1; + result = nvme_set_queue_count(dev, nr_io_queues); + if (result <= 0) + return result; + + if (result < nr_io_queues) + nr_io_queues = result; + + dev->max_qid = nr_io_queues; + + /* Free previously allocated queues */ + nvme_free_queues(dev, nr_io_queues + 1); + nvme_create_io_queues(dev); + + return 0; +} + +static int nvme_get_info_from_identify(struct nvme_dev *dev) +{ + u16 vendor, device; + struct nvme_id_ctrl buf, *ctrl = &buf; + int ret; + int shift = NVME_CAP_MPSMIN(nvme_readq(&dev->bar->cap)) + 12; + + ret = nvme_identify(dev, 0, 1, (dma_addr_t)ctrl); + if (ret) + return -EIO; + + dev->nn = le32_to_cpu(ctrl->nn); + dev->vwc = ctrl->vwc; + memcpy(dev->serial, ctrl->sn, sizeof(ctrl->sn)); + memcpy(dev->model, ctrl->mn, sizeof(ctrl->mn)); + memcpy(dev->firmware_rev, ctrl->fr, sizeof(ctrl->fr)); + if (ctrl->mdts) + dev->max_transfer_shift = (ctrl->mdts + shift); + + /* Apply quirk stuff */ + dm_pci_read_config16(dev->pdev, PCI_VENDOR_ID, &vendor); + dm_pci_read_config16(dev->pdev, PCI_DEVICE_ID, &device); + if ((vendor == PCI_VENDOR_ID_INTEL) && + (device == 0x0953) && ctrl->vs[3]) { + unsigned int max_transfer_shift; + dev->stripe_size = (ctrl->vs[3] + shift); + max_transfer_shift = (ctrl->vs[3] + 18); + if (dev->max_transfer_shift) { + dev->max_transfer_shift = min(max_transfer_shift, + dev->max_transfer_shift); + } else { + dev->max_transfer_shift = max_transfer_shift; + } + } + + return 0; +} + +int nvme_scan_namespace(void) +{ + struct uclass *uc; + struct udevice *dev; + int ret; + + ret = uclass_get(UCLASS_NVME, &uc); + if (ret) + return ret; + + uclass_foreach_dev(dev, uc) { + ret = device_probe(dev); + if (ret) + return ret; + } + + return 0; +} + +static int nvme_blk_probe(struct udevice *udev) +{ + struct nvme_dev *ndev = dev_get_priv(udev->parent); + struct blk_desc *desc = dev_get_uclass_platdata(udev); + struct nvme_ns *ns = dev_get_priv(udev); + u8 flbas; + u16 vendor; + struct nvme_id_ns buf, *id = &buf; + + memset(ns, 0, sizeof(*ns)); + ns->dev = ndev; + ns->ns_id = desc->devnum - ndev->blk_dev_start + 1; + if (nvme_identify(ndev, ns->ns_id, 0, (dma_addr_t)id)) + return -EIO; + + flbas = id->flbas & NVME_NS_FLBAS_LBA_MASK; + ns->flbas = flbas; + ns->lba_shift = id->lbaf[flbas].ds; + ns->mode_select_num_blocks = le64_to_cpu(id->nuse); + ns->mode_select_block_len = 1 << ns->lba_shift; + list_add(&ns->list, &ndev->namespaces); + + desc->lba = ns->mode_select_num_blocks; + desc->log2blksz = ns->lba_shift; + desc->blksz = 1 << ns->lba_shift; + desc->bdev = udev; + dm_pci_read_config16(ndev->pdev, PCI_VENDOR_ID, &vendor); + sprintf(desc->vendor, "0x%.4x", vendor); + memcpy(desc->product, ndev->serial, sizeof(ndev->serial)); + memcpy(desc->revision, ndev->firmware_rev, sizeof(ndev->firmware_rev)); + part_init(desc); + + return 0; +} + +static ulong nvme_blk_read(struct udevice *udev, lbaint_t blknr, + lbaint_t blkcnt, void *buffer) +{ + struct nvme_ns *ns = dev_get_priv(udev); + struct nvme_dev *dev = ns->dev; + struct nvme_command c; + struct blk_desc *desc = dev_get_uclass_platdata(udev); + int status; + u64 prp2; + u64 total_len = blkcnt << desc->log2blksz; + u64 temp_len = total_len; + + u64 slba = blknr; + u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift); + u64 total_lbas = blkcnt; + + c.rw.opcode = nvme_cmd_read; + c.rw.flags = 0; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.control = 0; + c.rw.dsmgmt = 0; + c.rw.reftag = 0; + c.rw.apptag = 0; + c.rw.appmask = 0; + c.rw.metadata = 0; + + while (total_lbas) { + if (total_lbas < lbas) { + lbas = (u16)total_lbas; + total_lbas = 0; + } else { + total_lbas -= lbas; + } + + if (nvme_setup_prps + (dev, &prp2, lbas << ns->lba_shift, (ulong)buffer)) + return -EIO; + c.rw.slba = cpu_to_le64(slba); + slba += lbas; + c.rw.length = cpu_to_le16(lbas - 1); + c.rw.prp1 = cpu_to_le64((ulong)buffer); + c.rw.prp2 = cpu_to_le64(prp2); + status = nvme_submit_sync_cmd(dev->queues[1], + &c, NULL, IO_TIMEOUT); + if (status) + break; + temp_len -= lbas << ns->lba_shift; + buffer += lbas << ns->lba_shift; + } + + return (total_len - temp_len) >> desc->log2blksz; +} + +static ulong nvme_blk_write(struct udevice *udev, lbaint_t blknr, + lbaint_t blkcnt, const void *buffer) +{ + struct nvme_ns *ns = dev_get_priv(udev); + struct nvme_dev *dev = ns->dev; + struct nvme_command c; + struct blk_desc *desc = dev_get_uclass_platdata(udev); + int status; + u64 prp2; + u64 total_len = blkcnt << desc->log2blksz; + u64 temp_len = total_len; + + u64 slba = blknr; + u16 lbas = 1 << (dev->max_transfer_shift - ns->lba_shift); + u64 total_lbas = blkcnt; + + c.rw.opcode = nvme_cmd_write; + c.rw.flags = 0; + c.rw.nsid = cpu_to_le32(ns->ns_id); + c.rw.control = 0; + c.rw.dsmgmt = 0; + c.rw.reftag = 0; + c.rw.apptag = 0; + c.rw.appmask = 0; + c.rw.metadata = 0; + + while (total_lbas) { + if (total_lbas < lbas) { + lbas = (u16)total_lbas; + total_lbas = 0; + } else { + total_lbas -= lbas; + } + + if (nvme_setup_prps + (dev, &prp2, lbas << ns->lba_shift, (ulong)buffer)) + return -EIO; + c.rw.slba = cpu_to_le64(slba); + slba += lbas; + c.rw.length = cpu_to_le16(lbas - 1); + c.rw.prp1 = cpu_to_le64((ulong)buffer); + c.rw.prp2 = cpu_to_le64(prp2); + status = nvme_submit_sync_cmd(dev->queues[1], + &c, NULL, IO_TIMEOUT); + if (status) + break; + temp_len -= lbas << ns->lba_shift; + buffer += lbas << ns->lba_shift; + } + + return (total_len - temp_len) >> desc->log2blksz; +} + +static const struct blk_ops nvme_blk_ops = { + .read = nvme_blk_read, + .write = nvme_blk_write, +}; + +U_BOOT_DRIVER(nvme_blk) = { + .name = "nvme-blk", + .id = UCLASS_BLK, + .probe = nvme_blk_probe, + .ops = &nvme_blk_ops, + .priv_auto_alloc_size = sizeof(struct nvme_ns), +}; + +static int nvme_bind(struct udevice *udev) +{ + char name[20]; + sprintf(name, "nvme#%d", nvme_info->ndev_num++); + + return device_set_name(udev, name); +} + +static int nvme_probe(struct udevice *udev) +{ + int ret; + struct nvme_dev *ndev = dev_get_priv(udev); + u64 cap; + + ndev->pdev = pci_get_controller(udev); + ndev->instance = trailing_strtol(udev->name); + + INIT_LIST_HEAD(&ndev->namespaces); + ndev->bar = dm_pci_map_bar(udev, PCI_BASE_ADDRESS_0, + PCI_REGION_MEM); + if (readl(&ndev->bar->csts) == -1) { + ret = -ENODEV; + printf("Error: %s: Out of memory!\n", udev->name); + goto free_nvme; + } + + ndev->queues = malloc(2 * sizeof(struct nvme_queue)); + if (!ndev->queues) { + ret = -ENOMEM; + printf("Error: %s: Out of memory!\n", udev->name); + goto free_nvme; + } + memset(ndev->queues, 0, sizeof(2 * sizeof(struct nvme_queue))); + + ndev->prp_pool = malloc(MAX_PRP_POOL); + if (!ndev->prp_pool) { + ret = -ENOMEM; + printf("Error: %s: Out of memory!\n", udev->name); + goto free_nvme; + } + ndev->prp_entry_num = MAX_PRP_POOL >> 3; + + cap = nvme_readq(&ndev->bar->cap); + ndev->q_depth = min_t(int, NVME_CAP_MQES(cap) + 1, NVME_Q_DEPTH); + ndev->db_stride = 1 << NVME_CAP_STRIDE(cap); + ndev->dbs = ((void __iomem *)ndev->bar) + 4096; + + ret = nvme_configure_admin_queue(ndev); + if (ret) + goto free_queue; + + ret = nvme_setup_io_queues(ndev); + if (ret) + goto free_queue; + + nvme_get_info_from_identify(ndev); + ndev->blk_dev_start = nvme_info->ns_num; + list_add(&ndev->node, &nvme_info->dev_list); + + return 0; + +free_queue: + free((void *)ndev->queues); +free_nvme: + return ret; +} + +U_BOOT_DRIVER(nvme) = { + .name = "nvme", + .id = UCLASS_NVME, + .bind = nvme_bind, + .probe = nvme_probe, + .priv_auto_alloc_size = sizeof(struct nvme_dev), +}; + +struct pci_device_id nvme_supported[] = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0953) }, + {} +}; + +U_BOOT_PCI_DEVICE(nvme, nvme_supported); diff --git a/drivers/nvme/nvme.h b/drivers/nvme/nvme.h new file mode 100644 index 0000000000..b7fdd0b8e2 --- /dev/null +++ b/drivers/nvme/nvme.h @@ -0,0 +1,717 @@ +/* + * Copyright (C) 2017 NXP Semiconductors + * Copyright (C) 2017 Bin Meng + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __DRIVER_NVME_H__ +#define __DRIVER_NVME_H__ + +#include + +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; +}; + +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, +}; + +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 mic; + __u8 mdts; + __u16 cntlid; + __u32 ver; + __u8 rsvd84[172]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; + __u8 sqes; + __u8 cqes; + __u8 rsvd514[2]; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; + +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, +}; + +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; +}; + +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __u16 rsvd46; + __le64 nvmcap[2]; + __u8 rsvd64[40]; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, + NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; + __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRACT = 1 << 13, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_SW_PROGRESS = 0x80, + NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +struct nvme_identify { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 cns; + __u32 rsvd11[5]; +}; + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 fid; + __le32 dword11; + __u32 rsvd12[4]; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; + }; +}; + +enum { + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + NVME_SC_LBA_RANGE = 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + __le32 result; /* Used by admin commands to return data */ + __u32 rsvd; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? */ +}; + +struct nvme_user_io { + __u8 opcode; + __u8 flags; + __u16 control; + __u16 nblocks; + __u16 rsvd; + __u64 metadata; + __u64 addr; + __u64 slba; + __u32 dsmgmt; + __u32 reftag; + __u16 apptag; + __u16 appmask; +}; + +struct nvme_passthru_cmd { + __u8 opcode; + __u8 flags; + __u16 rsvd1; + __u32 nsid; + __u32 cdw2; + __u32 cdw3; + __u64 metadata; + __u64 addr; + __u32 metadata_len; + __u32 data_len; + __u32 cdw10; + __u32 cdw11; + __u32 cdw12; + __u32 cdw13; + __u32 cdw14; + __u32 cdw15; + __u32 timeout_ms; + __u32 result; +}; + +/* + * Registers should always be accessed with double word or quad word + * accesses. Registers with 64-bit address pointers should be written + * to with dword accesses by writing the low dword first (ptr[0]), + * then the high dword (ptr[1]) second. + */ +static inline u64 nvme_readq(__le64 volatile *regs) +{ +#if BITS_PER_LONG == 64 + return readq(regs); +#else + __u32 *ptr = (__u32 *)regs; + u64 val_lo = readl(ptr); + u64 val_hi = readl(ptr + 1); + + return val_lo + (val_hi << 32); +#endif +} + +static inline void nvme_writeq(const u64 val, __le64 volatile *regs) +{ +#if BITS_PER_LONG == 64 + writeq(val, regs); +#else + __u32 *ptr = (__u32 *)regs; + u32 val_lo = lower_32_bits(val); + u32 val_hi = upper_32_bits(val); + writel(val_lo, ptr); + writel(val_hi, ptr + 1); +#endif +} + +struct nvme_bar { + __u64 cap; /* Controller Capabilities */ + __u32 vs; /* Version */ + __u32 intms; /* Interrupt Mask Set */ + __u32 intmc; /* Interrupt Mask Clear */ + __u32 cc; /* Controller Configuration */ + __u32 rsvd1; /* Reserved */ + __u32 csts; /* Controller Status */ + __u32 rsvd2; /* Reserved */ + __u32 aqa; /* Admin Queue Attributes */ + __u64 asq; /* Admin SQ Base Address */ + __u64 acq; /* Admin CQ Base Address */ +}; + +#define NVME_CAP_MQES(cap) ((cap) & 0xffff) +#define NVME_CAP_TIMEOUT(cap) (((cap) >> 24) & 0xff) +#define NVME_CAP_STRIDE(cap) (((cap) >> 32) & 0xf) +#define NVME_CAP_MPSMIN(cap) (((cap) >> 48) & 0xf) +#define NVME_CAP_MPSMAX(cap) (((cap) >> 52) & 0xf) + +#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) + +enum { + NVME_CC_ENABLE = 1 << 0, + NVME_CC_CSS_NVM = 0 << 4, + NVME_CC_MPS_SHIFT = 7, + NVME_CC_ARB_RR = 0 << 11, + NVME_CC_ARB_WRRU = 1 << 11, + NVME_CC_ARB_VS = 7 << 11, + NVME_CC_SHN_NONE = 0 << 14, + NVME_CC_SHN_NORMAL = 1 << 14, + NVME_CC_SHN_ABRUPT = 2 << 14, + NVME_CC_SHN_MASK = 3 << 14, + NVME_CC_IOSQES = 6 << 16, + NVME_CC_IOCQES = 4 << 20, + NVME_CSTS_RDY = 1 << 0, + NVME_CSTS_CFS = 1 << 1, + NVME_CSTS_SHST_NORMAL = 0 << 2, + NVME_CSTS_SHST_OCCUR = 1 << 2, + NVME_CSTS_SHST_CMPLT = 2 << 2, + NVME_CSTS_SHST_MASK = 3 << 2, +}; + +/* Represents an NVM Express device. Each nvme_dev is a PCI function. */ +struct nvme_dev { + struct list_head node; + struct nvme_queue **queues; + u32 __iomem *dbs; + unsigned int cardnum; + struct udevice *pdev; + pci_dev_t pci_dev; + int instance; + uint8_t *hw_addr; + unsigned queue_count; + unsigned online_queues; + unsigned max_qid; + int q_depth; + u32 db_stride; + u32 ctrl_config; + struct nvme_bar __iomem *bar; + struct list_head namespaces; + const char *name; + char serial[20]; + char model[40]; + char firmware_rev[8]; + u32 max_transfer_shift; + u32 stripe_size; + u32 page_size; + u16 oncs; + u16 abort_limit; + u8 event_limit; + u8 vwc; + u64 *prp_pool; + u32 prp_entry_num; + u32 nn; + u32 blk_dev_start; +}; + +struct nvme_info { + int ns_num; /*the number of nvme namespaces*/ + int ndev_num; /*the number of nvme devices*/ + struct list_head dev_list; +}; + +/* + * The nvme_iod describes the data in an I/O, including the list of PRP + * entries. You can't see it in this data structure because C doesn't let + * me express that. Use nvme_alloc_iod to ensure there's enough space + * allocated to store the PRP list. + */ +struct nvme_iod { + unsigned long private; /* For the use of the submitter of the I/O */ + int npages; /* In the PRP list. 0 means small pool in use */ + int offset; /* Of PRP list */ + int nents; /* Used in scatterlist */ + int length; /* Of data, in bytes */ + dma_addr_t first_dma; +}; + +/* + * An NVM Express namespace is equivalent to a SCSI LUN. + * Each namespace is operated as an independent "device". + */ +struct nvme_ns { + struct list_head list; + struct nvme_dev *dev; + unsigned ns_id; + int devnum; + int lba_shift; + u16 ms; + u8 flbas; + u8 pi_type; + u64 mode_select_num_blocks; + u32 mode_select_block_len; +}; + +extern struct nvme_info *nvme_info; + +#endif /* __DRIVER_NVME_H__ */ diff --git a/include/nvme.h b/include/nvme.h new file mode 100644 index 0000000000..362440871b --- /dev/null +++ b/include/nvme.h @@ -0,0 +1,71 @@ +/* + * Copyright (C) 2017 NXP Semiconductors + * Copyright (C) 2017 Bin Meng + * + * SPDX-License-Identifier: GPL-2.0+ + */ + +#ifndef __NVME_H__ +#define __NVME_H__ + +struct nvme_dev; + +/** + * nvme_identify - identify controller or namespace capabilities and status + * + * This issues an identify command to the NVMe controller to return a data + * buffer that describes the controller or namespace capabilities and status. + * + * @dev: NVMe controller device + * @nsid: 0 for controller, namespace id for namespace to identify + * @cns: 1 for controller, 0 for namespace + * @dma_addr: dma buffer address to store the identify result + * @return: 0 on success, -ETIMEDOUT on command execution timeout, + * -EIO on command execution fails + */ +int nvme_identify(struct nvme_dev *dev, unsigned nsid, + unsigned cns, dma_addr_t dma_addr); + +/** + * nvme_get_features - retrieve the attributes of the feature specified + * + * This retrieves the attributes of the feature specified. + * + * @dev: NVMe controller device + * @fid: feature id to provide data + * @nsid: namespace id the command applies to + * @dma_addr: data structure used as part of the specified feature + * @result: command-specific result in the completion queue entry + * @return: 0 on success, -ETIMEDOUT on command execution timeout, + * -EIO on command execution fails + */ +int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, + dma_addr_t dma_addr, u32 *result); + +/** + * nvme_set_features - specify the attributes of the feature indicated + * + * This specifies the attributes of the feature indicated. + * + * @dev: NVMe controller device + * @fid: feature id to provide data + * @dword11: command-specific input parameter + * @dma_addr: data structure used as part of the specified feature + * @result: command-specific result in the completion queue entry + * @return: 0 on success, -ETIMEDOUT on command execution timeout, + * -EIO on command execution fails + */ +int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, + dma_addr_t dma_addr, u32 *result); + +/** + * nvme_scan_namespace - scan all namespaces attached to NVMe controllers + * + * This probes all registered NVMe uclass device drivers in the system, + * and tries to find all namespaces attached to the NVMe controllers. + * + * @return: 0 on success, -ve on error + */ +int nvme_scan_namespace(void); + +#endif /* __NVME_H__ */