/*
 * Copyright (c) 2006-2023, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2023-02-25     GuEe-GUI     the first version
 */

#include <rthw.h>
#include <rtthread.h>
#include <rtdevice.h>

#define DBG_TAG "rtdm.nvme"
#define DBG_LVL DBG_INFO
#include <rtdbg.h>

static struct rt_dm_ida nvme_controller_ida = RT_DM_IDA_INIT(CUSTOM);
static struct rt_dm_ida nvme_ida = RT_DM_IDA_INIT(NVME);

static RT_DEFINE_SPINLOCK(nvme_lock);
static rt_list_t nvme_nodes = RT_LIST_OBJECT_INIT(nvme_nodes);

rt_inline rt_uint32_t nvme_readl(struct rt_nvme_controller *nvme, int offset)
{
    return HWREG32(nvme->regs + offset);
}

rt_inline void nvme_writel(struct rt_nvme_controller *nvme, int offset, rt_uint32_t value)
{
    HWREG32(nvme->regs + offset) = value;
}

rt_inline rt_uint64_t nvme_readq(struct rt_nvme_controller *nvme, int offset)
{
    rt_uint32_t lo32, hi32;

    lo32 = HWREG32(nvme->regs + offset);
    hi32 = HWREG32(nvme->regs + offset + 4);

    return ((rt_uint64_t)hi32 << 32) + lo32;
}

rt_inline void nvme_writeq(struct rt_nvme_controller *nvme, int offset, rt_uint64_t value)
{
    nvme_writel(nvme, offset, (rt_uint32_t)(value & 0xffffffff));
    nvme_writel(nvme, offset + 4, (rt_uint32_t)(value >> 32));
}

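/*
 * Wait until (CSTS & mask) == value. Per the NVMe spec, CAP.TO is expressed
 * in 500 ms units, which is why the timeout below is CAP.TO * 500 ms.
 */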
static rt_err_t nvme_poll_csts(struct rt_nvme_controller *nvme,
        rt_uint32_t mask, rt_uint32_t value)
{
    rt_tick_t timeout;

    timeout = rt_tick_from_millisecond(RT_NVME_CAP_TIMEOUT(nvme->cap) * 500);
    timeout += rt_tick_get();

    do {
        if ((nvme_readl(nvme, RT_NVME_REG_CSTS) & mask) == value)
        {
            return RT_EOK;
        }

        rt_hw_cpu_relax();
    } while (rt_tick_get() < timeout);

    return -RT_ETIMEOUT;
}

static rt_err_t nvme_enable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_ENABLE;

    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, RT_NVME_CSTS_RDY);
}

static rt_err_t nvme_disable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config &= ~RT_NVME_CC_ENABLE;

    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, 0);
}

static rt_err_t nvme_shutdown_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_SHN_NORMAL;

    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_SHST_MASK, RT_NVME_CSTS_SHST_CMPLT);
}

rt_inline rt_le16_t nvme_next_cmdid(struct rt_nvme_controller *nvme)
{
    return rt_cpu_to_le16((rt_uint16_t)rt_atomic_add(&nvme->cmdid, 1));
}

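/*
 * Queue one command: copy it into the submission queue slot at the current
 * tail, ring the SQ doorbell, then block on the completion object that
 * nvme_queue_isr() signals once the matching completion entry arrives.
 */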
static rt_err_t nvme_submit_cmd(struct rt_nvme_queue *queue,
        struct rt_nvme_command *cmd)
{
    rt_ubase_t level;
    rt_err_t err = RT_EOK;
    rt_uint16_t tail, head;
    struct rt_nvme_controller *nvme = queue->nvme;

_retry:
    level = rt_spin_lock_irqsave(&queue->lock);

    tail = queue->sq_tail;
    head = queue->cq_head;

    if (tail + 1 == head)
    {
        /* The queue is full, wait for the in-flight commands to complete. */
        rt_spin_unlock_irqrestore(&queue->lock, level);

        rt_thread_yield();

        goto _retry;
    }

    cmd->common.cmdid = nvme_next_cmdid(nvme);
    rt_memcpy(&queue->sq_cmds[tail], cmd, sizeof(*cmd));

    if (nvme->ops->submit_cmd)
    {
        if ((err = nvme->ops->submit_cmd(queue, cmd)))
        {
            /* Don't leak the lock on the error path. */
            rt_spin_unlock_irqrestore(&queue->lock, level);

            return err;
        }
    }

    if (++tail == queue->depth)
    {
        tail = 0;
    }
    HWREG32(queue->doorbell) = tail;
    queue->sq_tail = tail;

    queue->cmd = cmd;
    queue->err = RT_EOK;

    rt_spin_unlock_irqrestore(&queue->lock, level);

    /* I/O queues wait forever; admin commands time out after 60 seconds. */
    err = rt_completion_wait(&queue->done,
            queue->qid != 0 ? RT_WAITING_FOREVER : rt_tick_from_millisecond(60 * 1000));

    return err ? : queue->err;
}

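/*
 * Set Features helper: the feature identifier goes in command dword 10 and
 * the feature value in command dword 11, so simple features need no data
 * buffer.
 */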
static rt_err_t nvme_set_features_simple(struct rt_nvme_controller *nvme,
        rt_uint32_t fid, rt_uint32_t dword11)
{
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.features.opcode = RT_NVME_ADMIN_OPCODE_SET_FEATURES;
    cmd.features.fid = rt_cpu_to_le32(fid);
    cmd.features.dword11 = rt_cpu_to_le32(dword11);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

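/*
 * Spread the I/O load: each CPU steps its own counter by RT_CPUS_NR, which
 * round-robins commands over the I/O queues while keeping CPUs on disjoint
 * queue sequences.
 */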
static rt_err_t nvme_submit_io_cmd(struct rt_nvme_controller *nvme,
        struct rt_nvme_command *cmd)
{
    rt_uint16_t qid;

    qid = rt_atomic_add(&nvme->ioqid[rt_hw_cpu_id()], RT_CPUS_NR);
    qid %= nvme->io_queue_max;

    return nvme_submit_cmd(&nvme->io_queues[qid], cmd);
}

/*
 * PRP Mode:
 *
 * |63                                   n+1|n                0|
 * +----------------------------------------+----------+---+---+
 * |           Page Base Address            |  Offset  | 0 | 0 |
 * +----------------------------------------+----------+---+---+
 *                     |
 *                     v
 *            Host Physical Pages
 *
 *                                       +----------------------------+
 * +--------------+----------+           |           Page k           |
 * |  PRP Entry1  |  Offset  +---------->+----------------------------+
 * +--------------+----------+           |         Page k + 1         |
 *                                       +----------------------------+
 *                                                    ...
 *                                       +----------------------------+
 * +--------------+----------+           |         Page k + m         |
 * |  PRP Entry2  |    0     +---------->+----------------------------+
 * +--------------+----------+           |       Page k + m + 1       |
 *                                       +----------------------------+
 *
 * PRP List (In PRP Entry2):
 *
 * |63                                   n+1|n                0|
 * +----------------------------------------+------------------+
 * |          Page Base Address k           |        0h        |
 * +----------------------------------------+------------------+
 * |        Page Base Address k + 1         |        0h        |
 * +----------------------------------------+------------------+
 * |                            ...                            |
 * +----------------------------------------+------------------+
 * |        Page Base Address k + m         |        0h        |
 * +----------------------------------------+------------------+
 * |      Page Base Address k + m + 1       |        0h        |
 * +----------------------------------------+------------------+
 *
 * SGL Mode:
 *
 *                                           +----- Non-transport
 *              LBA                         /
 * +---------------+---------------+-------/-------+---------------+
 * |      3KB      |      4KB      |      2KB      |      4KB      |
 * +-------+-------+-------+-------+---------------+--------+------+
 *         |               |                                |
 *         |               +--------+                       |
 *         |                        |                       |
 * +-------v-------+        +-------v-------+       +-------v-------+
 * |  A MEM BLOCK  |        |  B MEM BLOCK  |       |  C MEM BLOCK  |
 * +-------^-------+        +-------^-------+       +-------^-------+
 *         |                        |                       |
 *         +----------------+       |                       |
 *                          |       |                       |
 * Segment(0)               |       |                       |
 * +----------+----------+  |       |                       |
 * |     Address: A      +--+       |                       |
 * +----------+----------+          |                       |
 * | Type: 0h | Len: 3KB |          |                       |
 * +----------+----------+          |                       |
 * | Address: Segment(1) +--+       |                       |
 * +----------+----------+  |       |                       |
 * | Type: 2h |  Len: 48 |  |       |                       |
 * +----------+----------+  |       |                       |
 *                          |       |                       |
 * +------------------------+       |                       |
 * |                                |                       |
 * v                                |                       |
 * Segment(1)                       |                       |
 * +----------+----------+          |                       |
 * |     Address: B      +----------+                       |
 * +----------+----------+                                  |
 * | Type: 0h | Len: 4KB |                                  |
 * +----------+----------+                                  |
 * |   Address: <NULL>   |                                  |
 * +----------+----------+                                  |
 * | Type: 1h | Len: 2KB |                                  |
 * +----------+----------+                                  |
 * | Address: Segment(2) +--+                               |
 * +----------+----------+  |                               |
 * | Type: 3h |  Len: 16 |  |                               |
 * +----------+----------+  |                               |
 *                          |                               |
 * +------------------------+                               |
 * |                                                        |
 * v                                                        |
 * Segment(2)                                               |
 * +----------+----------+                                  |
 * |     Address: C      +----------------------------------+
 * +----------+----------+
 * | Type: 0h | Len: 4KB |
 * +----------+----------+
 */
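/*
 * Read or write `lbas` logical blocks starting at `slba`, splitting the
 * request into chunks of at most 1 << max_transfer_shift bytes and mapping
 * each chunk with either an SGL data block or PRP1/PRP2 as illustrated
 * above. Returns the number of LBAs actually transferred.
 */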
static rt_ssize_t nvme_blk_rw(struct rt_nvme_device *ndev, rt_off_t slba,
        rt_ubase_t buffer_dma, rt_size_t lbas, rt_uint8_t opcode)
{
    rt_err_t err;
    rt_uint16_t max_lbas;
    rt_uint32_t lba_shift;
    rt_size_t tlbas;
    rt_ssize_t data_length;
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = ndev->ctrl;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.rw.opcode = opcode;
    cmd.rw.flags = nvme->sgl_mode << RT_NVME_CMD_FLAGS_PSDT_SHIFT;
    cmd.rw.nsid = rt_cpu_to_le32(ndev->nsid);

    tlbas = lbas;
    lba_shift = ndev->lba_shift;
    max_lbas = 1 << (nvme->max_transfer_shift - lba_shift);

    if (nvme->sgl_mode)
    {
        while ((rt_ssize_t)lbas > 0)
        {
            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            data_length = max_lbas << lba_shift;

            cmd.rw.sgl.adddress = rt_cpu_to_le64(buffer_dma);
            cmd.rw.sgl.length = rt_cpu_to_le32(data_length);
            cmd.rw.sgl.sgl_identify = SGL_DESC_TYPE_DATA_BLOCK;
            /* The starting LBA is a 64-bit field; le16 would truncate it. */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }
    }
    else
    {
        void *prp_list = RT_NULL;
        rt_size_t prp_list_size = 0, page_size;

        page_size = nvme->page_size;

        while ((rt_ssize_t)lbas > 0)
        {
            rt_uint64_t prp2_addr, dma_addr;
            rt_ssize_t remain_length, page_offset;

            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            /*
             * PRP transfer:
             *  1. data_length <= 4KB:
             *      prp1 = buffer_dma
             *      prp2 = 0
             *
             *  2. 4KB < data_length <= 8KB:
             *      prp1 = buffer_dma
             *      prp2 = buffer_dma
             *
             *  3. 8KB < data_length:
             *      prp1 = buffer_dma(0, 4k)
             *      prp2 = buffer_dma(4k, ~)
             */
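            /*
             * Worked example (assuming 4 KiB pages): a page-aligned 24 KiB
             * transfer covers 6 pages. PRP1 maps the first page, the other
             * 5 pages need a PRP list, so prps = 5; with 512 entries per
             * list page, pages = 1 and PRP2 points at that one list page.
             */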
            dma_addr = buffer_dma;
            page_offset = buffer_dma & (page_size - 1);
            data_length = max_lbas << lba_shift;
            remain_length = data_length - (page_size - page_offset);

            do {
                rt_size_t prps_per_page, prps, pages;
                rt_uint64_t *prp_list_ptr, prp_list_dma;

                if (remain_length <= 0)
                {
                    prp2_addr = 0;
                    break;
                }

                if (remain_length)
                {
                    dma_addr += (page_size - page_offset);
                }

                if (remain_length <= page_size)
                {
                    prp2_addr = dma_addr;
                    break;
                }

                prps_per_page = page_size / sizeof(rt_uint64_t);
                prps = RT_DIV_ROUND_UP(remain_length, page_size);
                pages = RT_DIV_ROUND_UP(prps - 1, prps_per_page - 1);

                if (prps > prp_list_size)
                {
                    if (prp_list)
                    {
                        rt_free_align(prp_list);
                    }

                    prp_list = rt_malloc_align(pages * page_size, page_size);

                    if (!prp_list)
                    {
                        LOG_D("No memory to create a PRP List");

                        /* Ask user to try again */
                        return tlbas - lbas;
                    }

                    prp_list_size = pages * (prps_per_page - 1) + 1;
                }

                prp_list_ptr = prp_list;
                prp_list_dma = (rt_uint64_t)rt_kmem_v2p(prp_list_ptr);
                prp2_addr = prp_list_dma;

                for (int i = 0; prps; --prps, ++i)
                {
                    /* End of the entry, fill the next entry addr if remain */
                    if ((i == (prps_per_page - 1)) && prps > 1)
                    {
                        prp_list_dma += page_size;
                        *prp_list_ptr++ = rt_cpu_to_le64(prp_list_dma);

                        /* Start to fill the next PRP */
                        i = 0;
                    }

                    /* Advance the cursor, don't rewrite the same entry */
                    *prp_list_ptr++ = rt_cpu_to_le64(dma_addr);
                    dma_addr += page_size;
                }

                /* Flush the whole list from its base, not the end pointer */
                rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, prp_list, pages * page_size);
            } while (0);

            cmd.rw.prp1 = rt_cpu_to_le64(buffer_dma);
            cmd.rw.prp2 = rt_cpu_to_le64(prp2_addr);
            /* The starting LBA is a 64-bit field; le16 would truncate it. */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }

        if (prp_list)
        {
            rt_free_align(prp_list);
        }
    }

    return tlbas;
}

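/*
 * Block read: the controller DMAs directly into the caller's buffer when it
 * is suitably aligned (4 bytes for SGL, one page for PRP); otherwise the
 * data is staged through a page-aligned bounce buffer and copied out.
 */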
static rt_ssize_t nvme_blk_read(struct rt_blk_disk *disk, rt_off_t sector,
        void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p(buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("DMA direct %s buffer must be 4-byte (SGL) or page (PRP) aligned", "read");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
    }

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_READ);

    if (res > 0)
    {
        if (res != sector_count)
        {
            /*
             * Recompute the byte count only for a partial transfer; the
             * equality check is cheaper than an unconditional multiply.
             */
            buffer_size = res * (1 << ndev->lba_shift);
        }

        if (temp_buffer)
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, temp_buffer, buffer_size);
            rt_memcpy(buffer, temp_buffer, buffer_size);
        }
        else
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, buffer, buffer_size);
        }
    }

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

static rt_ssize_t nvme_blk_write(struct rt_blk_disk *disk, rt_off_t sector,
        const void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p((void *)buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("DMA direct %s buffer must be 4-byte (SGL) or page (PRP) aligned", "write");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);

        rt_memcpy(temp_buffer, buffer, buffer_size);
        buffer = temp_buffer;
    }

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)buffer, buffer_size);

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_WRITE);

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

static rt_err_t nvme_blk_getgeome(struct rt_blk_disk *disk,
        struct rt_device_blk_geometry *geometry)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    geometry->bytes_per_sector = 1 << ndev->lba_shift;
    geometry->block_size = 1 << ndev->lba_shift;
    geometry->sector_count = rt_le64_to_cpu(ndev->id.nsze);

    return RT_EOK;
}

static rt_err_t nvme_blk_sync(struct rt_blk_disk *disk)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.common.opcode = RT_NVME_CMD_FLUSH;
    cmd.common.nsid = rt_cpu_to_le32(ndev->nsid);

    return nvme_submit_io_cmd(ndev->ctrl, &cmd);
}

static rt_err_t nvme_blk_erase(struct rt_blk_disk *disk)
{
    rt_err_t err = RT_EOK;
    rt_ssize_t slba, lbas, max_lbas;
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (!nvme->write_zeroes)
    {
        return -RT_ENOSYS;
    }

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.write_zeroes.opcode = RT_NVME_CMD_WRITE_ZEROES;
    cmd.write_zeroes.nsid = rt_cpu_to_le32(ndev->nsid);

    slba = 0;
    lbas = rt_le64_to_cpu(ndev->id.nsze);
    max_lbas = 1 << (nvme->max_transfer_shift - ndev->lba_shift);

    while ((rt_ssize_t)lbas > 0)
    {
        if (lbas < max_lbas)
        {
            max_lbas = (rt_uint16_t)lbas;
        }

        /* The starting LBA is 64-bit; the length is a zero-based 16-bit count */
        cmd.write_zeroes.slba = rt_cpu_to_le64(slba);
        cmd.write_zeroes.length = rt_cpu_to_le16(max_lbas - 1);

        if ((err = nvme_submit_io_cmd(nvme, &cmd)))
        {
            break;
        }

        lbas -= max_lbas;
        slba += max_lbas;
    }

    return err;
}

static rt_err_t nvme_blk_autorefresh(struct rt_blk_disk *disk, rt_bool_t is_auto)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (nvme->volatile_write_cache & RT_NVME_CTRL_VWC_PRESENT)
    {
        return nvme_set_features_simple(nvme, RT_NVME_FEAT_VOLATILE_WC, !!is_auto);
    }
    else if (!is_auto)
    {
        return RT_EOK;
    }

    return -RT_ENOSYS;
}

static const struct rt_blk_disk_ops nvme_blk_ops =
{
    .read = nvme_blk_read,
    .write = nvme_blk_write,
    .getgeome = nvme_blk_getgeome,
    .sync = nvme_blk_sync,
    .erase = nvme_blk_erase,
    .autorefresh = nvme_blk_autorefresh,
};

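/*
 * Completion ISR: a completion queue entry belongs to the current pass of
 * the ring when its phase tag bit matches cq_phase; the phase flips each
 * time the head wraps. A non-zero status field reports a command error.
 */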
static void nvme_queue_isr(int irqno, void *param)
{
    rt_ubase_t level;
    rt_uint16_t head, phase, status;
    struct rt_nvme_queue *queue = param;
    struct rt_nvme_controller *nvme = queue->nvme;

    level = rt_spin_lock_irqsave(&queue->lock);

    head = queue->cq_head;
    phase = queue->cq_phase;
    status = HWREG16(&queue->cq_entry[head].status);
    status = rt_le16_to_cpu(status);

    if ((status & 0x01) == phase)
    {
        if ((status >> 1))
        {
            queue->err = -RT_EIO;

            goto _end_cmd;
        }

        if (nvme->ops->complete_cmd)
        {
            nvme->ops->complete_cmd(queue, queue->cmd);
        }

_end_cmd:
        if (++head == queue->depth)
        {
            head = 0;
            phase = !phase;
        }

        HWREG32(queue->doorbell + nvme->doorbell_stride) = head;
        queue->cq_head = head;
        queue->cq_phase = phase;

        rt_completion_done(&queue->done);
    }

    rt_spin_unlock_irqrestore(&queue->lock, level);
}

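/*
 * Identify uses a PRP pair: PRP1 points at the data buffer and PRP2 is only
 * needed when the identify structure crosses a page boundary.
 */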
static rt_err_t nvme_identify(struct rt_nvme_controller *nvme,
        rt_uint32_t nsid, rt_uint32_t cns, void *data)
{
    rt_err_t err;
    rt_uint32_t page_size = nvme->page_size;
    rt_ubase_t data_phy = (rt_ubase_t)rt_kmem_v2p(data);
    int offset = data_phy & (page_size - 1);
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.identify.opcode = RT_NVME_ADMIN_OPCODE_IDENTIFY;
    cmd.identify.nsid = rt_cpu_to_le32(nsid);
    cmd.identify.prp1 = rt_cpu_to_le64(data_phy);

    if (sizeof(struct rt_nvme_id_ctrl) <= page_size - offset)
    {
        cmd.identify.prp2 = 0;
    }
    else
    {
        data_phy += (page_size - offset);
        cmd.identify.prp2 = rt_cpu_to_le64(data_phy);
    }

    cmd.identify.cns = rt_cpu_to_le32(cns);

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, data, sizeof(struct rt_nvme_id_ctrl));

    if (!(err = nvme_submit_cmd(&nvme->admin_queue, &cmd)))
    {
        rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, data, sizeof(struct rt_nvme_id_ctrl));
    }

    return err;
}

static rt_err_t nvme_attach_queue(struct rt_nvme_queue *queue, rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;
    rt_uint16_t flags = RT_NVME_QUEUE_PHYS_CONTIG;

    rt_memset(&cmd, 0, sizeof(cmd));

    if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_CQ)
    {
        cmd.create_cq.opcode = opcode;
        cmd.create_cq.prp1 = rt_cpu_to_le64(queue->cq_entry_phy);
        cmd.create_cq.cqid = rt_cpu_to_le16(queue->qid);
        cmd.create_cq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_cq.cq_flags = rt_cpu_to_le16(flags | RT_NVME_CQ_IRQ_ENABLED);
        cmd.create_cq.irq_vector = rt_cpu_to_le16(nvme->irqs_nr > 1 ? queue->qid : 0);
    }
    else if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_SQ)
    {
        cmd.create_sq.opcode = opcode;
        cmd.create_sq.prp1 = rt_cpu_to_le64(queue->sq_cmds_phy);
        cmd.create_sq.sqid = rt_cpu_to_le16(queue->qid);
        cmd.create_sq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_sq.sq_flags = rt_cpu_to_le16(flags | RT_NVME_SQ_PRIO_MEDIUM);
        cmd.create_sq.cqid = rt_cpu_to_le16(queue->qid);
    }
    else
    {
        LOG_E("Unsupported queue create opcode = %x", opcode);
        RT_ASSERT(0);
    }

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

rt_inline rt_err_t nvme_attach_queue_sq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_SQ);
}

rt_inline rt_err_t nvme_attach_queue_cq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_CQ);
}

static rt_err_t nvme_detach_queue(struct rt_nvme_queue *queue,
        rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.delete_queue.opcode = opcode;
    cmd.delete_queue.qid = rt_cpu_to_le16(queue->qid);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

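/*
 * Queue memory is allocated uncached and physically linear so the controller
 * and the CPU observe the same ring contents without cache maintenance on
 * every submission or completion.
 */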
rt_inline rt_ubase_t nvme_queue_dma_flags(void)
{
    return RT_DMA_F_NOCACHE | RT_DMA_F_LINEAR;
}

static void nvme_free_queue(struct rt_nvme_queue *queue)
{
    rt_ubase_t dma_flags;
    struct rt_nvme_controller *nvme = queue->nvme;

    if (nvme->ops->cleanup_queue)
    {
        rt_err_t err;

        /* Warn on a non-zero (error) return, not on success */
        if ((err = nvme->ops->cleanup_queue(queue)))
        {
            LOG_W("Cleanup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
        }
    }

    dma_flags = nvme_queue_dma_flags();

    if (queue->sq_cmds)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->sq_cmds) * queue->depth,
                queue->sq_cmds, queue->sq_cmds_phy, dma_flags);
    }

    if (queue->cq_entry)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->cq_entry) * queue->depth,
                queue->cq_entry, queue->cq_entry_phy, dma_flags);
    }
}

static struct rt_nvme_queue *nvme_alloc_queue(struct rt_nvme_controller *nvme,
        int qid, int depth)
{
    rt_err_t err;
    rt_ubase_t dma_flags;
    struct rt_nvme_queue *queue = &nvme->queue[qid];

    rt_memset(queue, 0, sizeof(*queue));

    queue->nvme = nvme;
    queue->doorbell = &nvme->doorbell_tbl[qid * 2 * nvme->doorbell_stride];
    queue->qid = qid;
    queue->depth = depth;
    queue->cq_head = 0;
    queue->cq_phase = 1;
    rt_completion_init(&queue->done);
    rt_spin_lock_init(&queue->lock);

    dma_flags = nvme_queue_dma_flags();

    /* struct rt_nvme_command */
    queue->sq_cmds = rt_dma_alloc(nvme->dev,
            sizeof(*queue->sq_cmds) * depth, &queue->sq_cmds_phy, dma_flags);

    if (!queue->sq_cmds)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    /* struct rt_nvme_completion */
    queue->cq_entry = rt_dma_alloc(nvme->dev,
            sizeof(*queue->cq_entry) * depth, &queue->cq_entry_phy, dma_flags);

    if (!queue->cq_entry)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    rt_memset(queue->sq_cmds, 0, sizeof(struct rt_nvme_command) * depth);
    rt_memset(queue->cq_entry, 0, sizeof(struct rt_nvme_completion) * depth);

    if (nvme->ops->setup_queue)
    {
        /* A non-zero return here is an error, not success */
        if ((err = nvme->ops->setup_queue(queue)))
        {
            LOG_E("Setup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));

            goto _fail;
        }
    }

    return queue;

_fail:
    nvme_free_queue(queue);

    return rt_err_ptr(err);
}

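/*
 * Admin queue bring-up per the NVMe init sequence: disable the controller,
 * program AQA (zero-based SQ size in the low 16 bits, CQ size in the high
 * 16 bits) plus the ASQ/ACQ base addresses, then re-enable the controller
 * and hook up the admin completion interrupt.
 */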
static rt_err_t nvme_configure_admin_queue(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    int irq;
    char name[RT_NAME_MAX];
    rt_uint32_t aqa;
    rt_uint32_t page_shift = ARCH_PAGE_SHIFT;
    rt_uint32_t page_min = RT_NVME_CAP_MPSMIN(nvme->cap) + 12;
    rt_uint32_t page_max = RT_NVME_CAP_MPSMAX(nvme->cap) + 12;
    struct rt_nvme_queue *admin_queue;

    if (page_shift < page_min)
    {
        LOG_E("Device %s page size (%u) %s than host (%u)",
                "minimum", 1 << page_min, "larger", 1 << page_shift);

        return -RT_EINVAL;
    }

    if (page_shift > page_max)
    {
        LOG_W("Device %s page size (%u) %s than host (%u)",
                "maximum", 1 << page_max, "smaller", 1 << page_shift);

        page_shift = page_max;
    }

    if ((err = nvme_disable_ctrl(nvme)))
    {
        return err;
    }

    admin_queue = nvme_alloc_queue(nvme, 0, RT_NVME_AQ_DEPTH);

    if (rt_is_err(admin_queue))
    {
        return rt_ptr_err(admin_queue);
    }

    aqa = admin_queue->depth - 1;
    aqa |= aqa << 16;

    nvme->page_shift = page_shift;
    nvme->page_size = 1U << page_shift;

    nvme->ctrl_config = RT_NVME_CC_CSS_NVM;
    nvme->ctrl_config |= (page_shift - 12) << RT_NVME_CC_MPS_SHIFT;
    nvme->ctrl_config |= RT_NVME_CC_ARB_RR | RT_NVME_CC_SHN_NONE;
    nvme->ctrl_config |= RT_NVME_CC_IOSQES | RT_NVME_CC_IOCQES;

    nvme_writel(nvme, RT_NVME_REG_AQA, aqa);
    nvme_writeq(nvme, RT_NVME_REG_ASQ, admin_queue->sq_cmds_phy);
    nvme_writeq(nvme, RT_NVME_REG_ACQ, admin_queue->cq_entry_phy);

    if ((err = nvme_enable_ctrl(nvme)))
    {
        nvme_free_queue(admin_queue);

        return err;
    }

    irq = nvme->irqs[0];
    rt_snprintf(name, RT_NAME_MAX, "%s-admin-queue", nvme->name);

    rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->admin_queue, name);
    rt_hw_interrupt_umask(irq);

    return RT_EOK;
}

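/*
 * The Number of Queues feature (like AQA) uses zero-based counts: the
 * requested submission and completion queue counts are packed into the low
 * and high 16 bits of dword 11 respectively.
 */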
static rt_err_t nvme_setup_io_queues(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    rt_uint32_t value;
    int irq, cpuid = 0;
    char name[RT_NAME_MAX];
    rt_bool_t affinity_fixup = RT_FALSE;
    RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
    struct rt_nvme_queue *queue;

    nvme->io_queue_max = nvme->irqs_nr > 1 ? nvme->irqs_nr - 1 : 1;
    value = (nvme->io_queue_max - 1) | ((nvme->io_queue_max - 1) << 16);

    if ((err = nvme_set_features_simple(nvme, RT_NVME_FEAT_NUM_QUEUES, value)))
    {
        return err;
    }

    for (int i = 0, q_idx = 1; i < nvme->io_queue_max; ++i, ++q_idx)
    {
        queue = nvme_alloc_queue(nvme, q_idx, nvme->queue_depth);

        /* nvme_alloc_queue() returns an encoded error pointer, never RT_NULL */
        if (rt_is_err(queue))
        {
            return rt_ptr_err(queue);
        }

        if ((err = nvme_attach_queue_cq(queue)) ||
            (err = nvme_attach_queue_sq(queue)))
        {
            return err;
        }
    }

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        irq = nvme->irqs[irq_idx % nvme->irqs_nr];
        rt_snprintf(name, RT_NAME_MAX, "%s-io-queue%d", nvme->name, i);

        if (!affinity_fixup)
        {
            RT_IRQ_AFFINITY_SET(affinity, cpuid % RT_CPUS_NR);

            if (rt_pic_irq_set_affinity(irq, affinity))
            {
                /* Fixup in secondary CPU startup */
                affinity_fixup = RT_TRUE;
            }

            RT_IRQ_AFFINITY_CLEAR(affinity, cpuid++ % RT_CPUS_NR);
        }

        rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->io_queues[i], name);
        rt_hw_interrupt_umask(irq);
    }

    return RT_EOK;
}

static void nvme_remove_io_queues(struct rt_nvme_controller *nvme)
{
    int irq;
    struct rt_nvme_queue *queue;

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        queue = &nvme->io_queues[i];

        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_SQ);
        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_CQ);
        nvme_free_queue(queue);

        irq = nvme->irqs[irq_idx % nvme->irqs_nr];

        rt_hw_interrupt_mask(irq);
        rt_pic_detach_irq(irq, queue);
    }
}

static void nvme_remove_admin_queues(struct rt_nvme_controller *nvme)
{
    int irq = nvme->irqs[0];

    rt_hw_interrupt_mask(irq);
    rt_pic_detach_irq(irq, &nvme->admin_queue);

    nvme_free_queue(&nvme->admin_queue);
}

static void nvme_remove_devices(struct rt_nvme_controller *nvme)
{
    struct rt_nvme_device *ndev, *next_ndev;

    rt_list_for_each_entry_safe(ndev, next_ndev, &nvme->ns_nodes, list)
    {
        rt_list_remove(&ndev->list);

        rt_hw_blk_disk_unregister(&ndev->parent);

        rt_free(ndev);
    }
}

static rt_err_t nvme_scan_device(struct rt_nvme_controller *nvme,
        rt_size_t number_of_ns)
{
    rt_err_t err = RT_EOK;
    rt_uint32_t lbaf;
    struct rt_nvme_id_ns *id = RT_NULL;

    if (!(id = rt_malloc_align(sizeof(*id), nvme->page_size)))
    {
        return -RT_ENOMEM;
    }

    /* NVMe namespace IDs start at 1 */
    for (rt_uint32_t nsid = 1; nsid <= number_of_ns; ++nsid)
    {
        struct rt_nvme_device *ndev = rt_calloc(1, sizeof(*ndev));

        if (!ndev)
        {
            err = -RT_ENOMEM;
            goto _free_res;
        }

        rt_memset(id, 0, sizeof(*id));

        if ((err = nvme_identify(nvme, nsid, 0, id)))
        {
            rt_free(ndev);
            goto _free_res;
        }

        if (!id->nsze)
        {
            /* Inactive namespace, don't leak the device we just allocated */
            rt_free(ndev);
            continue;
        }

        ndev->ctrl = nvme;
        rt_memcpy(&ndev->id, id, sizeof(ndev->id));

        lbaf = id->flbas & RT_NVME_NS_FLBAS_LBA_MASK;
        lbaf |= ((id->flbas & RT_NVME_NS_FLBAS_LBA_UMASK) >> RT_NVME_NS_FLBAS_LBA_SHIFT);

        ndev->nsid = nsid;
        ndev->lba_shift = id->lbaf[lbaf].ds;

        ndev->parent.ida = &nvme_ida;
        ndev->parent.parallel_io = RT_TRUE;
        ndev->parent.ops = &nvme_blk_ops;
        ndev->parent.max_partitions = RT_BLK_PARTITION_MAX;
        rt_dm_dev_set_name(&ndev->parent.parent, "%sn%u", nvme->name, nsid);

        if ((err = rt_hw_blk_disk_register(&ndev->parent)))
        {
            rt_free(ndev);
            goto _free_res;
        }

        rt_list_init(&ndev->list);
        rt_list_insert_before(&nvme->ns_nodes, &ndev->list);
    }

_free_res:
    rt_free_align(id);

    return err;
}

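/*
 * NVMe identify strings (model number, firmware revision) are fixed-width
 * ASCII padded with trailing spaces; compute the length with that padding
 * trimmed.
 */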
rt_inline rt_size_t strip_len(const char *str, rt_size_t max_len)
{
    rt_size_t size = 0;

    for (int i = 0; *str && i < max_len; ++i, ++str)
    {
        if (*str != ' ')
        {
            size = i + 1;
        }
    }

    return size;
}

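/*
 * Controller bring-up: sanity-check register access, allocate an ID, set up
 * the admin and I/O queues, read the controller identify data, then scan
 * and register one block device per active namespace.
 */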
rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    struct rt_nvme_id_ctrl *ctrl = RT_NULL;

    if (!nvme || !nvme->ops)
    {
        return -RT_EINVAL;
    }

    if (nvme_readl(nvme, RT_NVME_REG_CSTS) == (rt_uint32_t)-1)
    {
        LOG_E("Controller registers are not accessible");

        return -RT_EINVAL;
    }

    if ((nvme->nvme_id = rt_dm_ida_alloc(&nvme_controller_ida)) < 0)
    {
        return -RT_EFULL;
    }

    rt_snprintf(nvme->name, RT_NAME_MAX, "nvme%u", nvme->nvme_id);

    nvme->cap = nvme_readq(nvme, RT_NVME_REG_CAP);
    nvme->queue_depth = RT_NVME_CAP_MQES(nvme->cap) + 1;
    nvme->doorbell_stride = 1 << RT_NVME_CAP_STRIDE(nvme->cap);
    nvme->doorbell_tbl = nvme->regs + RT_NVME_REG_DBS;

    if ((err = nvme_configure_admin_queue(nvme)))
    {
        LOG_E("Configure admin queue error = %s", rt_strerror(err));
        goto _free_admin_queue;
    }

    if ((err = nvme_setup_io_queues(nvme)))
    {
        LOG_E("Unable to setup I/O queues error = %s", rt_strerror(err));
        goto _free_admin_queue;
    }

    if (!(ctrl = rt_malloc_align(sizeof(*ctrl), nvme->page_size)))
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    if ((err = nvme_identify(nvme, 0, 1, ctrl)))
    {
        goto _fail;
    }

    if (ctrl->mdts)
    {
        nvme->max_transfer_shift = ctrl->mdts + (RT_NVME_CAP_MPSMIN(nvme->cap) + 12);
    }
    else
    {
        /* 1MB is recommended. */
        nvme->max_transfer_shift = 20;
    }

    nvme->volatile_write_cache = ctrl->vwc;
    nvme->write_zeroes = !!(rt_le64_to_cpu(ctrl->oncs) & RT_NVME_CTRL_ONCS_WRITE_ZEROES);

    if ((rt_le32_to_cpu(ctrl->sgls) & RT_NVME_ID_SGL_SUPPORT_MASK))
    {
        nvme->sgl_mode = RT_NVME_PSDT_SGL_MPTR_SGL;
    }

    /* VS register: major in bits [31:16], minor in bits [15:8] */
    LOG_I("NVM Express v%d.%d (%s, %-.*s, %-.*s)",
            nvme_readl(nvme, RT_NVME_REG_VS) >> 16,
            (nvme_readl(nvme, RT_NVME_REG_VS) >> 8) & 0xff,
            nvme->ops->name,
            (int)strip_len(ctrl->mn, sizeof(ctrl->mn)), ctrl->mn,
            (int)strip_len(ctrl->fr, sizeof(ctrl->fr)), ctrl->fr);

    rt_list_init(&nvme->ns_nodes);

    if ((err = nvme_scan_device(nvme, rt_le32_to_cpu(ctrl->nn))))
    {
        goto _fail;
    }

    rt_free_align(ctrl);

    rt_spin_lock(&nvme_lock);
    rt_list_insert_after(&nvme_nodes, &nvme->list);
    rt_spin_unlock(&nvme_lock);

    return RT_EOK;

_fail:
    if (ctrl)
    {
        rt_free_align(ctrl);
    }
    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);

_free_admin_queue:
    nvme_remove_admin_queues(nvme);
    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    return err;
}

rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme)
{
    rt_err_t err;

    if (!nvme)
    {
        return -RT_EINVAL;
    }

    rt_spin_lock(&nvme_lock);
    rt_list_remove(&nvme->list);
    rt_spin_unlock(&nvme_lock);

    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);
    nvme_remove_admin_queues(nvme);

    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    if (!(err = nvme_shutdown_ctrl(nvme)))
    {
        err = nvme_disable_ctrl(nvme);
    }
    else
    {
        LOG_E("%s: shutdown error = %s", nvme->name, rt_strerror(err));
    }

    return err;
}

/*
 * NVMe I/O queues should be per-CPU. Fix up the IRQ affinity after the
 * secondary CPUs have started, the stage at which the affinity setting is
 * most likely to succeed.
 */
static int nvme_queue_affinify_fixup(void)
{
    int cpuid = rt_hw_cpu_id();
    struct rt_nvme_controller *nvme;
    RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
    RT_IRQ_AFFINITY_DECLARE(current_affinity) = { 0 };

    RT_IRQ_AFFINITY_SET(affinity, cpuid);

    rt_hw_spin_lock(&nvme_lock.lock);

    rt_list_for_each_entry(nvme, &nvme_nodes, list)
    {
        for (int i = cpuid % RT_CPUS_NR; i < nvme->io_queue_max; i += RT_CPUS_NR)
        {
            int irq = nvme->irqs[i];

            if (!rt_pic_irq_get_affinity(irq, current_affinity) &&
                !rt_bitmap_test_bit(current_affinity, cpuid))
            {
                rt_ubase_t level = rt_hw_interrupt_disable();

                rt_pic_irq_set_affinity(irq, affinity);

                rt_hw_interrupt_enable(level);
            }
        }
    }

    rt_hw_spin_unlock(&nvme_lock.lock);

    return 0;
}
INIT_SECONDARY_CPU_EXPORT(nvme_queue_affinify_fixup);