nvme.c

/*
 * Copyright (c) 2006-2023, RT-Thread Development Team
 *
 * SPDX-License-Identifier: Apache-2.0
 *
 * Change Logs:
 * Date           Author       Notes
 * 2023-02-25     GuEe-GUI     the first version
 */

#include <rthw.h>
#include <rtthread.h>
#include <rtdevice.h>

#define DBG_TAG "rtdm.nvme"
#define DBG_LVL DBG_INFO
#include <rtdbg.h>

static struct rt_dm_ida nvme_controller_ida = RT_DM_IDA_INIT(CUSTOM);
static struct rt_dm_ida nvme_ida = RT_DM_IDA_INIT(NVME);

static RT_DEFINE_SPINLOCK(nvme_lock);
static rt_list_t nvme_nodes = RT_LIST_OBJECT_INIT(nvme_nodes);

rt_inline rt_uint32_t nvme_readl(struct rt_nvme_controller *nvme, int offset)
{
    return HWREG32(nvme->regs + offset);
}

rt_inline void nvme_writel(struct rt_nvme_controller *nvme, int offset, rt_uint32_t value)
{
    HWREG32(nvme->regs + offset) = value;
}

rt_inline rt_uint64_t nvme_readq(struct rt_nvme_controller *nvme, int offset)
{
    rt_uint32_t lo32, hi32;

    lo32 = HWREG32(nvme->regs + offset);
    hi32 = HWREG32(nvme->regs + offset + 4);

    return ((rt_uint64_t)hi32 << 32) + lo32;
}

rt_inline void nvme_writeq(struct rt_nvme_controller *nvme, int offset, rt_uint64_t value)
{
    nvme_writel(nvme, offset, (rt_uint32_t)(value & 0xffffffff));
    nvme_writel(nvme, offset + 4, (rt_uint32_t)(value >> 32));
}
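
/*
 * Poll the CSTS register until (CSTS & mask) == value or the timeout derived
 * from CAP.TO expires. CAP.TO is expressed in 500 ms units, which is why the
 * deadline below is RT_NVME_CAP_TIMEOUT(cap) * 500 milliseconds.
 */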
static rt_err_t nvme_poll_csts(struct rt_nvme_controller *nvme,
        rt_uint32_t mask, rt_uint32_t value)
{
    rt_tick_t timeout;

    timeout = rt_tick_from_millisecond(RT_NVME_CAP_TIMEOUT(nvme->cap) * 500);
    timeout += rt_tick_get();

    do {
        if ((nvme_readl(nvme, RT_NVME_REG_CSTS) & mask) == value)
        {
            return RT_EOK;
        }

        rt_hw_cpu_relax();
    } while (rt_tick_get() < timeout);

    return -RT_ETIMEOUT;
}

static rt_err_t nvme_enable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_ENABLE;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, RT_NVME_CSTS_RDY);
}

static rt_err_t nvme_disable_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config &= ~RT_NVME_CC_ENABLE;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_RDY, 0);
}

static rt_err_t nvme_shutdown_ctrl(struct rt_nvme_controller *nvme)
{
    nvme->ctrl_config &= ~RT_NVME_CC_SHN_MASK;
    nvme->ctrl_config |= RT_NVME_CC_SHN_NORMAL;
    nvme_writel(nvme, RT_NVME_REG_CC, nvme->ctrl_config);

    return nvme_poll_csts(nvme, RT_NVME_CSTS_SHST_MASK, RT_NVME_CSTS_SHST_CMPLT);
}

rt_inline rt_le16_t nvme_next_cmdid(struct rt_nvme_controller *nvme)
{
    return rt_cpu_to_le16((rt_uint16_t)rt_atomic_add(&nvme->cmdid, 1));
}
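
/*
 * Submit one command synchronously: copy it into the submission queue, ring
 * the SQ tail doorbell, then block on the queue's completion object until
 * nvme_queue_isr() signals that the matching completion entry has arrived.
 */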
static rt_err_t nvme_submit_cmd(struct rt_nvme_queue *queue,
        struct rt_nvme_command *cmd)
{
    rt_ubase_t level;
    rt_err_t err = RT_EOK;
    rt_uint16_t tail, head;
    struct rt_nvme_controller *nvme = queue->nvme;

_retry:
    level = rt_spin_lock_irqsave(&queue->lock);

    tail = queue->sq_tail;
    head = queue->cq_head;

    if (tail + 1 == head)
    {
        /* IO queue is full, waiting for the last IO command to complete. */
        rt_spin_unlock_irqrestore(&queue->lock, level);

        rt_thread_yield();

        goto _retry;
    }

    cmd->common.cmdid = nvme_next_cmdid(nvme);
    rt_memcpy(&queue->sq_cmds[tail], cmd, sizeof(*cmd));

    if (nvme->ops->submit_cmd)
    {
        if ((err = nvme->ops->submit_cmd(queue, cmd)))
        {
            /* Don't leak the queue lock on the error path. */
            rt_spin_unlock_irqrestore(&queue->lock, level);

            return err;
        }
    }

    if (++tail == queue->depth)
    {
        tail = 0;
    }
    HWREG32(queue->doorbell) = tail;
    queue->sq_tail = tail;

    queue->cmd = cmd;
    queue->err = RT_EOK;

    rt_spin_unlock_irqrestore(&queue->lock, level);

    err = rt_completion_wait(&queue->done,
            rt_tick_from_millisecond(queue->qid != 0 ? RT_WAITING_FOREVER : 60));

    return err ? : queue->err;
}

static rt_err_t nvme_set_features_simple(struct rt_nvme_controller *nvme,
        rt_uint32_t fid, rt_uint32_t dword11)
{
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.features.opcode = RT_NVME_ADMIN_OPCODE_SET_FEATURES;
    cmd.features.fid = rt_cpu_to_le32(fid);
    cmd.features.dword11 = rt_cpu_to_le32(dword11);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}
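
/*
 * Spread I/O commands over the available I/O queues: each CPU keeps its own
 * rolling queue index (stepped by RT_CPUS_NR) so that concurrent submitters
 * tend to land on different queues.
 */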
static rt_err_t nvme_submit_io_cmd(struct rt_nvme_controller *nvme,
        struct rt_nvme_command *cmd)
{
    rt_uint16_t qid;

    qid = rt_atomic_add(&nvme->ioqid[rt_hw_cpu_id()], RT_CPUS_NR);
    qid %= nvme->io_queue_max;

    return nvme_submit_cmd(&nvme->io_queues[qid], cmd);
}

/*
 * PRP Mode:
 *
 * |63                                   n+1|n                0|
 * +----------------------------------------+----------+---+---+
 * |           Page Base Address            |  Offset  | 0 | 0 |
 * +----------------------------------------+----------+---+---+
 *                     |
 *                     v
 *           Host Physical Pages
 *
 *                                  +----------------------------+
 * +--------------+----------+      |           Page k           |
 * | PRP Entry1   |  Offset  +----->+----------------------------+
 * +--------------+----------+      |         Page k + 1         |
 *                                  +----------------------------+
 *                                               ...
 *                                  +----------------------------+
 * +--------------+----------+      |         Page k + m         |
 * | PRP Entry2   |    0     +----->+----------------------------+
 * +--------------+----------+      |       Page k + m + 1       |
 *                                  +----------------------------+
 *
 * PRP List (In PRP Entry2):
 *
 * |63                                   n+1|n                0|
 * +----------------------------------------+------------------+
 * |         Page Base Address k            |        0h        |
 * +----------------------------------------+------------------+
 * |         Page Base Address k + 1        |        0h        |
 * +----------------------------------------+------------------+
 * |                   ...                                     |
 * +----------------------------------------+------------------+
 * |         Page Base Address k + m        |        0h        |
 * +----------------------------------------+------------------+
 * |         Page Base Address k + m + 1    |        0h        |
 * +----------------------------------------+------------------+
 *
 * SGL Mode:
 *
 *                                        +----- Non-transport
 *                LBA                    /
 * +---------------+---------------+-------/-------+---------------+
 * |      3KB      |      4KB      |      2KB      |      4KB      |
 * +-------+-------+---------+-----+---------------+-------+-------+
 *         |                 |                             |
 *         v                 v                             v
 * +---------------+ +---------------+             +---------------+
 * |  A MEM BLOCK  | |  B MEM BLOCK  |             |  C MEM BLOCK  |
 * +---------------+ +---------------+             +---------------+
 *
 * Segment(0)
 * +----------+----------+
 * | Address: A          +----> A MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 3KB |
 * +----------+----------+
 * | Address: Segment(1) +----> Segment(1)
 * +----------+----------+
 * | Type: 2h | Len: 48  |
 * +----------+----------+
 *
 * Segment(1)
 * +----------+----------+
 * | Address: B          +----> B MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 4KB |
 * +----------+----------+
 * | Address: <NULL>     |      (2KB non-transport hole)
 * +----------+----------+
 * | Type: 1h | Len: 2KB |
 * +----------+----------+
 * | Address: Segment(2) +----> Segment(2)
 * +----------+----------+
 * | Type: 0h | Len: 16  |
 * +----------+----------+
 *
 * Segment(2)
 * +----------+----------+
 * | Address: C          +----> C MEM BLOCK
 * +----------+----------+
 * | Type: 0h | Len: 4KB |
 * +----------+----------+
 */
static rt_ssize_t nvme_blk_rw(struct rt_nvme_device *ndev, rt_off_t slba,
        rt_ubase_t buffer_dma, rt_size_t lbas, rt_uint8_t opcode)
{
    rt_err_t err;
    rt_uint16_t max_lbas;
    rt_uint32_t lba_shift;
    rt_size_t tlbas;
    rt_ssize_t data_length;
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = ndev->ctrl;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.rw.opcode = opcode;
    cmd.rw.flags = nvme->sgl_mode << RT_NVME_CMD_FLAGS_PSDT_SHIFT;
    cmd.rw.nsid = rt_cpu_to_le32(ndev->nsid);

    tlbas = lbas;
    lba_shift = ndev->lba_shift;
    max_lbas = 1 << (nvme->max_transfer_shift - lba_shift);

    if (nvme->sgl_mode)
    {
        while ((rt_ssize_t)lbas > 0)
        {
            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            data_length = max_lbas << lba_shift;

            cmd.rw.sgl.adddress = rt_cpu_to_le64(buffer_dma);
            cmd.rw.sgl.length = rt_cpu_to_le32(data_length);
            cmd.rw.sgl.sgl_identify = SGL_DESC_TYPE_DATA_BLOCK;
            /* SLBA is a 64-bit field, NLB (length) is 16-bit */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }
    }
    else
    {
        void *prp_list = RT_NULL;
        rt_size_t prp_list_size = 0, page_size;

        page_size = nvme->page_size;

        while ((rt_ssize_t)lbas > 0)
        {
            rt_uint64_t prp2_addr, dma_addr;
            rt_ssize_t remain_length, page_offset;

            if (lbas < max_lbas)
            {
                max_lbas = (rt_uint16_t)lbas;
            }

            /*
             * PRP transfer:
             *  1. data_length <= 4KB:
             *      prp1 = buffer_dma
             *      prp2 = 0
             *
             *  2. 4KB < data_length <= 8KB:
             *      prp1 = buffer_dma
             *      prp2 = buffer_dma
             *
             *  3. 8KB < data_length:
             *      prp1 = buffer_dma(0, 4k)
             *      prp2 = buffer_dma(4k, ~)
             */
            dma_addr = buffer_dma;
            page_offset = buffer_dma & (page_size - 1);
            data_length = max_lbas << lba_shift;
            remain_length = data_length - (page_size - page_offset);

            do {
                rt_size_t prps_per_page, prps, pages;
                rt_uint64_t *prp_list_ptr, prp_list_dma;

                if (remain_length <= 0)
                {
                    prp2_addr = 0;
                    break;
                }

                if (remain_length)
                {
                    dma_addr += (page_size - page_offset);
                }

                if (remain_length <= page_size)
                {
                    prp2_addr = dma_addr;
                    break;
                }

                prps_per_page = page_size / sizeof(rt_uint64_t);
                prps = RT_DIV_ROUND_UP(remain_length, page_size);
                pages = RT_DIV_ROUND_UP(prps - 1, prps_per_page - 1);

                if (prps > prp_list_size)
                {
                    if (prp_list)
                    {
                        rt_free_align(prp_list);
                    }
                    prp_list = rt_malloc_align(pages * page_size, page_size);

                    if (!prp_list)
                    {
                        LOG_D("No memory to create a PRP List");

                        /* Ask user to try again */
                        return tlbas - lbas;
                    }
                    prp_list_size = pages * (prps_per_page - 1) + 1;
                }

                prp_list_ptr = prp_list;
                prp_list_dma = (rt_uint64_t)rt_kmem_v2p(prp_list_ptr);
                prp2_addr = prp_list_dma;

                for (int i = 0; prps; --prps, ++i)
                {
                    /* End of the entry, fill the next entry addr if remain */
                    if ((i == (prps_per_page - 1)) && prps > 1)
                    {
                        prp_list_dma += page_size;
                        *prp_list_ptr++ = rt_cpu_to_le64(prp_list_dma);

                        /* Start to fill the next PRP */
                        i = 0;
                    }

                    /* Advance through the list as entries are written */
                    *prp_list_ptr++ = rt_cpu_to_le64(dma_addr);
                    dma_addr += page_size;
                }

                /* Flush the whole PRP list so the device sees every entry */
                rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, prp_list, pages * page_size);
            } while (0);

            cmd.rw.prp1 = rt_cpu_to_le64(buffer_dma);
            cmd.rw.prp2 = rt_cpu_to_le64(prp2_addr);
            /* SLBA is a 64-bit field, NLB (length) is 16-bit */
            cmd.rw.slba = rt_cpu_to_le64(slba);
            cmd.rw.length = rt_cpu_to_le16(max_lbas - 1);

            if ((err = nvme_submit_io_cmd(nvme, &cmd)))
            {
                tlbas -= lbas;
                break;
            }

            lbas -= max_lbas;
            slba += max_lbas;
            buffer_dma += data_length;
        }

        if (prp_list)
        {
            rt_free_align(prp_list);
        }
    }

    return tlbas;
}

static rt_ssize_t nvme_blk_read(struct rt_blk_disk *disk, rt_off_t sector,
        void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p(buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("Direct DMA %s buffer must be 4-byte (SGL) or page (PRP) aligned", "read");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
    }

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_READ);

    if (res > 0)
    {
        if (res != sector_count)
        {
            /*
             * Only recompute the byte count for a short transfer; the
             * equality check above is cheaper than always multiplying.
             */
            buffer_size = res * (1 << ndev->lba_shift);
        }

        if (temp_buffer)
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, temp_buffer, buffer_size);
            rt_memcpy(buffer, temp_buffer, buffer_size);
        }
        else
        {
            rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, buffer, buffer_size);
        }
    }

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

static rt_ssize_t nvme_blk_write(struct rt_blk_disk *disk, rt_off_t sector,
        const void *buffer, rt_size_t sector_count)
{
    rt_ssize_t res;
    rt_uint32_t page_bits;
    rt_size_t buffer_size;
    rt_ubase_t buffer_dma;
    void *temp_buffer = RT_NULL;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    buffer_size = (1 << ndev->lba_shift) * sector_count;
    buffer_dma = (rt_ubase_t)rt_kmem_v2p((void *)buffer);

    if ((nvme->sgl_mode && (buffer_dma & RT_GENMASK(1, 0))) ||
        (!nvme->sgl_mode && (buffer_dma & ARCH_PAGE_MASK)))
    {
        LOG_D("Direct DMA %s buffer must be 4-byte (SGL) or page (PRP) aligned", "write");

        page_bits = rt_page_bits(buffer_size);
        temp_buffer = rt_pages_alloc(page_bits);

        if (!temp_buffer)
        {
            return -RT_ENOMEM;
        }

        buffer_dma = (rt_ubase_t)rt_kmem_v2p(temp_buffer);
        rt_memcpy(temp_buffer, buffer, buffer_size);
        buffer = temp_buffer;
    }

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, (void *)buffer, buffer_size);

    res = nvme_blk_rw(ndev, sector, buffer_dma, sector_count, RT_NVME_CMD_WRITE);

    if (temp_buffer)
    {
        rt_pages_free(temp_buffer, page_bits);
    }

    return res;
}

static rt_err_t nvme_blk_getgeome(struct rt_blk_disk *disk,
        struct rt_device_blk_geometry *geometry)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    geometry->bytes_per_sector = 1 << ndev->lba_shift;
    geometry->block_size = 1 << ndev->lba_shift;
    geometry->sector_count = rt_le64_to_cpu(ndev->id.nsze);

    return RT_EOK;
}

static rt_err_t nvme_blk_sync(struct rt_blk_disk *disk)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.common.opcode = RT_NVME_CMD_FLUSH;
    cmd.common.nsid = rt_cpu_to_le32(ndev->nsid);

    return nvme_submit_io_cmd(ndev->ctrl, &cmd);
}

static rt_err_t nvme_blk_erase(struct rt_blk_disk *disk)
{
    rt_err_t err = RT_EOK;
    rt_ssize_t slba, lbas, max_lbas;
    struct rt_nvme_command cmd;
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (!nvme->write_zeroes)
    {
        return -RT_ENOSYS;
    }

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.write_zeroes.opcode = RT_NVME_CMD_WRITE_ZEROES;
    cmd.write_zeroes.nsid = rt_cpu_to_le32(ndev->nsid);

    slba = 0;
    lbas = rt_le64_to_cpu(ndev->id.nsze);
    max_lbas = 1 << (nvme->max_transfer_shift - ndev->lba_shift);

    while ((rt_ssize_t)lbas > 0)
    {
        if (lbas < max_lbas)
        {
            max_lbas = (rt_uint16_t)lbas;
        }

        /* SLBA is a 64-bit field, NLB (length) is 16-bit */
        cmd.write_zeroes.slba = rt_cpu_to_le64(slba);
        cmd.write_zeroes.length = rt_cpu_to_le16(max_lbas - 1);

        if ((err = nvme_submit_io_cmd(nvme, &cmd)))
        {
            break;
        }

        lbas -= max_lbas;
        slba += max_lbas;
    }

    return err;
}

static rt_err_t nvme_blk_autorefresh(struct rt_blk_disk *disk, rt_bool_t is_auto)
{
    struct rt_nvme_device *ndev = rt_disk_to_nvme_device(disk);
    struct rt_nvme_controller *nvme = ndev->ctrl;

    if (nvme->volatile_write_cache & RT_NVME_CTRL_VWC_PRESENT)
    {
        return nvme_set_features_simple(nvme, RT_NVME_FEAT_VOLATILE_WC, !!is_auto);
    }
    else if (!is_auto)
    {
        return RT_EOK;
    }

    return -RT_ENOSYS;
}

static const struct rt_blk_disk_ops nvme_blk_ops =
{
    .read = nvme_blk_read,
    .write = nvme_blk_write,
    .getgeome = nvme_blk_getgeome,
    .sync = nvme_blk_sync,
    .erase = nvme_blk_erase,
    .autorefresh = nvme_blk_autorefresh,
};
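
/*
 * Completion queue ISR: a completion entry belongs to the current pass when
 * its phase bit matches queue->cq_phase; the phase flips each time the head
 * wraps around. A non-zero status code (bits above the phase bit) is
 * reported to the waiter as -RT_EIO.
 */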
static void nvme_queue_isr(int irqno, void *param)
{
    rt_ubase_t level;
    rt_uint16_t head, phase, status;
    struct rt_nvme_queue *queue = param;
    struct rt_nvme_controller *nvme = queue->nvme;

    level = rt_spin_lock_irqsave(&queue->lock);

    head = queue->cq_head;
    phase = queue->cq_phase;
    status = HWREG16(&queue->cq_entry[head].status);
    status = rt_le16_to_cpu(status);

    if ((status & 0x01) == phase)
    {
        if ((status >> 1))
        {
            queue->err = -RT_EIO;

            goto _end_cmd;
        }

        if (nvme->ops->complete_cmd)
        {
            nvme->ops->complete_cmd(queue, queue->cmd);
        }

    _end_cmd:
        if (++head == queue->depth)
        {
            head = 0;
            phase = !phase;
        }

        HWREG32(queue->doorbell + nvme->doorbell_stride) = head;
        queue->cq_head = head;
        queue->cq_phase = phase;

        rt_completion_done(&queue->done);
    }

    rt_spin_unlock_irqrestore(&queue->lock, level);
}
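
/*
 * Issue an Identify admin command. The caller's buffer is addressed with
 * PRP1/PRP2: if the identify data fits in the remainder of the first page,
 * PRP2 stays zero, otherwise it points at the following page.
 */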
static rt_err_t nvme_identify(struct rt_nvme_controller *nvme,
        rt_uint32_t nsid, rt_uint32_t cns, void *data)
{
    rt_err_t err;
    rt_uint32_t page_size = nvme->page_size;
    rt_ubase_t data_phy = (rt_ubase_t)rt_kmem_v2p(data);
    int offset = data_phy & (page_size - 1);
    struct rt_nvme_command cmd;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.identify.opcode = RT_NVME_ADMIN_OPCODE_IDENTIFY;
    cmd.identify.nsid = rt_cpu_to_le32(nsid);
    cmd.identify.prp1 = rt_cpu_to_le64(data_phy);

    if (sizeof(struct rt_nvme_id_ctrl) <= page_size - offset)
    {
        cmd.identify.prp2 = 0;
    }
    else
    {
        data_phy += (page_size - offset);
        cmd.identify.prp2 = rt_cpu_to_le64(data_phy);
    }
    cmd.identify.cns = rt_cpu_to_le32(cns);

    rt_hw_cpu_dcache_ops(RT_HW_CACHE_FLUSH, data, sizeof(struct rt_nvme_id_ctrl));

    if (!(err = nvme_submit_cmd(&nvme->admin_queue, &cmd)))
    {
        rt_hw_cpu_dcache_ops(RT_HW_CACHE_INVALIDATE, data, sizeof(struct rt_nvme_id_ctrl));
    }

    return err;
}

static rt_err_t nvme_attach_queue(struct rt_nvme_queue *queue, rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;
    rt_uint16_t flags = RT_NVME_QUEUE_PHYS_CONTIG;

    rt_memset(&cmd, 0, sizeof(cmd));

    if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_CQ)
    {
        cmd.create_cq.opcode = opcode;
        cmd.create_cq.prp1 = rt_cpu_to_le64(queue->cq_entry_phy);
        cmd.create_cq.cqid = rt_cpu_to_le16(queue->qid);
        cmd.create_cq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_cq.cq_flags = rt_cpu_to_le16(flags | RT_NVME_CQ_IRQ_ENABLED);
        cmd.create_cq.irq_vector = rt_cpu_to_le16(nvme->irqs_nr > 1 ? queue->qid : 0);
    }
    else if (opcode == RT_NVME_ADMIN_OPCODE_CREATE_SQ)
    {
        cmd.create_sq.opcode = opcode;
        cmd.create_sq.prp1 = rt_cpu_to_le64(queue->sq_cmds_phy);
        cmd.create_sq.sqid = rt_cpu_to_le16(queue->qid);
        cmd.create_sq.qsize = rt_cpu_to_le16(queue->depth - 1);
        cmd.create_sq.sq_flags = rt_cpu_to_le16(flags | RT_NVME_SQ_PRIO_MEDIUM);
        cmd.create_sq.cqid = rt_cpu_to_le16(queue->qid);
    }
    else
    {
        LOG_E("Invalid queue create opcode = %x", opcode);
        RT_ASSERT(0);
    }

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

rt_inline rt_err_t nvme_attach_queue_sq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_SQ);
}

rt_inline rt_err_t nvme_attach_queue_cq(struct rt_nvme_queue *queue)
{
    return nvme_attach_queue(queue, RT_NVME_ADMIN_OPCODE_CREATE_CQ);
}

static rt_err_t nvme_detach_queue(struct rt_nvme_queue *queue,
        rt_uint8_t opcode)
{
    struct rt_nvme_command cmd;
    struct rt_nvme_controller *nvme = queue->nvme;

    rt_memset(&cmd, 0, sizeof(cmd));
    cmd.delete_queue.opcode = opcode;
    cmd.delete_queue.qid = rt_cpu_to_le16(queue->qid);

    return nvme_submit_cmd(&nvme->admin_queue, &cmd);
}

rt_inline rt_ubase_t nvme_queue_dma_flags(void)
{
    return RT_DMA_F_NOCACHE | RT_DMA_F_LINEAR;
}

static void nvme_free_queue(struct rt_nvme_queue *queue)
{
    rt_ubase_t dma_flags;
    struct rt_nvme_controller *nvme = queue->nvme;

    if (nvme->ops->cleanup_queue)
    {
        rt_err_t err;

        /* Warn only when the cleanup hook actually fails */
        if ((err = nvme->ops->cleanup_queue(queue)))
        {
            LOG_W("Cleanup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));
        }
    }

    dma_flags = nvme_queue_dma_flags();

    if (queue->sq_cmds)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->sq_cmds) * queue->depth,
                queue->sq_cmds, queue->sq_cmds_phy, dma_flags);
    }

    if (queue->cq_entry)
    {
        rt_dma_free(nvme->dev, sizeof(*queue->cq_entry) * queue->depth,
                queue->cq_entry, queue->cq_entry_phy, dma_flags);
    }
}
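
/*
 * Allocate one submission/completion queue pair in DMA-coherent memory.
 * Each queue owns two doorbells in the doorbell table: the SQ tail doorbell
 * at index qid * 2 * stride, and the CQ head doorbell one stride after it.
 */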
static struct rt_nvme_queue *nvme_alloc_queue(struct rt_nvme_controller *nvme,
        int qid, int depth)
{
    rt_err_t err;
    rt_ubase_t dma_flags;
    struct rt_nvme_queue *queue = &nvme->queue[qid];

    rt_memset(queue, 0, sizeof(*queue));
    queue->nvme = nvme;
    queue->doorbell = &nvme->doorbell_tbl[qid * 2 * nvme->doorbell_stride];
    queue->qid = qid;
    queue->depth = depth;
    queue->cq_head = 0;
    queue->cq_phase = 1;
    rt_completion_init(&queue->done);
    rt_spin_lock_init(&queue->lock);

    dma_flags = nvme_queue_dma_flags();

    /* struct rt_nvme_command */
    queue->sq_cmds = rt_dma_alloc(nvme->dev,
            sizeof(*queue->sq_cmds) * depth, &queue->sq_cmds_phy, dma_flags);

    if (!queue->sq_cmds)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    /* struct rt_nvme_completion */
    queue->cq_entry = rt_dma_alloc(nvme->dev,
            sizeof(*queue->cq_entry) * depth, &queue->cq_entry_phy, dma_flags);

    if (!queue->cq_entry)
    {
        err = -RT_ENOMEM;
        goto _fail;
    }

    rt_memset(queue->sq_cmds, 0, sizeof(struct rt_nvme_command) * depth);
    rt_memset(queue->cq_entry, 0, sizeof(struct rt_nvme_completion) * depth);

    if (nvme->ops->setup_queue)
    {
        /* Fail only when the setup hook actually returns an error */
        if ((err = nvme->ops->setup_queue(queue)))
        {
            LOG_E("Setup[%s] queue error = %s", nvme->ops->name, rt_strerror(err));

            goto _fail;
        }
    }

    return queue;

_fail:
    nvme_free_queue(queue);

    return rt_err_ptr(err);
}
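
/*
 * Bring up the admin queue: validate the host page size against CAP.MPSMIN /
 * CAP.MPSMAX, disable the controller, program AQA/ASQ/ACQ, re-enable it, and
 * finally hook the admin completion interrupt.
 */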
static rt_err_t nvme_configure_admin_queue(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    int irq;
    char name[RT_NAME_MAX];
    rt_uint32_t aqa;
    rt_uint32_t page_shift = ARCH_PAGE_SHIFT;
    rt_uint32_t page_min = RT_NVME_CAP_MPSMIN(nvme->cap) + 12;
    rt_uint32_t page_max = RT_NVME_CAP_MPSMAX(nvme->cap) + 12;
    struct rt_nvme_queue *admin_queue;

    if (page_shift < page_min)
    {
        LOG_E("Device %s page size (%u) %s than host (%u)",
                "minimum", 1 << page_min, "larger", 1 << page_shift);

        return -RT_EINVAL;
    }

    if (page_shift > page_max)
    {
        LOG_W("Device %s page size (%u) %s than host (%u)",
                "maximum", 1 << page_max, "smaller", 1 << page_shift);

        page_shift = page_max;
    }

    if ((err = nvme_disable_ctrl(nvme)))
    {
        return err;
    }

    admin_queue = nvme_alloc_queue(nvme, 0, RT_NVME_AQ_DEPTH);

    if (rt_is_err(admin_queue))
    {
        return rt_ptr_err(admin_queue);
    }

    aqa = admin_queue->depth - 1;
    aqa |= aqa << 16;

    nvme->page_shift = page_shift;
    nvme->page_size = 1U << page_shift;

    nvme->ctrl_config = RT_NVME_CC_CSS_NVM;
    nvme->ctrl_config |= (page_shift - 12) << RT_NVME_CC_MPS_SHIFT;
    nvme->ctrl_config |= RT_NVME_CC_ARB_RR | RT_NVME_CC_SHN_NONE;
    nvme->ctrl_config |= RT_NVME_CC_IOSQES | RT_NVME_CC_IOCQES;

    nvme_writel(nvme, RT_NVME_REG_AQA, aqa);
    nvme_writeq(nvme, RT_NVME_REG_ASQ, admin_queue->sq_cmds_phy);
    nvme_writeq(nvme, RT_NVME_REG_ACQ, admin_queue->cq_entry_phy);

    if ((err = nvme_enable_ctrl(nvme)))
    {
        nvme_free_queue(admin_queue);

        return err;
    }

    irq = nvme->irqs[0];

    rt_snprintf(name, RT_NAME_MAX, "%s-admin-queue", nvme->name);

    rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->admin_queue, name);
    rt_hw_interrupt_umask(irq);

    return RT_EOK;
}
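
/*
 * Create the I/O queue pairs: request the queue count with Set Features
 * (Number of Queues), allocate and attach one CQ/SQ pair per queue, then
 * install each queue's interrupt and try to pin it to its own CPU.
 */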
static rt_err_t nvme_setup_io_queues(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    rt_uint32_t value;
    int irq, cpuid = 0;
    char name[RT_NAME_MAX];
    rt_bool_t affinity_fixup = RT_FALSE;
    RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
    struct rt_nvme_queue *queue;

    nvme->io_queue_max = nvme->irqs_nr > 1 ? nvme->irqs_nr - 1 : 1;
    value = (nvme->io_queue_max - 1) | ((nvme->io_queue_max - 1) << 16);

    if ((err = nvme_set_features_simple(nvme, RT_NVME_FEAT_NUM_QUEUES, value)))
    {
        return err;
    }

    for (int i = 0, q_idx = 1; i < nvme->io_queue_max; ++i, ++q_idx)
    {
        queue = nvme_alloc_queue(nvme, q_idx, nvme->queue_depth);

        /* nvme_alloc_queue() returns an error pointer, never RT_NULL */
        if (rt_is_err(queue))
        {
            return rt_ptr_err(queue);
        }

        if ((err = nvme_attach_queue_cq(queue)) ||
            (err = nvme_attach_queue_sq(queue)))
        {
            return err;
        }
    }

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        irq = nvme->irqs[irq_idx % nvme->irqs_nr];

        rt_snprintf(name, RT_NAME_MAX, "%s-io-queue%d", nvme->name, i);

        if (!affinity_fixup)
        {
            RT_IRQ_AFFINITY_SET(affinity, cpuid % RT_CPUS_NR);

            if (rt_pic_irq_set_affinity(irq, affinity))
            {
                /* Fixup in secondary CPU startup */
                affinity_fixup = RT_TRUE;
            }

            RT_IRQ_AFFINITY_CLEAR(affinity, cpuid++ % RT_CPUS_NR);
        }

        rt_hw_interrupt_install(irq, nvme_queue_isr, &nvme->io_queues[i], name);
        rt_hw_interrupt_umask(irq);
    }

    return RT_EOK;
}

static void nvme_remove_io_queues(struct rt_nvme_controller *nvme)
{
    int irq;
    struct rt_nvme_queue *queue;

    for (int i = 0, irq_idx = 1; i < nvme->io_queue_max; ++i, ++irq_idx)
    {
        queue = &nvme->io_queues[i];

        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_SQ);
        nvme_detach_queue(queue, RT_NVME_ADMIN_OPCODE_DELETE_CQ);
        nvme_free_queue(queue);

        irq = nvme->irqs[irq_idx % nvme->irqs_nr];

        rt_hw_interrupt_mask(irq);
        rt_pic_detach_irq(irq, queue);
    }
}

static void nvme_remove_admin_queues(struct rt_nvme_controller *nvme)
{
    int irq = nvme->irqs[0];

    rt_hw_interrupt_mask(irq);
    rt_pic_detach_irq(irq, &nvme->admin_queue);

    nvme_free_queue(&nvme->admin_queue);
}

static void nvme_remove_devices(struct rt_nvme_controller *nvme)
{
    struct rt_nvme_device *ndev, *next_ndev;

    rt_list_for_each_entry_safe(ndev, next_ndev, &nvme->ns_nodes, list)
    {
        rt_list_remove(&ndev->list);

        rt_hw_blk_disk_unregister(&ndev->parent);

        rt_free(ndev);
    }
}
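
/*
 * Identify every namespace (NSIDs are 1-based) and register each non-empty
 * one as a block disk named "<controller>n<nsid>".
 */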
static rt_err_t nvme_scan_device(struct rt_nvme_controller *nvme,
        rt_size_t number_of_ns)
{
    rt_err_t err = RT_EOK;
    rt_uint32_t lbaf;
    struct rt_nvme_id_ns *id = RT_NULL;

    if (!(id = rt_malloc_align(sizeof(*id), nvme->page_size)))
    {
        return -RT_ENOMEM;
    }

    /* NVMe namespace IDs start at 1 */
    for (rt_uint32_t nsid = 1; nsid <= number_of_ns; ++nsid)
    {
        struct rt_nvme_device *ndev = rt_calloc(1, sizeof(*ndev));

        if (!ndev)
        {
            err = -RT_ENOMEM;
            goto _free_res;
        }

        rt_memset(id, 0, sizeof(*id));

        if ((err = nvme_identify(nvme, nsid, 0, id)))
        {
            goto _free_res;
        }

        if (!id->nsze)
        {
            /* Skip inactive namespaces without leaking the device object */
            rt_free(ndev);
            continue;
        }

        ndev->ctrl = nvme;

        rt_memcpy(&ndev->id, id, sizeof(ndev->id));

        lbaf = id->flbas & RT_NVME_NS_FLBAS_LBA_MASK;
        lbaf |= ((id->flbas & RT_NVME_NS_FLBAS_LBA_UMASK) >> RT_NVME_NS_FLBAS_LBA_SHIFT);

        ndev->nsid = nsid;
        ndev->lba_shift = id->lbaf[lbaf].ds;
        ndev->parent.ida = &nvme_ida;
        ndev->parent.parallel_io = RT_TRUE;
        ndev->parent.ops = &nvme_blk_ops;
        ndev->parent.max_partitions = RT_BLK_PARTITION_MAX;
        rt_dm_dev_set_name(&ndev->parent.parent, "%sn%u", nvme->name, nsid);

        if ((err = rt_hw_blk_disk_register(&ndev->parent)))
        {
            goto _free_res;
        }

        rt_list_init(&ndev->list);
        rt_list_insert_before(&nvme->ns_nodes, &ndev->list);
    }

_free_res:
    rt_free_align(id);

    return err;
}

rt_inline rt_size_t strip_len(const char *str, rt_size_t max_len)
{
    rt_size_t size = 0;

    for (int i = 0; *str && i < max_len; ++i, ++str)
    {
        if (*str != ' ')
        {
            size = i + 1;
        }
    }

    return size;
}
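
/*
 * Register an NVMe controller with the block layer. The caller (typically a
 * bus glue driver, e.g. a PCIe probe function) is expected to have mapped
 * the controller registers and filled in at least regs, dev, ops, irqs and
 * irqs_nr before calling this. A minimal sketch (the "priv" container and
 * its members are hypothetical; only fields used by this file are shown):
 *
 *     struct rt_nvme_controller *nvme = &priv->nvme;
 *
 *     nvme->regs    = priv->mmio_base;   // mapped controller registers
 *     nvme->dev     = &priv->parent;     // owner device for rt_dma_alloc()
 *     nvme->ops     = &my_nvme_ops;      // per-transport hooks; optional
 *                                        // callbacks may be RT_NULL
 *     nvme->irqs    = priv->irq_table;
 *     nvme->irqs_nr = priv->irq_count;
 *
 *     return rt_nvme_controller_register(nvme);
 */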
rt_err_t rt_nvme_controller_register(struct rt_nvme_controller *nvme)
{
    rt_err_t err;
    struct rt_nvme_id_ctrl *ctrl = RT_NULL;

    if (!nvme || !nvme->ops)
    {
        return -RT_EINVAL;
    }

    if (nvme_readl(nvme, RT_NVME_REG_CSTS) == (rt_uint32_t)-1)
    {
        LOG_E("Controller is not accessible (CSTS reads as all ones)");

        return -RT_EINVAL;
    }

    if ((nvme->nvme_id = rt_dm_ida_alloc(&nvme_controller_ida)) < 0)
    {
        return -RT_EFULL;
    }

    rt_snprintf(nvme->name, RT_NAME_MAX, "nvme%u", nvme->nvme_id);

    nvme->cap = nvme_readq(nvme, RT_NVME_REG_CAP);
    nvme->queue_depth = RT_NVME_CAP_MQES(nvme->cap) + 1;
    nvme->doorbell_stride = 1 << RT_NVME_CAP_STRIDE(nvme->cap);
    nvme->doorbell_tbl = nvme->regs + RT_NVME_REG_DBS;

    if ((err = nvme_configure_admin_queue(nvme)))
    {
        LOG_E("Configure admin queue error = %s", rt_strerror(err));

        goto _free_admin_queue;
    }

    if ((err = nvme_setup_io_queues(nvme)))
    {
        LOG_E("Unable to setup I/O queues error = %s", rt_strerror(err));

        goto _free_admin_queue;
    }

    if (!(ctrl = rt_malloc_align(sizeof(*ctrl), nvme->page_size)))
    {
        err = -RT_ENOMEM;

        goto _fail;
    }

    if ((err = nvme_identify(nvme, 0, 1, ctrl)))
    {
        goto _fail;
    }

    if (ctrl->mdts)
    {
        nvme->max_transfer_shift = ctrl->mdts + (RT_NVME_CAP_MPSMIN(nvme->cap) + 12);
    }
    else
    {
        /* 1MB is recommended. */
        nvme->max_transfer_shift = 20;
    }

    nvme->volatile_write_cache = ctrl->vwc;
    nvme->write_zeroes = !!(rt_le64_to_cpu(ctrl->oncs) & RT_NVME_CTRL_ONCS_WRITE_ZEROES);

    if ((rt_le32_to_cpu(ctrl->sgls) & RT_NVME_ID_SGL_SUPPORT_MASK))
    {
        nvme->sgl_mode = RT_NVME_PSDT_SGL_MPTR_SGL;
    }

    /* VS register: major version in bits 31:16, minor in bits 15:8 */
    LOG_I("NVM Express v%d.%d (%s, %.*s, %.*s)",
            nvme_readl(nvme, RT_NVME_REG_VS) >> 16,
            (nvme_readl(nvme, RT_NVME_REG_VS) >> 8) & 0xff,
            nvme->ops->name,
            (int)strip_len(ctrl->mn, sizeof(ctrl->mn)), ctrl->mn,
            (int)strip_len(ctrl->fr, sizeof(ctrl->fr)), ctrl->fr);

    rt_list_init(&nvme->ns_nodes);

    if ((err = nvme_scan_device(nvme, rt_le32_to_cpu(ctrl->nn))))
    {
        goto _fail;
    }

    rt_free_align(ctrl);

    rt_spin_lock(&nvme_lock);
    rt_list_insert_after(&nvme_nodes, &nvme->list);
    rt_spin_unlock(&nvme_lock);

    return RT_EOK;

_fail:
    if (ctrl)
    {
        rt_free_align(ctrl);
    }
    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);

_free_admin_queue:
    nvme_remove_admin_queues(nvme);

    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    return err;
}

rt_err_t rt_nvme_controller_unregister(struct rt_nvme_controller *nvme)
{
    rt_err_t err;

    if (!nvme)
    {
        return -RT_EINVAL;
    }

    rt_spin_lock(&nvme_lock);
    rt_list_remove(&nvme->list);
    rt_spin_unlock(&nvme_lock);

    nvme_remove_devices(nvme);
    nvme_remove_io_queues(nvme);
    nvme_remove_admin_queues(nvme);

    rt_dm_ida_free(&nvme_controller_ida, nvme->nvme_id);

    if (!(err = nvme_shutdown_ctrl(nvme)))
    {
        err = nvme_disable_ctrl(nvme);
    }
    else
    {
        LOG_E("%s: shutdown error = %s", nvme->name, rt_strerror(err));
    }

    return err;
}

/*
 * NVMe I/O queues should be per-CPU. Fix up the IRQ affinity after the
 * secondary CPUs have started, so that the affinity setting succeeds
 * whenever possible.
 */
static int nvme_queue_affinify_fixup(void)
{
    int cpuid = rt_hw_cpu_id();
    struct rt_nvme_controller *nvme;
    RT_IRQ_AFFINITY_DECLARE(affinity) = { 0 };
    RT_IRQ_AFFINITY_DECLARE(current_affinity) = { 0 };

    RT_IRQ_AFFINITY_SET(affinity, cpuid);

    rt_hw_spin_lock(&nvme_lock.lock);

    rt_list_for_each_entry(nvme, &nvme_nodes, list)
    {
        for (int i = cpuid % RT_CPUS_NR; i < nvme->io_queue_max; i += RT_CPUS_NR)
        {
            int irq = nvme->irqs[i];

            if (!rt_pic_irq_get_affinity(irq, current_affinity) &&
                !rt_bitmap_test_bit(current_affinity, cpuid))
            {
                rt_ubase_t level = rt_hw_interrupt_disable();

                rt_pic_irq_set_affinity(irq, affinity);

                rt_hw_interrupt_enable(level);
            }
        }
    }

    rt_hw_spin_unlock(&nvme_lock.lock);

    return 0;
}
INIT_SECONDARY_CPU_EXPORT(nvme_queue_affinify_fixup);