LCOV - code coverage report
Current view: top level - lib/ublk - ublk.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 0 1205 0.0 %
Date: 2024-12-14 23:42:31 Functions: 0 80 0.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2022 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include <liburing.h>
       7             : 
       8             : #include "spdk/stdinc.h"
       9             : #include "spdk/string.h"
      10             : #include "spdk/bdev.h"
      11             : #include "spdk/endian.h"
      12             : #include "spdk/env.h"
      13             : #include "spdk/likely.h"
      14             : #include "spdk/log.h"
      15             : #include "spdk/util.h"
      16             : #include "spdk/queue.h"
      17             : #include "spdk/json.h"
      18             : #include "spdk/ublk.h"
      19             : #include "spdk/thread.h"
      20             : 
      21             : #include "ublk_internal.h"
      22             : 
/* Kernel-provided control device node and per-device char device prefix */
#define UBLK_CTRL_DEV                                   "/dev/ublk-control"
#define UBLK_BLK_CDEV                                   "/dev/ublkc"

/* Linux block layer always addresses in 512-byte sectors */
#define LINUX_SECTOR_SHIFT                              9
#define UBLK_IO_MAX_BYTES                               SPDK_BDEV_LARGE_BUF_MAX_SIZE
#define UBLK_DEV_MAX_QUEUES                             32
#define UBLK_DEV_MAX_QUEUE_DEPTH                        1024
#define UBLK_QUEUE_REQUEST                              32
#define UBLK_STOP_BUSY_WAITING_MS                       10000
#define UBLK_BUSY_POLLING_INTERVAL_US                   20000
#define UBLK_DEFAULT_CTRL_URING_POLLING_INTERVAL_US     1000
/* By default, kernel ublk_drv driver can support up to 64 block devices */
#define UBLK_DEFAULT_MAX_SUPPORTED_DEVS                 64

/* Per-poll-group iobuf channel cache sizes */
#define UBLK_IOBUF_SMALL_CACHE_SIZE                     128
#define UBLK_IOBUF_LARGE_CACHE_SIZE                     32

/* Debug logging helper that prefixes messages with the ublk device id */
#define UBLK_DEBUGLOG(ublk, format, ...) \
        SPDK_DEBUGLOG(ublk, "ublk%d: " format, ublk->ublk_id, ##__VA_ARGS__);
      42             : 
static uint32_t g_num_ublk_poll_groups = 0;
/* Round-robin cursor for distributing queues across poll groups */
static uint32_t g_next_ublk_poll_group = 0;
/* Device-count limit; may be raised from the ublk_drv module parameter (see ublk_get_max_support_devs) */
static uint32_t g_ublks_max = UBLK_DEFAULT_MAX_SUPPORTED_DEVS;
static struct spdk_cpuset g_core_mask;

/* Forward declarations */
struct ublk_queue;
struct ublk_poll_group;
struct ublk_io;
static void _ublk_submit_bdev_io(struct ublk_queue *q, struct ublk_io *io);
static void ublk_dev_queue_fini(struct ublk_queue *q);
static int ublk_poll(void *arg);

static int ublk_set_params(struct spdk_ublk_dev *ublk);
static int ublk_start_dev(struct spdk_ublk_dev *ublk, bool is_recovering);
static void ublk_free_dev(struct spdk_ublk_dev *ublk);
static void ublk_delete_dev(void *arg);
static int ublk_close_dev(struct spdk_ublk_dev *ublk);
static int ublk_ctrl_start_recovery(struct spdk_ublk_dev *ublk);

static int ublk_ctrl_cmd_submit(struct spdk_ublk_dev *ublk, uint32_t cmd_op);

/* Human-readable names for control-command opcodes, used in log messages */
static const char *ublk_op_name[64] = {
	[UBLK_CMD_GET_DEV_INFO] = "UBLK_CMD_GET_DEV_INFO",
	[UBLK_CMD_ADD_DEV] =    "UBLK_CMD_ADD_DEV",
	[UBLK_CMD_DEL_DEV] =    "UBLK_CMD_DEL_DEV",
	[UBLK_CMD_START_DEV] =  "UBLK_CMD_START_DEV",
	[UBLK_CMD_STOP_DEV] =   "UBLK_CMD_STOP_DEV",
	[UBLK_CMD_SET_PARAMS] = "UBLK_CMD_SET_PARAMS",
	[UBLK_CMD_START_USER_RECOVERY] = "UBLK_CMD_START_USER_RECOVERY",
	[UBLK_CMD_END_USER_RECOVERY] = "UBLK_CMD_END_USER_RECOVERY",
};
      74             : 
/* Callback invoked once a data buffer has been obtained for an I/O */
typedef void (*ublk_get_buf_cb)(struct ublk_io *io);

/* State for one outstanding ublk I/O request */
struct ublk_io {
	void                    *payload;	/* data buffer for this request */
	void                    *mpool_entry;
	bool                    need_data;	/* NOTE(review): presumably tracks UBLK_IO_NEED_GET_DATA flow — confirm against queue code */
	bool                    user_copy;	/* device uses UBLK_F_USER_COPY data path */
	uint16_t                tag;		/* kernel-assigned request tag within the queue */
	uint64_t                payload_size;
	uint32_t                cmd_op;
	int32_t                 result;		/* completion status to report back */
	struct spdk_bdev_desc   *bdev_desc;
	struct spdk_io_channel  *bdev_ch;
	const struct ublksrv_io_desc    *iod;	/* kernel-provided I/O descriptor */
	ublk_get_buf_cb         get_buf_cb;
	struct ublk_queue       *q;		/* owning queue */
	/* for bdev io_wait */
	struct spdk_bdev_io_wait_entry bdev_io_wait;
	struct spdk_iobuf_entry iobuf;

	TAILQ_ENTRY(ublk_io)    tailq;
};
      97             : 
/* One ublk hardware queue; assigned to a poll group for servicing */
struct ublk_queue {
	uint32_t                q_id;
	uint32_t                q_depth;
	struct ublk_io          *ios;		/* per-tag I/O contexts (allocated elsewhere) */
	TAILQ_HEAD(, ublk_io)   completed_io_list;
	TAILQ_HEAD(, ublk_io)   inflight_io_list;
	uint32_t                cmd_inflight;	/* uring cmds currently submitted to the kernel */
	bool                    is_stopping;
	struct ublksrv_io_desc  *io_cmd_buf;	/* descriptor area shared with the kernel */
	/* ring depth == dev_info->queue_depth. */
	struct io_uring         ring;
	struct spdk_ublk_dev    *dev;		/* parent device */
	struct ublk_poll_group  *poll_group;
	struct spdk_io_channel  *bdev_ch;

	TAILQ_ENTRY(ublk_queue) tailq;
};
     115             : 
/* Per exported ublk block device state */
struct spdk_ublk_dev {
	struct spdk_bdev        *bdev;		/* backing SPDK bdev */
	struct spdk_bdev_desc   *bdev_desc;

	int                     cdev_fd;	/* char device fd (see UBLK_BLK_CDEV) */
	struct ublk_params      dev_params;	/* sent via UBLK_CMD_SET_PARAMS */
	struct ublksrv_ctrl_dev_info    dev_info;	/* exchanged via ADD_DEV / GET_DEV_INFO */

	uint32_t                ublk_id;	/* ublk device id (ublk<N>) */
	uint32_t                num_queues;
	uint32_t                queue_depth;
	uint32_t                online_num_queues;
	uint32_t                sector_per_block_shift;
	struct ublk_queue       queues[UBLK_DEV_MAX_QUEUES];

	struct spdk_poller      *retry_poller;
	int                     retry_count;
	uint32_t                queues_closed;
	ublk_ctrl_cb            ctrl_cb;	/* user callback for the in-flight control op; fired once then cleared */
	void                    *cb_arg;
	uint32_t                current_cmd_op;	/* control op currently outstanding on the ctrl ring */
	uint32_t                ctrl_ops_in_progress;
	bool                    is_closing;
	bool                    is_recovering;	/* set during user-recovery until END_USER_RECOVERY completes */

	TAILQ_ENTRY(spdk_ublk_dev) tailq;
	TAILQ_ENTRY(spdk_ublk_dev) wait_tailq;
};
     144             : 
/* A poller on a dedicated SPDK thread that services a set of ublk queues */
struct ublk_poll_group {
	struct spdk_thread              *ublk_thread;
	struct spdk_poller              *ublk_poller;
	struct spdk_iobuf_channel       iobuf_ch;	/* buffer source for I/O payloads */
	TAILQ_HEAD(, ublk_queue)        queue_list;	/* queues assigned to this group */
};
     151             : 
/* Global ublk target state; single instance g_ublk_tgt below */
struct ublk_tgt {
	int                     ctrl_fd;	/* fd of /dev/ublk-control */
	bool                    active;
	bool                    is_destroying;
	spdk_ublk_fini_cb       cb_fn;		/* fini completion callback */
	void                    *cb_arg;
	struct io_uring         ctrl_ring;	/* io_uring used for control commands */
	struct spdk_poller      *ctrl_poller;
	uint32_t                ctrl_ops_in_progress;	/* control cmds awaiting completion across all devices */
	struct ublk_poll_group  *poll_groups;
	uint32_t                num_ublk_devs;
	uint64_t                features;	/* feature bits from UBLK_U_CMD_GET_FEATURES */
	/* `ublk_drv` supports UBLK_F_CMD_IOCTL_ENCODE */
	bool                    ioctl_encode;
	/* `ublk_drv` supports UBLK_F_USER_COPY */
	bool                    user_copy;
	/* `ublk_drv` supports UBLK_F_USER_RECOVERY */
	bool                    user_recovery;
};

/* All registered ublk devices, plus the singleton target state */
static TAILQ_HEAD(, spdk_ublk_dev) g_ublk_devs = TAILQ_HEAD_INITIALIZER(g_ublk_devs);
static struct ublk_tgt g_ublk_tgt;
     174             : 
     175             : /* helpers for using io_uring */
     176             : static inline int
     177           0 : ublk_setup_ring(uint32_t depth, struct io_uring *r, unsigned flags)
     178             : {
     179           0 :         struct io_uring_params p = {};
     180             : 
     181           0 :         p.flags = flags | IORING_SETUP_CQSIZE;
     182           0 :         p.cq_entries = depth;
     183             : 
     184           0 :         return io_uring_queue_init_params(depth, r, &p);
     185           0 : }
     186             : 
     187             : static inline struct io_uring_sqe *
     188           0 : ublk_uring_get_sqe(struct io_uring *r, uint32_t idx)
     189             : {
     190             :         /* Need to update the idx since we set IORING_SETUP_SQE128 parameter in ublk_setup_ring */
     191           0 :         return &r->sq.sqes[idx << 1];
     192             : }
     193             : 
     194             : static inline void *
     195           0 : ublk_get_sqe_cmd(struct io_uring_sqe *sqe)
     196             : {
     197           0 :         return (void *)&sqe->addr3;
     198             : }
     199             : 
/*
 * Store the command opcode into the SQE (carried in sqe->off for
 * IORING_OP_URING_CMD).  Kernels advertising UBLK_F_CMD_IOCTL_ENCODE
 * expect ioctl-style encoded opcodes; older kernels take the raw value,
 * which is also the fallback for any opcode not listed here.
 */
static inline void
ublk_set_sqe_cmd_op(struct io_uring_sqe *sqe, uint32_t cmd_op)
{
	uint32_t opc = cmd_op;

	if (g_ublk_tgt.ioctl_encode) {
		switch (cmd_op) {
		/* ctrl uring */
		case UBLK_CMD_GET_DEV_INFO:
			opc = _IOR('u', UBLK_CMD_GET_DEV_INFO, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_ADD_DEV:
			opc = _IOWR('u', UBLK_CMD_ADD_DEV, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_DEL_DEV:
			opc = _IOWR('u', UBLK_CMD_DEL_DEV, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_START_DEV:
			opc = _IOWR('u', UBLK_CMD_START_DEV, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_STOP_DEV:
			opc = _IOWR('u', UBLK_CMD_STOP_DEV, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_SET_PARAMS:
			opc = _IOWR('u', UBLK_CMD_SET_PARAMS, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_START_USER_RECOVERY:
			opc = _IOWR('u', UBLK_CMD_START_USER_RECOVERY, struct ublksrv_ctrl_cmd);
			break;
		case UBLK_CMD_END_USER_RECOVERY:
			opc = _IOWR('u', UBLK_CMD_END_USER_RECOVERY, struct ublksrv_ctrl_cmd);
			break;

		/* io uring */
		case UBLK_IO_FETCH_REQ:
			opc = _IOWR('u', UBLK_IO_FETCH_REQ, struct ublksrv_io_cmd);
			break;
		case UBLK_IO_COMMIT_AND_FETCH_REQ:
			opc = _IOWR('u', UBLK_IO_COMMIT_AND_FETCH_REQ, struct ublksrv_io_cmd);
			break;
		case UBLK_IO_NEED_GET_DATA:
			opc = _IOWR('u', UBLK_IO_NEED_GET_DATA, struct ublksrv_io_cmd);
			break;
		default:
			break;
		}
	}

	sqe->off = opc;
}
     250             : 
     251             : static inline uint64_t
     252           0 : build_user_data(uint16_t tag, uint8_t op)
     253             : {
     254           0 :         assert(!(tag >> 16) && !(op >> 8));
     255             : 
     256           0 :         return tag | (op << 16);
     257             : }
     258             : 
     259             : static inline uint16_t
     260           0 : user_data_to_tag(uint64_t user_data)
     261             : {
     262           0 :         return user_data & 0xffff;
     263             : }
     264             : 
     265             : static inline uint8_t
     266           0 : user_data_to_op(uint64_t user_data)
     267             : {
     268           0 :         return (user_data >> 16) & 0xff;
     269             : }
     270             : 
     271             : static inline uint64_t
     272           0 : ublk_user_copy_pos(uint16_t q_id, uint16_t tag)
     273             : {
     274           0 :         return (uint64_t)UBLKSRV_IO_BUF_OFFSET + ((((uint64_t)q_id) << UBLK_QID_OFF) | (((
     275           0 :                                 uint64_t)tag) << UBLK_TAG_OFF));
     276             : }
     277             : 
/*
 * One-time module initialization: mark the control fd and control ring
 * as not yet opened.  Must be called from the SPDK application thread.
 */
void
spdk_ublk_init(void)
{
	assert(spdk_thread_is_app_thread(NULL));

	g_ublk_tgt.ctrl_fd = -1;
	g_ublk_tgt.ctrl_ring.ring_fd = -1;
}
     286             : 
/*
 * Handle a failed control-command completion (res is the negative errno
 * from the CQE).  Logs the failure, notifies the registered user callback
 * at most once, then unwinds the device according to which command failed.
 */
static void
ublk_ctrl_cmd_error(struct spdk_ublk_dev *ublk, int32_t res)
{
	assert(res != 0);

	SPDK_ERRLOG("ctrlr cmd %s failed, %s\n", ublk_op_name[ublk->current_cmd_op], spdk_strerror(-res));
	if (ublk->ctrl_cb) {
		ublk->ctrl_cb(ublk->cb_arg, res);
		/* Clear so the callback cannot fire a second time. */
		ublk->ctrl_cb = NULL;
	}

	switch (ublk->current_cmd_op) {
	case UBLK_CMD_ADD_DEV:
	case UBLK_CMD_SET_PARAMS:
	case UBLK_CMD_START_USER_RECOVERY:
	case UBLK_CMD_END_USER_RECOVERY:
		/* Device is not live yet: delete it. */
		ublk_delete_dev(ublk);
		break;
	case UBLK_CMD_START_DEV:
		ublk_close_dev(ublk);
		break;
	case UBLK_CMD_GET_DEV_INFO:
		ublk_free_dev(ublk);
		break;
	case UBLK_CMD_STOP_DEV:
	case UBLK_CMD_DEL_DEV:
		/* Nothing further to unwind. */
		break;
	default:
		SPDK_ERRLOG("No match cmd operation,cmd_op = %d\n", ublk->current_cmd_op);
		break;
	}
}
     319             : 
     320             : static void
     321           0 : ublk_ctrl_process_cqe(struct io_uring_cqe *cqe)
     322             : {
     323           0 :         struct spdk_ublk_dev *ublk;
     324           0 :         int rc = 0;
     325             : 
     326           0 :         ublk = (struct spdk_ublk_dev *)cqe->user_data;
     327           0 :         UBLK_DEBUGLOG(ublk, "ctrl cmd %s completed\n", ublk_op_name[ublk->current_cmd_op]);
     328           0 :         ublk->ctrl_ops_in_progress--;
     329             : 
     330           0 :         if (spdk_unlikely(cqe->res != 0)) {
     331           0 :                 ublk_ctrl_cmd_error(ublk, cqe->res);
     332           0 :                 return;
     333             :         }
     334             : 
     335           0 :         switch (ublk->current_cmd_op) {
     336             :         case UBLK_CMD_ADD_DEV:
     337           0 :                 rc = ublk_set_params(ublk);
     338           0 :                 if (rc < 0) {
     339           0 :                         ublk_delete_dev(ublk);
     340           0 :                         goto cb_done;
     341             :                 }
     342           0 :                 break;
     343             :         case UBLK_CMD_SET_PARAMS:
     344           0 :                 rc = ublk_start_dev(ublk, false);
     345           0 :                 if (rc < 0) {
     346           0 :                         ublk_delete_dev(ublk);
     347           0 :                         goto cb_done;
     348             :                 }
     349           0 :                 break;
     350             :         case UBLK_CMD_START_DEV:
     351           0 :                 goto cb_done;
     352             :                 break;
     353             :         case UBLK_CMD_STOP_DEV:
     354           0 :                 break;
     355             :         case UBLK_CMD_DEL_DEV:
     356           0 :                 if (ublk->ctrl_cb) {
     357           0 :                         ublk->ctrl_cb(ublk->cb_arg, 0);
     358           0 :                         ublk->ctrl_cb = NULL;
     359           0 :                 }
     360           0 :                 ublk_free_dev(ublk);
     361           0 :                 break;
     362             :         case UBLK_CMD_GET_DEV_INFO:
     363           0 :                 rc = ublk_ctrl_start_recovery(ublk);
     364           0 :                 if (rc < 0) {
     365           0 :                         ublk_delete_dev(ublk);
     366           0 :                         goto cb_done;
     367             :                 }
     368           0 :                 break;
     369             :         case UBLK_CMD_START_USER_RECOVERY:
     370           0 :                 rc = ublk_start_dev(ublk, true);
     371           0 :                 if (rc < 0) {
     372           0 :                         ublk_delete_dev(ublk);
     373           0 :                         goto cb_done;
     374             :                 }
     375           0 :                 break;
     376             :         case UBLK_CMD_END_USER_RECOVERY:
     377           0 :                 SPDK_NOTICELOG("Ublk %u recover done successfully\n", ublk->ublk_id);
     378           0 :                 ublk->is_recovering = false;
     379           0 :                 goto cb_done;
     380             :                 break;
     381             :         default:
     382           0 :                 SPDK_ERRLOG("No match cmd operation,cmd_op = %d\n", ublk->current_cmd_op);
     383           0 :                 break;
     384             :         }
     385             : 
     386           0 :         return;
     387             : 
     388             : cb_done:
     389           0 :         if (ublk->ctrl_cb) {
     390           0 :                 ublk->ctrl_cb(ublk->cb_arg, rc);
     391           0 :                 ublk->ctrl_cb = NULL;
     392           0 :         }
     393           0 : }
     394             : 
     395             : static int
     396           0 : ublk_ctrl_poller(void *arg)
     397             : {
     398           0 :         struct io_uring *ring = &g_ublk_tgt.ctrl_ring;
     399           0 :         struct io_uring_cqe *cqe;
     400           0 :         const int max = 8;
     401           0 :         int i, count = 0, rc;
     402             : 
     403           0 :         if (!g_ublk_tgt.ctrl_ops_in_progress) {
     404           0 :                 return SPDK_POLLER_IDLE;
     405             :         }
     406             : 
     407           0 :         for (i = 0; i < max; i++) {
     408           0 :                 rc = io_uring_peek_cqe(ring, &cqe);
     409           0 :                 if (rc == -EAGAIN) {
     410           0 :                         break;
     411             :                 }
     412             : 
     413           0 :                 assert(cqe != NULL);
     414           0 :                 g_ublk_tgt.ctrl_ops_in_progress--;
     415             : 
     416           0 :                 ublk_ctrl_process_cqe(cqe);
     417             : 
     418           0 :                 io_uring_cqe_seen(ring, cqe);
     419           0 :                 count++;
     420           0 :         }
     421             : 
     422           0 :         return count > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
     423           0 : }
     424             : 
     425             : static int
     426           0 : ublk_ctrl_cmd_submit(struct spdk_ublk_dev *ublk, uint32_t cmd_op)
     427             : {
     428           0 :         uint32_t dev_id = ublk->ublk_id;
     429           0 :         int rc = -EINVAL;
     430           0 :         struct io_uring_sqe *sqe;
     431           0 :         struct ublksrv_ctrl_cmd *cmd;
     432             : 
     433           0 :         UBLK_DEBUGLOG(ublk, "ctrl cmd %s\n", ublk_op_name[cmd_op]);
     434             : 
     435           0 :         sqe = io_uring_get_sqe(&g_ublk_tgt.ctrl_ring);
     436           0 :         if (!sqe) {
     437           0 :                 SPDK_ERRLOG("No available sqe in ctrl ring\n");
     438           0 :                 assert(false);
     439             :                 return -ENOENT;
     440             :         }
     441             : 
     442           0 :         cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
     443           0 :         sqe->fd = g_ublk_tgt.ctrl_fd;
     444           0 :         sqe->opcode = IORING_OP_URING_CMD;
     445           0 :         sqe->ioprio = 0;
     446           0 :         cmd->dev_id = dev_id;
     447           0 :         cmd->queue_id = -1;
     448           0 :         ublk->current_cmd_op = cmd_op;
     449             : 
     450           0 :         switch (cmd_op) {
     451             :         case UBLK_CMD_ADD_DEV:
     452             :         case UBLK_CMD_GET_DEV_INFO:
     453           0 :                 cmd->addr = (__u64)(uintptr_t)&ublk->dev_info;
     454           0 :                 cmd->len = sizeof(ublk->dev_info);
     455           0 :                 break;
     456             :         case UBLK_CMD_SET_PARAMS:
     457           0 :                 cmd->addr = (__u64)(uintptr_t)&ublk->dev_params;
     458           0 :                 cmd->len = sizeof(ublk->dev_params);
     459           0 :                 break;
     460             :         case UBLK_CMD_START_DEV:
     461           0 :                 cmd->data[0] = getpid();
     462           0 :                 break;
     463             :         case UBLK_CMD_STOP_DEV:
     464           0 :                 break;
     465             :         case UBLK_CMD_DEL_DEV:
     466           0 :                 break;
     467             :         case UBLK_CMD_START_USER_RECOVERY:
     468           0 :                 break;
     469             :         case UBLK_CMD_END_USER_RECOVERY:
     470           0 :                 cmd->data[0] = getpid();
     471           0 :                 break;
     472             :         default:
     473           0 :                 SPDK_ERRLOG("No match cmd operation,cmd_op = %d\n", cmd_op);
     474           0 :                 return -EINVAL;
     475             :         }
     476           0 :         ublk_set_sqe_cmd_op(sqe, cmd_op);
     477           0 :         io_uring_sqe_set_data(sqe, ublk);
     478             : 
     479           0 :         rc = io_uring_submit(&g_ublk_tgt.ctrl_ring);
     480           0 :         if (rc < 0) {
     481           0 :                 SPDK_ERRLOG("uring submit rc %d\n", rc);
     482           0 :                 assert(false);
     483             :                 return rc;
     484             :         }
     485           0 :         g_ublk_tgt.ctrl_ops_in_progress++;
     486           0 :         ublk->ctrl_ops_in_progress++;
     487             : 
     488           0 :         return 0;
     489           0 : }
     490             : 
     491             : static int
     492           0 : ublk_ctrl_cmd_get_features(void)
     493             : {
     494           0 :         int rc;
     495           0 :         struct io_uring_sqe *sqe;
     496           0 :         struct io_uring_cqe *cqe;
     497           0 :         struct ublksrv_ctrl_cmd *cmd;
     498           0 :         uint32_t cmd_op;
     499             : 
     500           0 :         sqe = io_uring_get_sqe(&g_ublk_tgt.ctrl_ring);
     501           0 :         if (!sqe) {
     502           0 :                 SPDK_ERRLOG("No available sqe in ctrl ring\n");
     503           0 :                 assert(false);
     504             :                 return -ENOENT;
     505             :         }
     506             : 
     507           0 :         cmd = (struct ublksrv_ctrl_cmd *)ublk_get_sqe_cmd(sqe);
     508           0 :         sqe->fd = g_ublk_tgt.ctrl_fd;
     509           0 :         sqe->opcode = IORING_OP_URING_CMD;
     510           0 :         sqe->ioprio = 0;
     511           0 :         cmd->dev_id = -1;
     512           0 :         cmd->queue_id = -1;
     513           0 :         cmd->addr = (__u64)(uintptr_t)&g_ublk_tgt.features;
     514           0 :         cmd->len = sizeof(g_ublk_tgt.features);
     515             : 
     516           0 :         cmd_op = UBLK_U_CMD_GET_FEATURES;
     517           0 :         ublk_set_sqe_cmd_op(sqe, cmd_op);
     518             : 
     519           0 :         rc = io_uring_submit(&g_ublk_tgt.ctrl_ring);
     520           0 :         if (rc < 0) {
     521           0 :                 SPDK_ERRLOG("uring submit rc %d\n", rc);
     522           0 :                 return rc;
     523             :         }
     524             : 
     525           0 :         rc = io_uring_wait_cqe(&g_ublk_tgt.ctrl_ring, &cqe);
     526           0 :         if (rc < 0) {
     527           0 :                 SPDK_ERRLOG("wait cqe rc %d\n", rc);
     528           0 :                 return rc;
     529             :         }
     530             : 
     531           0 :         if (cqe->res == 0) {
     532           0 :                 g_ublk_tgt.ioctl_encode = !!(g_ublk_tgt.features & UBLK_F_CMD_IOCTL_ENCODE);
     533           0 :                 g_ublk_tgt.user_copy = !!(g_ublk_tgt.features & UBLK_F_USER_COPY);
     534           0 :                 g_ublk_tgt.user_recovery = !!(g_ublk_tgt.features & UBLK_F_USER_RECOVERY);
     535           0 :         }
     536           0 :         io_uring_cqe_seen(&g_ublk_tgt.ctrl_ring, cqe);
     537             : 
     538           0 :         return 0;
     539           0 : }
     540             : 
     541             : static int
     542           0 : ublk_queue_cmd_buf_sz(uint32_t q_depth)
     543             : {
     544           0 :         uint32_t size = q_depth * sizeof(struct ublksrv_io_desc);
     545           0 :         uint32_t page_sz = getpagesize();
     546             : 
     547             :         /* round up size */
     548           0 :         return (size + page_sz - 1) & ~(page_sz - 1);
     549           0 : }
     550             : 
     551             : static int
     552           0 : ublk_get_max_support_devs(void)
     553             : {
     554           0 :         FILE *file;
     555           0 :         char str[128];
     556             : 
     557           0 :         file = fopen("/sys/module/ublk_drv/parameters/ublks_max", "r");
     558           0 :         if (!file) {
     559           0 :                 return -ENOENT;
     560             :         }
     561             : 
     562           0 :         if (!fgets(str, sizeof(str), file)) {
     563           0 :                 fclose(file);
     564           0 :                 return -EINVAL;
     565             :         }
     566           0 :         fclose(file);
     567             : 
     568           0 :         spdk_str_chomp(str);
     569           0 :         return spdk_strtol(str, 10);
     570           0 : }
     571             : 
     572             : static int
     573           0 : ublk_open(void)
     574             : {
     575           0 :         int rc, ublks_max;
     576             : 
     577           0 :         g_ublk_tgt.ctrl_fd = open(UBLK_CTRL_DEV, O_RDWR);
     578           0 :         if (g_ublk_tgt.ctrl_fd < 0) {
     579           0 :                 rc = errno;
     580           0 :                 SPDK_ERRLOG("UBLK conrol dev %s can't be opened, error=%s\n", UBLK_CTRL_DEV, spdk_strerror(errno));
     581           0 :                 return -rc;
     582             :         }
     583             : 
     584           0 :         ublks_max = ublk_get_max_support_devs();
     585           0 :         if (ublks_max > 0) {
     586           0 :                 g_ublks_max = ublks_max;
     587           0 :         }
     588             : 
     589             :         /* We need to set SQPOLL for kernels 6.1 and earlier, since they would not defer ublk ctrl
     590             :          * ring processing to a workqueue.  Ctrl ring processing is minimal, so SQPOLL is fine.
     591             :          * All the commands sent via control uring for a ublk device is executed one by one, so use
     592             :          * ublks_max * 2 as the number of uring entries is enough.
     593             :          */
     594           0 :         rc = ublk_setup_ring(g_ublks_max * 2, &g_ublk_tgt.ctrl_ring,
     595           0 :                              IORING_SETUP_SQE128 | IORING_SETUP_SQPOLL);
     596           0 :         if (rc < 0) {
     597           0 :                 SPDK_ERRLOG("UBLK ctrl queue_init: %s\n", spdk_strerror(-rc));
     598           0 :                 goto err;
     599             :         }
     600             : 
     601           0 :         rc = ublk_ctrl_cmd_get_features();
     602           0 :         if (rc) {
     603           0 :                 goto err;
     604             :         }
     605             : 
     606           0 :         return 0;
     607             : 
     608             : err:
     609           0 :         close(g_ublk_tgt.ctrl_fd);
     610           0 :         g_ublk_tgt.ctrl_fd = -1;
     611           0 :         return rc;
     612           0 : }
     613             : 
     614             : static int
     615           0 : ublk_parse_core_mask(const char *mask)
     616             : {
     617           0 :         struct spdk_cpuset tmp_mask;
     618           0 :         int rc;
     619             : 
     620           0 :         if (mask == NULL) {
     621           0 :                 spdk_env_get_cpuset(&g_core_mask);
     622           0 :                 return 0;
     623             :         }
     624             : 
     625           0 :         rc = spdk_cpuset_parse(&g_core_mask, mask);
     626           0 :         if (rc < 0) {
     627           0 :                 SPDK_ERRLOG("invalid cpumask %s\n", mask);
     628           0 :                 return -EINVAL;
     629             :         }
     630             : 
     631           0 :         if (spdk_cpuset_count(&g_core_mask) == 0) {
     632           0 :                 SPDK_ERRLOG("no cpus specified\n");
     633           0 :                 return -EINVAL;
     634             :         }
     635             : 
     636           0 :         spdk_env_get_cpuset(&tmp_mask);
     637           0 :         spdk_cpuset_and(&tmp_mask, &g_core_mask);
     638             : 
     639           0 :         if (!spdk_cpuset_equal(&tmp_mask, &g_core_mask)) {
     640           0 :                 SPDK_ERRLOG("one of selected cpu is outside of core mask(=%s)\n",
     641             :                             spdk_cpuset_fmt(&g_core_mask));
     642           0 :                 return -EINVAL;
     643             :         }
     644             : 
     645           0 :         return 0;
     646           0 : }
     647             : 
     648             : static void
     649           0 : ublk_poller_register(void *args)
     650             : {
     651           0 :         struct ublk_poll_group *poll_group = args;
     652           0 :         int rc;
     653             : 
     654           0 :         assert(spdk_get_thread() == poll_group->ublk_thread);
     655             :         /* Bind ublk spdk_thread to current CPU core in order to avoid thread context switch
     656             :          * during uring processing as required by ublk kernel.
     657             :          */
     658           0 :         spdk_thread_bind(spdk_get_thread(), true);
     659             : 
     660           0 :         TAILQ_INIT(&poll_group->queue_list);
     661           0 :         poll_group->ublk_poller = SPDK_POLLER_REGISTER(ublk_poll, poll_group, 0);
     662           0 :         rc = spdk_iobuf_channel_init(&poll_group->iobuf_ch, "ublk",
     663             :                                      UBLK_IOBUF_SMALL_CACHE_SIZE, UBLK_IOBUF_LARGE_CACHE_SIZE);
     664           0 :         if (rc != 0) {
     665           0 :                 assert(false);
     666             :         }
     667           0 : }
     668             : 
     669             : int
     670           0 : ublk_create_target(const char *cpumask_str)
     671             : {
     672           0 :         int rc;
     673           0 :         uint32_t i;
     674           0 :         char thread_name[32];
     675           0 :         struct ublk_poll_group *poll_group;
     676             : 
     677           0 :         if (g_ublk_tgt.active == true) {
     678           0 :                 SPDK_ERRLOG("UBLK target has been created\n");
     679           0 :                 return -EBUSY;
     680             :         }
     681             : 
     682           0 :         rc = ublk_parse_core_mask(cpumask_str);
     683           0 :         if (rc != 0) {
     684           0 :                 return rc;
     685             :         }
     686             : 
     687           0 :         assert(g_ublk_tgt.poll_groups == NULL);
     688           0 :         g_ublk_tgt.poll_groups = calloc(spdk_env_get_core_count(), sizeof(*poll_group));
     689           0 :         if (!g_ublk_tgt.poll_groups) {
     690           0 :                 return -ENOMEM;
     691             :         }
     692             : 
     693           0 :         rc = ublk_open();
     694           0 :         if (rc != 0) {
     695           0 :                 SPDK_ERRLOG("Fail to open UBLK, error=%s\n", spdk_strerror(-rc));
     696           0 :                 free(g_ublk_tgt.poll_groups);
     697           0 :                 g_ublk_tgt.poll_groups = NULL;
     698           0 :                 return rc;
     699             :         }
     700             : 
     701           0 :         spdk_iobuf_register_module("ublk");
     702             : 
     703           0 :         SPDK_ENV_FOREACH_CORE(i) {
     704           0 :                 if (!spdk_cpuset_get_cpu(&g_core_mask, i)) {
     705           0 :                         continue;
     706             :                 }
     707           0 :                 snprintf(thread_name, sizeof(thread_name), "ublk_thread%u", i);
     708           0 :                 poll_group = &g_ublk_tgt.poll_groups[g_num_ublk_poll_groups];
     709           0 :                 poll_group->ublk_thread = spdk_thread_create(thread_name, &g_core_mask);
     710           0 :                 spdk_thread_send_msg(poll_group->ublk_thread, ublk_poller_register, poll_group);
     711           0 :                 g_num_ublk_poll_groups++;
     712           0 :         }
     713             : 
     714           0 :         assert(spdk_thread_is_app_thread(NULL));
     715           0 :         g_ublk_tgt.active = true;
     716           0 :         g_ublk_tgt.ctrl_ops_in_progress = 0;
     717           0 :         g_ublk_tgt.ctrl_poller = SPDK_POLLER_REGISTER(ublk_ctrl_poller, NULL,
     718             :                                  UBLK_DEFAULT_CTRL_URING_POLLING_INTERVAL_US);
     719             : 
     720           0 :         SPDK_NOTICELOG("UBLK target created successfully\n");
     721             : 
     722           0 :         return 0;
     723           0 : }
     724             : 
     725             : static void
     726           0 : _ublk_fini_done(void *args)
     727             : {
     728           0 :         SPDK_DEBUGLOG(ublk, "\n");
     729             : 
     730           0 :         g_num_ublk_poll_groups = 0;
     731           0 :         g_next_ublk_poll_group = 0;
     732           0 :         g_ublk_tgt.is_destroying = false;
     733           0 :         g_ublk_tgt.active = false;
     734           0 :         g_ublk_tgt.features = 0;
     735           0 :         g_ublk_tgt.ioctl_encode = false;
     736           0 :         g_ublk_tgt.user_copy = false;
     737           0 :         g_ublk_tgt.user_recovery = false;
     738             : 
     739           0 :         if (g_ublk_tgt.cb_fn) {
     740           0 :                 g_ublk_tgt.cb_fn(g_ublk_tgt.cb_arg);
     741           0 :                 g_ublk_tgt.cb_fn = NULL;
     742           0 :                 g_ublk_tgt.cb_arg = NULL;
     743           0 :         }
     744             : 
     745           0 :         if (g_ublk_tgt.poll_groups) {
     746           0 :                 free(g_ublk_tgt.poll_groups);
     747           0 :                 g_ublk_tgt.poll_groups = NULL;
     748           0 :         }
     749             : 
     750           0 : }
     751             : 
     752             : static void
     753           0 : ublk_thread_exit(void *args)
     754             : {
     755           0 :         struct spdk_thread *ublk_thread = spdk_get_thread();
     756           0 :         uint32_t i;
     757             : 
     758           0 :         for (i = 0; i < g_num_ublk_poll_groups; i++) {
     759           0 :                 if (g_ublk_tgt.poll_groups[i].ublk_thread == ublk_thread) {
     760           0 :                         spdk_poller_unregister(&g_ublk_tgt.poll_groups[i].ublk_poller);
     761           0 :                         spdk_iobuf_channel_fini(&g_ublk_tgt.poll_groups[i].iobuf_ch);
     762           0 :                         spdk_thread_bind(ublk_thread, false);
     763           0 :                         spdk_thread_exit(ublk_thread);
     764           0 :                 }
     765           0 :         }
     766           0 : }
     767             : 
     768             : static int
     769           0 : ublk_close_dev(struct spdk_ublk_dev *ublk)
     770             : {
     771           0 :         int rc;
     772             : 
     773             :         /* set is_closing */
     774           0 :         if (ublk->is_closing) {
     775           0 :                 return -EBUSY;
     776             :         }
     777           0 :         ublk->is_closing = true;
     778             : 
     779           0 :         rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_STOP_DEV);
     780           0 :         if (rc < 0) {
     781           0 :                 SPDK_ERRLOG("stop dev %d failed\n", ublk->ublk_id);
     782           0 :         }
     783           0 :         return rc;
     784           0 : }
     785             : 
     786             : static void
     787           0 : _ublk_fini(void *args)
     788             : {
     789           0 :         struct spdk_ublk_dev    *ublk, *ublk_tmp;
     790             : 
     791           0 :         TAILQ_FOREACH_SAFE(ublk, &g_ublk_devs, tailq, ublk_tmp) {
     792           0 :                 ublk_close_dev(ublk);
     793           0 :         }
     794             : 
     795             :         /* Check if all ublks closed */
     796           0 :         if (TAILQ_EMPTY(&g_ublk_devs)) {
     797           0 :                 SPDK_DEBUGLOG(ublk, "finish shutdown\n");
     798           0 :                 spdk_poller_unregister(&g_ublk_tgt.ctrl_poller);
     799           0 :                 if (g_ublk_tgt.ctrl_ring.ring_fd >= 0) {
     800           0 :                         io_uring_queue_exit(&g_ublk_tgt.ctrl_ring);
     801           0 :                         g_ublk_tgt.ctrl_ring.ring_fd = -1;
     802           0 :                 }
     803           0 :                 if (g_ublk_tgt.ctrl_fd >= 0) {
     804           0 :                         close(g_ublk_tgt.ctrl_fd);
     805           0 :                         g_ublk_tgt.ctrl_fd = -1;
     806           0 :                 }
     807           0 :                 spdk_for_each_thread(ublk_thread_exit, NULL, _ublk_fini_done);
     808           0 :         } else {
     809           0 :                 spdk_thread_send_msg(spdk_get_thread(), _ublk_fini, NULL);
     810             :         }
     811           0 : }
     812             : 
     813             : int
     814           0 : spdk_ublk_fini(spdk_ublk_fini_cb cb_fn, void *cb_arg)
     815             : {
     816           0 :         assert(spdk_thread_is_app_thread(NULL));
     817             : 
     818           0 :         if (g_ublk_tgt.is_destroying == true) {
     819             :                 /* UBLK target is being destroying */
     820           0 :                 return -EBUSY;
     821             :         }
     822           0 :         g_ublk_tgt.cb_fn = cb_fn;
     823           0 :         g_ublk_tgt.cb_arg = cb_arg;
     824           0 :         g_ublk_tgt.is_destroying = true;
     825           0 :         _ublk_fini(NULL);
     826             : 
     827           0 :         return 0;
     828           0 : }
     829             : 
     830             : int
     831           0 : ublk_destroy_target(spdk_ublk_fini_cb cb_fn, void *cb_arg)
     832             : {
     833           0 :         int rc;
     834             : 
     835           0 :         if (g_ublk_tgt.active == false) {
     836             :                 /* UBLK target has not been created */
     837           0 :                 return -ENOENT;
     838             :         }
     839             : 
     840           0 :         rc = spdk_ublk_fini(cb_fn, cb_arg);
     841             : 
     842           0 :         return rc;
     843           0 : }
     844             : 
     845             : struct spdk_ublk_dev *
     846           0 : ublk_dev_find_by_id(uint32_t ublk_id)
     847             : {
     848           0 :         struct spdk_ublk_dev *ublk;
     849             : 
     850             :         /* check whether ublk has already been registered by ublk path. */
     851           0 :         TAILQ_FOREACH(ublk, &g_ublk_devs, tailq) {
     852           0 :                 if (ublk->ublk_id == ublk_id) {
     853           0 :                         return ublk;
     854             :                 }
     855           0 :         }
     856             : 
     857           0 :         return NULL;
     858           0 : }
     859             : 
     860             : uint32_t
     861           0 : ublk_dev_get_id(struct spdk_ublk_dev *ublk)
     862             : {
     863           0 :         return ublk->ublk_id;
     864             : }
     865             : 
     866           0 : struct spdk_ublk_dev *ublk_dev_first(void)
     867             : {
     868           0 :         return TAILQ_FIRST(&g_ublk_devs);
     869             : }
     870             : 
     871           0 : struct spdk_ublk_dev *ublk_dev_next(struct spdk_ublk_dev *prev)
     872             : {
     873           0 :         return TAILQ_NEXT(prev, tailq);
     874             : }
     875             : 
     876             : uint32_t
     877           0 : ublk_dev_get_queue_depth(struct spdk_ublk_dev *ublk)
     878             : {
     879           0 :         return ublk->queue_depth;
     880             : }
     881             : 
     882             : uint32_t
     883           0 : ublk_dev_get_num_queues(struct spdk_ublk_dev *ublk)
     884             : {
     885           0 :         return ublk->num_queues;
     886             : }
     887             : 
     888             : const char *
     889           0 : ublk_dev_get_bdev_name(struct spdk_ublk_dev *ublk)
     890             : {
     891           0 :         return spdk_bdev_get_name(ublk->bdev);
     892             : }
     893             : 
     894             : void
     895           0 : spdk_ublk_write_config_json(struct spdk_json_write_ctx *w)
     896             : {
     897           0 :         struct spdk_ublk_dev *ublk;
     898             : 
     899           0 :         spdk_json_write_array_begin(w);
     900             : 
     901           0 :         if (g_ublk_tgt.active) {
     902           0 :                 spdk_json_write_object_begin(w);
     903             : 
     904           0 :                 spdk_json_write_named_string(w, "method", "ublk_create_target");
     905           0 :                 spdk_json_write_named_object_begin(w, "params");
     906           0 :                 spdk_json_write_named_string(w, "cpumask", spdk_cpuset_fmt(&g_core_mask));
     907           0 :                 spdk_json_write_object_end(w);
     908             : 
     909           0 :                 spdk_json_write_object_end(w);
     910           0 :         }
     911             : 
     912           0 :         TAILQ_FOREACH(ublk, &g_ublk_devs, tailq) {
     913           0 :                 spdk_json_write_object_begin(w);
     914             : 
     915           0 :                 spdk_json_write_named_string(w, "method", "ublk_start_disk");
     916             : 
     917           0 :                 spdk_json_write_named_object_begin(w, "params");
     918           0 :                 spdk_json_write_named_string(w, "bdev_name", ublk_dev_get_bdev_name(ublk));
     919           0 :                 spdk_json_write_named_uint32(w, "ublk_id", ublk->ublk_id);
     920           0 :                 spdk_json_write_named_uint32(w, "num_queues", ublk->num_queues);
     921           0 :                 spdk_json_write_named_uint32(w, "queue_depth", ublk->queue_depth);
     922           0 :                 spdk_json_write_object_end(w);
     923             : 
     924           0 :                 spdk_json_write_object_end(w);
     925           0 :         }
     926             : 
     927           0 :         spdk_json_write_array_end(w);
     928           0 : }
     929             : 
     930             : static void
     931           0 : ublk_dev_list_register(struct spdk_ublk_dev *ublk)
     932             : {
     933           0 :         UBLK_DEBUGLOG(ublk, "add to tailq\n");
     934           0 :         TAILQ_INSERT_TAIL(&g_ublk_devs, ublk, tailq);
     935           0 :         g_ublk_tgt.num_ublk_devs++;
     936           0 : }
     937             : 
     938             : static void
     939           0 : ublk_dev_list_unregister(struct spdk_ublk_dev *ublk)
     940             : {
     941             :         /*
     942             :          * ublk device may be stopped before registered.
     943             :          * check whether it was registered.
     944             :          */
     945             : 
     946           0 :         if (ublk_dev_find_by_id(ublk->ublk_id)) {
     947           0 :                 UBLK_DEBUGLOG(ublk, "remove from tailq\n");
     948           0 :                 TAILQ_REMOVE(&g_ublk_devs, ublk, tailq);
     949           0 :                 assert(g_ublk_tgt.num_ublk_devs);
     950           0 :                 g_ublk_tgt.num_ublk_devs--;
     951           0 :                 return;
     952             :         }
     953             : 
     954           0 :         UBLK_DEBUGLOG(ublk, "not found in tailq\n");
     955           0 :         assert(false);
     956             : }
     957             : 
     958             : static void
     959           0 : ublk_delete_dev(void *arg)
     960             : {
     961           0 :         struct spdk_ublk_dev *ublk = arg;
     962           0 :         int rc = 0;
     963           0 :         uint32_t q_idx;
     964             : 
     965           0 :         assert(spdk_thread_is_app_thread(NULL));
     966           0 :         for (q_idx = 0; q_idx < ublk->num_queues; q_idx++) {
     967           0 :                 ublk_dev_queue_fini(&ublk->queues[q_idx]);
     968           0 :         }
     969             : 
     970           0 :         if (ublk->cdev_fd >= 0) {
     971           0 :                 close(ublk->cdev_fd);
     972           0 :         }
     973             : 
     974           0 :         rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_DEL_DEV);
     975           0 :         if (rc < 0) {
     976           0 :                 SPDK_ERRLOG("delete dev %d failed\n", ublk->ublk_id);
     977           0 :         }
     978           0 : }
     979             : 
     980             : static int
     981           0 : _ublk_close_dev_retry(void *arg)
     982             : {
     983           0 :         struct spdk_ublk_dev *ublk = arg;
     984             : 
     985           0 :         if (ublk->ctrl_ops_in_progress > 0) {
     986           0 :                 if (ublk->retry_count-- > 0) {
     987           0 :                         return SPDK_POLLER_BUSY;
     988             :                 }
     989           0 :                 SPDK_ERRLOG("Timeout on ctrl op completion.\n");
     990           0 :         }
     991           0 :         spdk_poller_unregister(&ublk->retry_poller);
     992           0 :         ublk_delete_dev(ublk);
     993           0 :         return SPDK_POLLER_BUSY;
     994           0 : }
     995             : 
     996             : static void
     997           0 : ublk_try_close_dev(void *arg)
     998             : {
     999           0 :         struct spdk_ublk_dev *ublk = arg;
    1000             : 
    1001           0 :         assert(spdk_thread_is_app_thread(NULL));
    1002             : 
    1003           0 :         ublk->queues_closed += 1;
    1004           0 :         SPDK_DEBUGLOG(ublk_io, "ublkb%u closed queues %u\n", ublk->ublk_id, ublk->queues_closed);
    1005             : 
    1006           0 :         if (ublk->queues_closed < ublk->num_queues) {
    1007           0 :                 return;
    1008             :         }
    1009             : 
    1010           0 :         if (ublk->ctrl_ops_in_progress > 0) {
    1011           0 :                 assert(ublk->retry_poller == NULL);
    1012           0 :                 ublk->retry_count = UBLK_STOP_BUSY_WAITING_MS * 1000ULL / UBLK_BUSY_POLLING_INTERVAL_US;
    1013           0 :                 ublk->retry_poller = SPDK_POLLER_REGISTER(_ublk_close_dev_retry, ublk,
    1014             :                                      UBLK_BUSY_POLLING_INTERVAL_US);
    1015           0 :         } else {
    1016           0 :                 ublk_delete_dev(ublk);
    1017             :         }
    1018           0 : }
    1019             : 
    1020             : static void
    1021           0 : ublk_try_close_queue(struct ublk_queue *q)
    1022             : {
    1023           0 :         struct spdk_ublk_dev *ublk = q->dev;
    1024             : 
    1025             :         /* Close queue until no I/O is submitted to bdev in flight,
    1026             :          * no I/O is waiting to commit result, and all I/Os are aborted back.
    1027             :          */
    1028           0 :         if (!TAILQ_EMPTY(&q->inflight_io_list) || !TAILQ_EMPTY(&q->completed_io_list) || q->cmd_inflight) {
    1029             :                 /* wait for next retry */
    1030           0 :                 return;
    1031             :         }
    1032             : 
    1033           0 :         TAILQ_REMOVE(&q->poll_group->queue_list, q, tailq);
    1034           0 :         spdk_put_io_channel(q->bdev_ch);
    1035           0 :         q->bdev_ch = NULL;
    1036             : 
    1037           0 :         spdk_thread_send_msg(spdk_thread_get_app_thread(), ublk_try_close_dev, ublk);
    1038           0 : }
    1039             : 
    1040             : int
    1041           0 : ublk_stop_disk(uint32_t ublk_id, ublk_ctrl_cb ctrl_cb, void *cb_arg)
    1042             : {
    1043           0 :         struct spdk_ublk_dev *ublk;
    1044             : 
    1045           0 :         assert(spdk_thread_is_app_thread(NULL));
    1046             : 
    1047           0 :         ublk = ublk_dev_find_by_id(ublk_id);
    1048           0 :         if (ublk == NULL) {
    1049           0 :                 SPDK_ERRLOG("no ublk dev with ublk_id=%u\n", ublk_id);
    1050           0 :                 return -ENODEV;
    1051             :         }
    1052           0 :         if (ublk->is_closing) {
    1053           0 :                 SPDK_WARNLOG("ublk %d is closing\n", ublk->ublk_id);
    1054           0 :                 return -EBUSY;
    1055             :         }
    1056           0 :         if (ublk->ctrl_cb) {
    1057           0 :                 SPDK_WARNLOG("ublk %d is busy with RPC call\n", ublk->ublk_id);
    1058           0 :                 return -EBUSY;
    1059             :         }
    1060             : 
    1061           0 :         ublk->ctrl_cb = ctrl_cb;
    1062           0 :         ublk->cb_arg = cb_arg;
    1063           0 :         return ublk_close_dev(ublk);
    1064           0 : }
    1065             : 
    1066             : static inline void
    1067           0 : ublk_mark_io_done(struct ublk_io *io, int res)
    1068             : {
    1069             :         /*
    1070             :          * mark io done by target, so that SPDK can commit its
    1071             :          * result and fetch new request via io_uring command.
    1072             :          */
    1073           0 :         io->cmd_op = UBLK_IO_COMMIT_AND_FETCH_REQ;
    1074           0 :         io->result = res;
    1075           0 :         io->need_data = false;
    1076           0 : }
    1077             : 
    1078             : static void
    1079           0 : ublk_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
    1080             : {
    1081           0 :         struct ublk_io  *io = cb_arg;
    1082           0 :         struct ublk_queue *q = io->q;
    1083           0 :         int res;
    1084             : 
    1085           0 :         if (success) {
    1086           0 :                 res = io->result;
    1087           0 :         } else {
    1088           0 :                 res = -EIO;
    1089             :         }
    1090             : 
    1091           0 :         ublk_mark_io_done(io, res);
    1092             : 
    1093           0 :         SPDK_DEBUGLOG(ublk_io, "(qid %d tag %d res %d)\n",
    1094             :                       q->q_id, io->tag, res);
    1095           0 :         TAILQ_REMOVE(&q->inflight_io_list, io, tailq);
    1096           0 :         TAILQ_INSERT_TAIL(&q->completed_io_list, io, tailq);
    1097             : 
    1098           0 :         if (bdev_io != NULL) {
    1099           0 :                 spdk_bdev_free_io(bdev_io);
    1100           0 :         }
    1101           0 : }
    1102             : 
    1103             : static void
    1104           0 : ublk_queue_user_copy(struct ublk_io *io, bool is_write)
    1105             : {
    1106           0 :         struct ublk_queue *q = io->q;
    1107           0 :         const struct ublksrv_io_desc *iod = io->iod;
    1108           0 :         struct io_uring_sqe *sqe;
    1109           0 :         uint64_t pos;
    1110           0 :         uint32_t nbytes;
    1111             : 
    1112           0 :         nbytes = iod->nr_sectors * (1ULL << LINUX_SECTOR_SHIFT);
    1113           0 :         pos = ublk_user_copy_pos(q->q_id, io->tag);
    1114           0 :         sqe = io_uring_get_sqe(&q->ring);
    1115           0 :         assert(sqe);
    1116             : 
    1117           0 :         if (is_write) {
    1118           0 :                 io_uring_prep_read(sqe, 0, io->payload, nbytes, pos);
    1119           0 :         } else {
    1120           0 :                 io_uring_prep_write(sqe, 0, io->payload, nbytes, pos);
    1121             :         }
    1122           0 :         io_uring_sqe_set_flags(sqe, IOSQE_FIXED_FILE);
    1123           0 :         io_uring_sqe_set_data64(sqe, build_user_data(io->tag, 0));
    1124             : 
    1125           0 :         io->user_copy = true;
    1126           0 :         TAILQ_REMOVE(&q->inflight_io_list, io, tailq);
    1127           0 :         TAILQ_INSERT_TAIL(&q->completed_io_list, io, tailq);
    1128           0 : }
    1129             : 
    1130             : static void
    1131           0 : ublk_user_copy_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
    1132             : {
    1133           0 :         struct ublk_io  *io = cb_arg;
    1134             : 
    1135           0 :         spdk_bdev_free_io(bdev_io);
    1136             : 
    1137           0 :         if (success) {
    1138           0 :                 ublk_queue_user_copy(io, false);
    1139           0 :                 return;
    1140             :         }
    1141             :         /* READ IO Error */
    1142           0 :         ublk_io_done(NULL, false, cb_arg);
    1143           0 : }
    1144             : 
    1145             : static void
    1146           0 : ublk_resubmit_io(void *arg)
    1147             : {
    1148           0 :         struct ublk_io *io = (struct ublk_io *)arg;
    1149             : 
    1150           0 :         _ublk_submit_bdev_io(io->q, io);
    1151           0 : }
    1152             : 
    1153             : static void
    1154           0 : ublk_queue_io(struct ublk_io *io)
    1155             : {
    1156           0 :         int rc;
    1157           0 :         struct spdk_bdev *bdev = io->q->dev->bdev;
    1158           0 :         struct ublk_queue *q = io->q;
    1159             : 
    1160           0 :         io->bdev_io_wait.bdev = bdev;
    1161           0 :         io->bdev_io_wait.cb_fn = ublk_resubmit_io;
    1162           0 :         io->bdev_io_wait.cb_arg = io;
    1163             : 
    1164           0 :         rc = spdk_bdev_queue_io_wait(bdev, q->bdev_ch, &io->bdev_io_wait);
    1165           0 :         if (rc != 0) {
    1166           0 :                 SPDK_ERRLOG("Queue io failed in ublk_queue_io, rc=%d.\n", rc);
    1167           0 :                 ublk_io_done(NULL, false, io);
    1168           0 :         }
    1169           0 : }
    1170             : 
    1171             : static void
    1172           0 : ublk_io_get_buffer_cb(struct spdk_iobuf_entry *iobuf, void *buf)
    1173             : {
    1174           0 :         struct ublk_io *io = SPDK_CONTAINEROF(iobuf, struct ublk_io, iobuf);
    1175             : 
    1176           0 :         io->mpool_entry = buf;
    1177           0 :         assert(io->payload == NULL);
    1178           0 :         io->payload = (void *)(uintptr_t)SPDK_ALIGN_CEIL((uintptr_t)buf, 4096ULL);
    1179           0 :         io->get_buf_cb(io);
    1180           0 : }
    1181             : 
    1182             : static void
    1183           0 : ublk_io_get_buffer(struct ublk_io *io, struct spdk_iobuf_channel *iobuf_ch,
    1184             :                    ublk_get_buf_cb get_buf_cb)
    1185             : {
    1186           0 :         void *buf;
    1187             : 
    1188           0 :         io->payload_size = io->iod->nr_sectors * (1ULL << LINUX_SECTOR_SHIFT);
    1189           0 :         io->get_buf_cb = get_buf_cb;
    1190           0 :         buf = spdk_iobuf_get(iobuf_ch, io->payload_size, &io->iobuf, ublk_io_get_buffer_cb);
    1191             : 
    1192           0 :         if (buf != NULL) {
    1193           0 :                 ublk_io_get_buffer_cb(&io->iobuf, buf);
    1194           0 :         }
    1195           0 : }
    1196             : 
    1197             : static void
    1198           0 : ublk_io_put_buffer(struct ublk_io *io, struct spdk_iobuf_channel *iobuf_ch)
    1199             : {
    1200           0 :         if (io->payload) {
    1201           0 :                 spdk_iobuf_put(iobuf_ch, io->mpool_entry, io->payload_size);
    1202           0 :                 io->mpool_entry = NULL;
    1203           0 :                 io->payload = NULL;
    1204           0 :         }
    1205           0 : }
    1206             : 
/*
 * Translate one ublk IO descriptor into the matching spdk_bdev call and
 * submit it.  READ completions go to ublk_user_copy_read_done when
 * user-copy is enabled, otherwise to ublk_io_done.  On -ENOMEM the IO is
 * parked via ublk_queue_io() for later resubmission; any other failure
 * completes the IO as failed.
 */
static void
_ublk_submit_bdev_io(struct ublk_queue *q, struct ublk_io *io)
{
	struct spdk_ublk_dev *ublk = q->dev;
	struct spdk_bdev_desc *desc = io->bdev_desc;
	struct spdk_io_channel *ch = io->bdev_ch;
	uint64_t offset_blocks, num_blocks;
	spdk_bdev_io_completion_cb read_cb;
	uint8_t ublk_op;
	int rc = 0;
	const struct ublksrv_io_desc *iod = io->iod;

	ublk_op = ublksrv_get_op(iod);
	/* ublk expresses ranges in 512B sectors; convert to bdev blocks. */
	offset_blocks = iod->start_sector >> ublk->sector_per_block_shift;
	num_blocks = iod->nr_sectors >> ublk->sector_per_block_shift;

	switch (ublk_op) {
	case UBLK_IO_OP_READ:
		if (g_ublk_tgt.user_copy) {
			read_cb = ublk_user_copy_read_done;
		} else {
			read_cb = ublk_io_done;
		}
		rc = spdk_bdev_read_blocks(desc, ch, io->payload, offset_blocks, num_blocks, read_cb, io);
		break;
	case UBLK_IO_OP_WRITE:
		rc = spdk_bdev_write_blocks(desc, ch, io->payload, offset_blocks, num_blocks, ublk_io_done, io);
		break;
	case UBLK_IO_OP_FLUSH:
		/* ublk flush carries no range; flush the entire bdev. */
		rc = spdk_bdev_flush_blocks(desc, ch, 0, spdk_bdev_get_num_blocks(ublk->bdev), ublk_io_done, io);
		break;
	case UBLK_IO_OP_DISCARD:
		rc = spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, ublk_io_done, io);
		break;
	case UBLK_IO_OP_WRITE_ZEROES:
		rc = spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, ublk_io_done, io);
		break;
	default:
		/* Unknown opcode: fall through to the error completion below. */
		rc = -1;
	}

	if (rc < 0) {
		if (rc == -ENOMEM) {
			SPDK_INFOLOG(ublk, "No memory, start to queue io.\n");
			ublk_queue_io(io);
		} else {
			SPDK_ERRLOG("ublk io failed in ublk_queue_io, rc=%d, ublk_op=%u\n", rc, ublk_op);
			ublk_io_done(NULL, false, io);
		}
	}
}
    1258             : 
    1259             : static void
    1260           0 : read_get_buffer_done(struct ublk_io *io)
    1261             : {
    1262           0 :         _ublk_submit_bdev_io(io->q, io);
    1263           0 : }
    1264             : 
    1265             : static void
    1266           0 : user_copy_write_get_buffer_done(struct ublk_io *io)
    1267             : {
    1268           0 :         ublk_queue_user_copy(io, true);
    1269           0 : }
    1270             : 
    1271             : static void
    1272           0 : ublk_submit_bdev_io(struct ublk_queue *q, struct ublk_io *io)
    1273             : {
    1274           0 :         struct spdk_iobuf_channel *iobuf_ch = &q->poll_group->iobuf_ch;
    1275           0 :         const struct ublksrv_io_desc *iod = io->iod;
    1276           0 :         uint8_t ublk_op;
    1277             : 
    1278           0 :         io->result = iod->nr_sectors * (1ULL << LINUX_SECTOR_SHIFT);
    1279           0 :         ublk_op = ublksrv_get_op(iod);
    1280           0 :         switch (ublk_op) {
    1281             :         case UBLK_IO_OP_READ:
    1282           0 :                 ublk_io_get_buffer(io, iobuf_ch, read_get_buffer_done);
    1283           0 :                 break;
    1284             :         case UBLK_IO_OP_WRITE:
    1285           0 :                 if (g_ublk_tgt.user_copy) {
    1286           0 :                         ublk_io_get_buffer(io, iobuf_ch, user_copy_write_get_buffer_done);
    1287           0 :                 } else {
    1288           0 :                         _ublk_submit_bdev_io(q, io);
    1289             :                 }
    1290           0 :                 break;
    1291             :         default:
    1292           0 :                 _ublk_submit_bdev_io(q, io);
    1293           0 :                 break;
    1294             :         }
    1295           0 : }
    1296             : 
    1297             : static inline void
    1298           0 : ublksrv_queue_io_cmd(struct ublk_queue *q,
    1299             :                      struct ublk_io *io, unsigned tag)
    1300             : {
    1301           0 :         struct ublksrv_io_cmd *cmd;
    1302           0 :         struct io_uring_sqe *sqe;
    1303           0 :         unsigned int cmd_op = 0;;
    1304           0 :         uint64_t user_data;
    1305             : 
    1306             :         /* each io should have operation of fetching or committing */
    1307           0 :         assert((io->cmd_op == UBLK_IO_FETCH_REQ) || (io->cmd_op == UBLK_IO_NEED_GET_DATA) ||
    1308             :                (io->cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ));
    1309           0 :         cmd_op = io->cmd_op;
    1310             : 
    1311           0 :         sqe = io_uring_get_sqe(&q->ring);
    1312           0 :         assert(sqe);
    1313             : 
    1314           0 :         cmd = (struct ublksrv_io_cmd *)ublk_get_sqe_cmd(sqe);
    1315           0 :         if (cmd_op == UBLK_IO_COMMIT_AND_FETCH_REQ) {
    1316           0 :                 cmd->result = io->result;
    1317           0 :         }
    1318             : 
    1319             :         /* These fields should be written once, never change */
    1320           0 :         ublk_set_sqe_cmd_op(sqe, cmd_op);
    1321             :         /* dev->cdev_fd */
    1322           0 :         sqe->fd              = 0;
    1323           0 :         sqe->opcode  = IORING_OP_URING_CMD;
    1324           0 :         sqe->flags   = IOSQE_FIXED_FILE;
    1325           0 :         sqe->rw_flags        = 0;
    1326           0 :         cmd->tag     = tag;
    1327           0 :         cmd->addr    = g_ublk_tgt.user_copy ? 0 : (__u64)(uintptr_t)(io->payload);
    1328           0 :         cmd->q_id    = q->q_id;
    1329             : 
    1330           0 :         user_data = build_user_data(tag, cmd_op);
    1331           0 :         io_uring_sqe_set_data64(sqe, user_data);
    1332             : 
    1333           0 :         io->cmd_op = 0;
    1334             : 
    1335           0 :         SPDK_DEBUGLOG(ublk_io, "(qid %d tag %u cmd_op %u) iof %x stopping %d\n",
    1336             :                       q->q_id, tag, cmd_op,
    1337             :                       io->cmd_op, q->is_stopping);
    1338           0 : }
    1339             : 
/*
 * Drain the queue's completed_io_list: prepare a ublk command SQE for
 * every non-user-copy IO, submit all pending SQEs in one
 * io_uring_submit() call, then release data buffers that are no longer
 * needed.  Returns the io_uring_submit() result (number of SQEs
 * submitted), or 0 when there was nothing to do.
 */
static int
ublk_io_xmit(struct ublk_queue *q)
{
	TAILQ_HEAD(, ublk_io) buffer_free_list;
	struct spdk_iobuf_channel *iobuf_ch;
	int rc = 0, count = 0;
	struct ublk_io *io;

	if (TAILQ_EMPTY(&q->completed_io_list)) {
		return 0;
	}

	TAILQ_INIT(&buffer_free_list);
	while (!TAILQ_EMPTY(&q->completed_io_list)) {
		io = TAILQ_FIRST(&q->completed_io_list);
		assert(io != NULL);
		/*
		 * Remove IO from list now assuming it will be completed. It will be inserted
		 * back to the head if it cannot be completed. This approach is specifically
		 * taken to work around a scan-build use-after-free mischaracterization.
		 */
		TAILQ_REMOVE(&q->completed_io_list, io, tailq);
		if (!io->user_copy) {
			/* NEED_GET_DATA IOs keep their buffer for the incoming
			 * write payload; all others can be freed after submit.
			 */
			if (!io->need_data) {
				TAILQ_INSERT_TAIL(&buffer_free_list, io, tailq);
			}
			ublksrv_queue_io_cmd(q, io, io->tag);
		}
		/* NOTE(review): user-copy IOs are counted too — presumably their
		 * SQEs were already prepared elsewhere; confirm against
		 * ublk_queue_user_copy().
		 */
		count++;
	}

	q->cmd_inflight += count;
	rc = io_uring_submit(&q->ring);
	if (rc != count) {
		SPDK_ERRLOG("could not submit all commands\n");
		assert(false);
	}

	/* Note: for READ io, ublk will always copy the data out of
	 * the buffers in the io_uring_submit context.  Since we
	 * are not using SQPOLL for IO rings, we can safely free
	 * those IO buffers here.  This design doesn't seem ideal,
	 * but it's what's possible since there is no discrete
	 * COMMIT_REQ operation.  That will need to change in the
	 * future should we ever want to support async copy
	 * operations.
	 */
	iobuf_ch = &q->poll_group->iobuf_ch;
	while (!TAILQ_EMPTY(&buffer_free_list)) {
		io = TAILQ_FIRST(&buffer_free_list);
		TAILQ_REMOVE(&buffer_free_list, io, tailq);
		ublk_io_put_buffer(io, iobuf_ch);
	}
	return rc;
}
    1395             : 
    1396             : static void
    1397           0 : write_get_buffer_done(struct ublk_io *io)
    1398             : {
    1399           0 :         io->need_data = true;
    1400           0 :         io->cmd_op = UBLK_IO_NEED_GET_DATA;
    1401           0 :         io->result = 0;
    1402             : 
    1403           0 :         TAILQ_REMOVE(&io->q->inflight_io_list, io, tailq);
    1404           0 :         TAILQ_INSERT_TAIL(&io->q->completed_io_list, io, tailq);
    1405           0 : }
    1406             : 
/*
 * Reap up to UBLK_QUEUE_REQUEST completions from the queue's io_uring.
 * Normal-path CQEs carry new requests (RES_OK), NEED_GET_DATA round
 * trips, or abort/stop notifications; user-copy CQEs complete a kernel
 * copy of READ-out or WRITE-in data.  Returns the number of CQEs
 * consumed.
 */
static int
ublk_io_recv(struct ublk_queue *q)
{
	struct io_uring_cqe *cqe;
	unsigned head, tag;
	int fetch, count = 0;
	struct ublk_io *io;
	struct spdk_iobuf_channel *iobuf_ch;

	if (q->cmd_inflight == 0) {
		return 0;
	}

	iobuf_ch = &q->poll_group->iobuf_ch;
	io_uring_for_each_cqe(&q->ring, head, cqe) {
		tag = user_data_to_tag(cqe->user_data);
		io = &q->ios[tag];

		SPDK_DEBUGLOG(ublk_io, "res %d qid %d tag %u, user copy %u, cmd_op %u\n",
			      cqe->res, q->q_id, tag, io->user_copy, user_data_to_op(cqe->user_data));

		q->cmd_inflight--;
		TAILQ_INSERT_TAIL(&q->inflight_io_list, io, tailq);

		if (!io->user_copy) {
			/* An abort result (or a queue already stopping) means no
			 * further requests will be fetched on this queue.
			 */
			fetch = (cqe->res != UBLK_IO_RES_ABORT) && !q->is_stopping;
			if (!fetch) {
				q->is_stopping = true;
				if (io->cmd_op == UBLK_IO_FETCH_REQ) {
					io->cmd_op = 0;
				}
			}

			if (cqe->res == UBLK_IO_RES_OK) {
				ublk_submit_bdev_io(q, io);
			} else if (cqe->res == UBLK_IO_RES_NEED_GET_DATA) {
				/* Kernel holds write data; grab a buffer, then ask for it. */
				ublk_io_get_buffer(io, iobuf_ch, write_get_buffer_done);
			} else {
				if (cqe->res != UBLK_IO_RES_ABORT) {
					SPDK_ERRLOG("ublk received error io: res %d qid %d tag %u cmd_op %u\n",
						    cqe->res, q->q_id, tag, user_data_to_op(cqe->user_data));
				}
				/* Not a real request: take it back off the inflight list. */
				TAILQ_REMOVE(&q->inflight_io_list, io, tailq);
			}
		} else {

			/* clear `user_copy` for next use of this IO structure */
			io->user_copy = false;

			assert((ublksrv_get_op(io->iod) == UBLK_IO_OP_READ) ||
			       (ublksrv_get_op(io->iod) == UBLK_IO_OP_WRITE));
			/* A short copy means the kernel moved fewer bytes than expected. */
			if (cqe->res != io->result) {
				/* EIO */
				ublk_io_done(NULL, false, io);
			} else {
				if (ublksrv_get_op(io->iod) == UBLK_IO_OP_READ) {
					/* bdev_io is already freed in first READ cycle */
					ublk_io_done(NULL, true, io);
				} else {
					/* WRITE payload has arrived; now run the bdev write. */
					_ublk_submit_bdev_io(q, io);
				}
			}
		}
		count += 1;
		if (count == UBLK_QUEUE_REQUEST) {
			break;
		}
	}
	io_uring_cq_advance(&q->ring, count);

	return count;
}
    1479             : 
    1480             : static int
    1481           0 : ublk_poll(void *arg)
    1482             : {
    1483           0 :         struct ublk_poll_group *poll_group = arg;
    1484           0 :         struct ublk_queue *q, *q_tmp;
    1485           0 :         int sent, received, count = 0;
    1486             : 
    1487           0 :         TAILQ_FOREACH_SAFE(q, &poll_group->queue_list, tailq, q_tmp) {
    1488           0 :                 sent = ublk_io_xmit(q);
    1489           0 :                 received = ublk_io_recv(q);
    1490           0 :                 if (spdk_unlikely(q->is_stopping)) {
    1491           0 :                         ublk_try_close_queue(q);
    1492           0 :                 }
    1493           0 :                 count += sent + received;
    1494           0 :         }
    1495           0 :         if (count > 0) {
    1496           0 :                 return SPDK_POLLER_BUSY;
    1497             :         } else {
    1498           0 :                 return SPDK_POLLER_IDLE;
    1499             :         }
    1500           0 : }
    1501             : 
    1502             : static void
    1503           0 : ublk_bdev_hot_remove(struct spdk_ublk_dev *ublk)
    1504             : {
    1505           0 :         ublk_close_dev(ublk);
    1506           0 : }
    1507             : 
    1508             : static void
    1509           0 : ublk_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
    1510             :                    void *event_ctx)
    1511             : {
    1512           0 :         switch (type) {
    1513             :         case SPDK_BDEV_EVENT_REMOVE:
    1514           0 :                 ublk_bdev_hot_remove(event_ctx);
    1515           0 :                 break;
    1516             :         default:
    1517           0 :                 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
    1518           0 :                 break;
    1519             :         }
    1520           0 : }
    1521             : 
    1522             : static void
    1523           0 : ublk_dev_init_io_cmds(struct io_uring *r, uint32_t q_depth)
    1524             : {
    1525           0 :         struct io_uring_sqe *sqe;
    1526           0 :         uint32_t i;
    1527             : 
    1528           0 :         for (i = 0; i < q_depth; i++) {
    1529           0 :                 sqe = ublk_uring_get_sqe(r, i);
    1530             : 
    1531             :                 /* These fields should be written once, never change */
    1532           0 :                 sqe->flags = IOSQE_FIXED_FILE;
    1533           0 :                 sqe->rw_flags = 0;
    1534           0 :                 sqe->ioprio = 0;
    1535           0 :                 sqe->off = 0;
    1536           0 :         }
    1537           0 : }
    1538             : 
    1539             : static int
    1540           0 : ublk_dev_queue_init(struct ublk_queue *q)
    1541             : {
    1542           0 :         int rc = 0, cmd_buf_size;
    1543           0 :         uint32_t j;
    1544           0 :         struct spdk_ublk_dev *ublk = q->dev;
    1545           0 :         unsigned long off;
    1546             : 
    1547           0 :         cmd_buf_size = ublk_queue_cmd_buf_sz(q->q_depth);
    1548           0 :         off = UBLKSRV_CMD_BUF_OFFSET +
    1549           0 :               q->q_id * (UBLK_MAX_QUEUE_DEPTH * sizeof(struct ublksrv_io_desc));
    1550           0 :         q->io_cmd_buf = (struct ublksrv_io_desc *)mmap(0, cmd_buf_size, PROT_READ,
    1551           0 :                         MAP_SHARED | MAP_POPULATE, ublk->cdev_fd, off);
    1552           0 :         if (q->io_cmd_buf == MAP_FAILED) {
    1553           0 :                 q->io_cmd_buf = NULL;
    1554           0 :                 rc = -errno;
    1555           0 :                 SPDK_ERRLOG("Failed at mmap: %s\n", spdk_strerror(-rc));
    1556           0 :                 return rc;
    1557             :         }
    1558             : 
    1559           0 :         for (j = 0; j < q->q_depth; j++) {
    1560           0 :                 q->ios[j].cmd_op = UBLK_IO_FETCH_REQ;
    1561           0 :                 q->ios[j].iod = &q->io_cmd_buf[j];
    1562           0 :         }
    1563             : 
    1564           0 :         rc = ublk_setup_ring(q->q_depth, &q->ring, IORING_SETUP_SQE128);
    1565           0 :         if (rc < 0) {
    1566           0 :                 SPDK_ERRLOG("Failed at setup uring: %s\n", spdk_strerror(-rc));
    1567           0 :                 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q->q_depth));
    1568           0 :                 q->io_cmd_buf = NULL;
    1569           0 :                 return rc;
    1570             :         }
    1571             : 
    1572           0 :         rc = io_uring_register_files(&q->ring, &ublk->cdev_fd, 1);
    1573           0 :         if (rc != 0) {
    1574           0 :                 SPDK_ERRLOG("Failed at uring register files: %s\n", spdk_strerror(-rc));
    1575           0 :                 io_uring_queue_exit(&q->ring);
    1576           0 :                 q->ring.ring_fd = -1;
    1577           0 :                 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q->q_depth));
    1578           0 :                 q->io_cmd_buf = NULL;
    1579           0 :                 return rc;
    1580             :         }
    1581             : 
    1582           0 :         ublk_dev_init_io_cmds(&q->ring, q->q_depth);
    1583             : 
    1584           0 :         return 0;
    1585           0 : }
    1586             : 
    1587             : static void
    1588           0 : ublk_dev_queue_fini(struct ublk_queue *q)
    1589             : {
    1590           0 :         if (q->ring.ring_fd >= 0) {
    1591           0 :                 io_uring_unregister_files(&q->ring);
    1592           0 :                 io_uring_queue_exit(&q->ring);
    1593           0 :                 q->ring.ring_fd = -1;
    1594           0 :         }
    1595           0 :         if (q->io_cmd_buf) {
    1596           0 :                 munmap(q->io_cmd_buf, ublk_queue_cmd_buf_sz(q->q_depth));
    1597           0 :         }
    1598           0 : }
    1599             : 
/*
 * Post the initial FETCH_REQ command for every slot of the queue so the
 * kernel can start delivering requests.  Runs once per queue, after the
 * bdev channel has been created.
 */
static void
ublk_dev_queue_io_init(struct ublk_queue *q)
{
	struct ublk_io *io;
	uint32_t i;
	int rc __attribute__((unused));
	void *buf;

	/* Some older kernels require a buffer to get posted, even
	 * when NEED_GET_DATA has been specified.  So allocate a
	 * temporary buffer, only for purposes of this workaround.
	 * It never actually gets used, so we will free it immediately
	 * after all of the commands are posted.
	 */
	buf = malloc(64);
	/* NOTE(review): malloc result is unchecked; on failure cmd->addr
	 * would be posted as 0 — confirm this is acceptable for the old
	 * kernels the workaround targets.
	 */

	assert(q->bdev_ch != NULL);

	/* Initialize and submit all io commands to ublk driver */
	for (i = 0; i < q->q_depth; i++) {
		io = &q->ios[i];
		io->tag = (uint16_t)i;
		io->payload = buf;
		io->bdev_ch = q->bdev_ch;
		io->bdev_desc = q->dev->bdev_desc;
		ublksrv_queue_io_cmd(q, io, i);
	}

	q->cmd_inflight += q->q_depth;
	rc = io_uring_submit(&q->ring);
	assert(rc == (int)q->q_depth);
	/* Workaround buffer is no longer referenced once submitted; detach
	 * it from every io before freeing.
	 */
	for (i = 0; i < q->q_depth; i++) {
		io = &q->ios[i];
		io->payload = NULL;
	}
	free(buf);
}
    1637             : 
    1638             : static int
    1639           0 : ublk_set_params(struct spdk_ublk_dev *ublk)
    1640             : {
    1641           0 :         int rc;
    1642             : 
    1643           0 :         rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_SET_PARAMS);
    1644           0 :         if (rc < 0) {
    1645           0 :                 SPDK_ERRLOG("UBLK can't set params for dev %d, rc %s\n", ublk->ublk_id, spdk_strerror(-rc));
    1646           0 :         }
    1647             : 
    1648           0 :         return rc;
    1649           0 : }
    1650             : 
    1651             : static void
    1652           0 : ublk_dev_info_init(struct spdk_ublk_dev *ublk)
    1653             : {
    1654           0 :         struct ublksrv_ctrl_dev_info uinfo = {
    1655           0 :                 .queue_depth = ublk->queue_depth,
    1656           0 :                 .nr_hw_queues = ublk->num_queues,
    1657           0 :                 .dev_id = ublk->ublk_id,
    1658             :                 .max_io_buf_bytes = UBLK_IO_MAX_BYTES,
    1659           0 :                 .ublksrv_pid = getpid(),
    1660           0 :                 .flags = UBLK_F_URING_CMD_COMP_IN_TASK,
    1661             :         };
    1662             : 
    1663           0 :         if (g_ublk_tgt.user_copy) {
    1664           0 :                 uinfo.flags |= UBLK_F_USER_COPY;
    1665           0 :         } else {
    1666           0 :                 uinfo.flags |= UBLK_F_NEED_GET_DATA;
    1667             :         }
    1668             : 
    1669           0 :         if (g_ublk_tgt.user_recovery) {
    1670           0 :                 uinfo.flags |= UBLK_F_USER_RECOVERY;
    1671           0 :                 uinfo.flags |= UBLK_F_USER_RECOVERY_REISSUE;
    1672           0 :         }
    1673             : 
    1674           0 :         ublk->dev_info = uinfo;
    1675           0 : }
    1676             : 
    1677             : /* Set ublk device parameters based on bdev */
    1678             : static void
    1679           0 : ublk_info_param_init(struct spdk_ublk_dev *ublk)
    1680             : {
    1681           0 :         struct spdk_bdev *bdev = ublk->bdev;
    1682           0 :         uint32_t blk_size = spdk_bdev_get_data_block_size(bdev);
    1683           0 :         uint32_t pblk_size = spdk_bdev_get_physical_block_size(bdev);
    1684           0 :         uint32_t io_opt_blocks = spdk_bdev_get_optimal_io_boundary(bdev);
    1685           0 :         uint64_t num_blocks = spdk_bdev_get_num_blocks(bdev);
    1686           0 :         uint8_t sectors_per_block = blk_size >> LINUX_SECTOR_SHIFT;
    1687           0 :         uint32_t io_min_size = blk_size;
    1688           0 :         uint32_t io_opt_size = spdk_max(io_opt_blocks * blk_size, io_min_size);
    1689             : 
    1690           0 :         struct ublk_params uparams = {
    1691           0 :                 .types = UBLK_PARAM_TYPE_BASIC,
    1692             :                 .len = sizeof(struct ublk_params),
    1693           0 :                 .basic = {
    1694           0 :                         .logical_bs_shift = spdk_u32log2(blk_size),
    1695           0 :                         .physical_bs_shift = spdk_u32log2(pblk_size),
    1696           0 :                         .io_min_shift = spdk_u32log2(io_min_size),
    1697           0 :                         .io_opt_shift = spdk_u32log2(io_opt_size),
    1698           0 :                         .dev_sectors = num_blocks * sectors_per_block,
    1699           0 :                         .max_sectors = UBLK_IO_MAX_BYTES >> LINUX_SECTOR_SHIFT,
    1700             :                 }
    1701             :         };
    1702             : 
    1703           0 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
    1704           0 :                 uparams.basic.attrs = UBLK_ATTR_VOLATILE_CACHE;
    1705           0 :         }
    1706             : 
    1707           0 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1708           0 :                 uparams.types |= UBLK_PARAM_TYPE_DISCARD;
    1709           0 :                 uparams.discard.discard_alignment = sectors_per_block;
    1710           0 :                 uparams.discard.max_discard_sectors = num_blocks * sectors_per_block;
    1711           0 :                 uparams.discard.max_discard_segments = 1;
    1712           0 :                 uparams.discard.discard_granularity = blk_size;
    1713           0 :                 if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1714           0 :                         uparams.discard.max_write_zeroes_sectors = num_blocks * sectors_per_block;
    1715           0 :                 }
    1716           0 :         }
    1717             : 
    1718           0 :         ublk->dev_params = uparams;
    1719           0 : }
    1720             : 
    1721             : static void
    1722           0 : _ublk_free_dev(void *arg)
    1723             : {
    1724           0 :         struct spdk_ublk_dev *ublk = arg;
    1725             : 
    1726           0 :         ublk_free_dev(ublk);
    1727           0 : }
    1728             : 
    1729             : static void
    1730           0 : free_buffers(void *arg)
    1731             : {
    1732           0 :         struct ublk_queue *q = arg;
    1733           0 :         uint32_t i;
    1734             : 
    1735           0 :         for (i = 0; i < q->q_depth; i++) {
    1736           0 :                 ublk_io_put_buffer(&q->ios[i], &q->poll_group->iobuf_ch);
    1737           0 :         }
    1738           0 :         free(q->ios);
    1739           0 :         q->ios = NULL;
    1740           0 :         spdk_thread_send_msg(spdk_thread_get_app_thread(), _ublk_free_dev, q->dev);
    1741           0 : }
    1742             : 
/*
 * Final teardown of a ublk device.  Buffer cleanup must happen on each
 * queue's own poll-group thread, so this function may re-enter: it sends
 * free_buffers() to one queue's thread at a time and returns; that
 * handler calls back here (via _ublk_free_dev on the app thread) until
 * no queue still owns an ios array, at which point the bdev descriptor
 * and the device itself are released.
 */
static void
ublk_free_dev(struct spdk_ublk_dev *ublk)
{
	struct ublk_queue *q;
	uint32_t q_idx;

	for (q_idx = 0; q_idx < ublk->num_queues; q_idx++) {
		q = &ublk->queues[q_idx];

		/* The ublk_io of this queue are not initialized. */
		if (q->ios == NULL) {
			continue;
		}

		/* We found a queue that has an ios array that may have buffers
		 * that need to be freed.  Send a message to the queue's thread
		 * so it can free the buffers back to that thread's iobuf channel.
		 * When it's done, it will set q->ios to NULL and send a message
		 * back to this function to continue.
		 */
		if (q->poll_group) {
			spdk_thread_send_msg(q->poll_group->ublk_thread, free_buffers, q);
			return;
		} else {
			/* Queue never joined a poll group: no buffers to return,
			 * just drop the array here.
			 */
			free(q->ios);
			q->ios = NULL;
		}
	}

	/* All of the buffers associated with the queues have been freed, so now
	 * continue with releasing resources for the rest of the ublk device.
	 */
	if (ublk->bdev_desc) {
		spdk_bdev_close(ublk->bdev_desc);
		ublk->bdev_desc = NULL;
	}

	ublk_dev_list_unregister(ublk);
	SPDK_NOTICELOG("ublk dev %d stopped\n", ublk->ublk_id);

	free(ublk);
}
    1785             : 
    1786             : static int
    1787           0 : ublk_ios_init(struct spdk_ublk_dev *ublk)
    1788             : {
    1789           0 :         int rc;
    1790           0 :         uint32_t i, j;
    1791           0 :         struct ublk_queue *q;
    1792             : 
    1793           0 :         for (i = 0; i < ublk->num_queues; i++) {
    1794           0 :                 q = &ublk->queues[i];
    1795             : 
    1796           0 :                 TAILQ_INIT(&q->completed_io_list);
    1797           0 :                 TAILQ_INIT(&q->inflight_io_list);
    1798           0 :                 q->dev = ublk;
    1799           0 :                 q->q_id = i;
    1800           0 :                 q->q_depth = ublk->queue_depth;
    1801           0 :                 q->ios = calloc(q->q_depth, sizeof(struct ublk_io));
    1802           0 :                 if (!q->ios) {
    1803           0 :                         rc = -ENOMEM;
    1804           0 :                         SPDK_ERRLOG("could not allocate queue ios\n");
    1805           0 :                         goto err;
    1806             :                 }
    1807           0 :                 for (j = 0; j < q->q_depth; j++) {
    1808           0 :                         q->ios[j].q = q;
    1809           0 :                 }
    1810           0 :         }
    1811             : 
    1812           0 :         return 0;
    1813             : 
    1814             : err:
    1815           0 :         for (i = 0; i < ublk->num_queues; i++) {
    1816           0 :                 free(q->ios);
    1817           0 :                 q->ios = NULL;
    1818           0 :         }
    1819           0 :         return rc;
    1820           0 : }
    1821             : 
    1822             : static void
    1823           0 : ublk_queue_recovery_done(void *arg)
    1824             : {
    1825           0 :         struct spdk_ublk_dev *ublk = arg;
    1826             : 
    1827           0 :         ublk->online_num_queues++;
    1828           0 :         if (ublk->is_recovering && (ublk->online_num_queues == ublk->num_queues)) {
    1829           0 :                 ublk_ctrl_cmd_submit(ublk, UBLK_CMD_END_USER_RECOVERY);
    1830           0 :         }
    1831           0 : }
    1832             : 
    1833             : static void
    1834           0 : ublk_queue_run(void *arg1)
    1835             : {
    1836           0 :         struct ublk_queue       *q = arg1;
    1837           0 :         struct spdk_ublk_dev *ublk = q->dev;
    1838           0 :         struct ublk_poll_group *poll_group = q->poll_group;
    1839             : 
    1840           0 :         assert(spdk_get_thread() == poll_group->ublk_thread);
    1841           0 :         q->bdev_ch = spdk_bdev_get_io_channel(ublk->bdev_desc);
    1842             :         /* Queues must be filled with IO in the io pthread */
    1843           0 :         ublk_dev_queue_io_init(q);
    1844             : 
    1845           0 :         TAILQ_INSERT_TAIL(&poll_group->queue_list, q, tailq);
    1846           0 :         spdk_thread_send_msg(spdk_thread_get_app_thread(), ublk_queue_recovery_done, ublk);
    1847           0 : }
    1848             : 
/*
 * Create a new ublk block device backed by the bdev named bdev_name and ask
 * the kernel to add it (UBLK_CMD_ADD_DEV).  ctrl_cb/cb_arg are stored on the
 * device for invocation when the control command completes.
 *
 * Must be called on the SPDK app thread.  Returns 0 on success or a negative
 * errno: -ENODEV (no ublk target), -EBUSY (ublk_id in use), -ENOTSUP (device
 * limit reached), -ENOMEM, or the error from opening the bdev / submitting
 * the control command.
 */
int
ublk_start_disk(const char *bdev_name, uint32_t ublk_id,
                uint32_t num_queues, uint32_t queue_depth,
                ublk_ctrl_cb ctrl_cb, void *cb_arg)
{
        int                     rc;
        uint32_t                i;
        struct spdk_bdev        *bdev;
        struct spdk_ublk_dev    *ublk = NULL;
        uint32_t                sector_per_block;

        assert(spdk_thread_is_app_thread(NULL));

        if (g_ublk_tgt.active == false) {
                SPDK_ERRLOG("NO ublk target exist\n");
                return -ENODEV;
        }

        /* Reject duplicate IDs. */
        ublk = ublk_dev_find_by_id(ublk_id);
        if (ublk != NULL) {
                SPDK_DEBUGLOG(ublk, "ublk id %d is in use.\n", ublk_id);
                return -EBUSY;
        }

        if (g_ublk_tgt.num_ublk_devs >= g_ublks_max) {
                SPDK_DEBUGLOG(ublk, "Reached maximum number of supported devices: %u\n", g_ublks_max);
                return -ENOTSUP;
        }

        ublk = calloc(1, sizeof(*ublk));
        if (ublk == NULL) {
                return -ENOMEM;
        }
        ublk->ctrl_cb = ctrl_cb;
        ublk->cb_arg = cb_arg;
        ublk->cdev_fd = -1;     /* char device not opened until ublk_start_dev() */
        ublk->ublk_id = ublk_id;
        UBLK_DEBUGLOG(ublk, "bdev %s num_queues %d queue_depth %d\n",
                      bdev_name, num_queues, queue_depth);

        rc = spdk_bdev_open_ext(bdev_name, true, ublk_bdev_event_cb, ublk, &ublk->bdev_desc);
        if (rc != 0) {
                SPDK_ERRLOG("could not open bdev %s, error=%d\n", bdev_name, rc);
                free(ublk);
                return rc;
        }

        bdev = spdk_bdev_desc_get_bdev(ublk->bdev_desc);
        ublk->bdev = bdev;
        /* Cache log2 of 512-byte sectors per bdev block for block<->sector math. */
        sector_per_block = spdk_bdev_get_data_block_size(ublk->bdev) >> LINUX_SECTOR_SHIFT;
        ublk->sector_per_block_shift = spdk_u32log2(sector_per_block);

        ublk->queues_closed = 0;
        ublk->num_queues = num_queues;
        ublk->queue_depth = queue_depth;
        /* Clamp caller-supplied geometry to the supported maximums. */
        if (ublk->queue_depth > UBLK_DEV_MAX_QUEUE_DEPTH) {
                SPDK_WARNLOG("Set Queue depth %d of UBLK %d to maximum %d\n",
                             ublk->queue_depth, ublk->ublk_id, UBLK_DEV_MAX_QUEUE_DEPTH);
                ublk->queue_depth = UBLK_DEV_MAX_QUEUE_DEPTH;
        }
        if (ublk->num_queues > UBLK_DEV_MAX_QUEUES) {
                SPDK_WARNLOG("Set Queue num %d of UBLK %d to maximum %d\n",
                             ublk->num_queues, ublk->ublk_id, UBLK_DEV_MAX_QUEUES);
                ublk->num_queues = UBLK_DEV_MAX_QUEUES;
        }
        /* Mark all rings unopened so teardown can distinguish them. */
        for (i = 0; i < ublk->num_queues; i++) {
                ublk->queues[i].ring.ring_fd = -1;
        }

        ublk_dev_info_init(ublk);
        ublk_info_param_init(ublk);
        rc = ublk_ios_init(ublk);
        if (rc != 0) {
                spdk_bdev_close(ublk->bdev_desc);
                free(ublk);
                return rc;
        }

        SPDK_INFOLOG(ublk, "Enabling kernel access to bdev %s via ublk %d\n",
                     bdev_name, ublk_id);

        /* Add ublk_dev to the end of disk list */
        ublk_dev_list_register(ublk);
        rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_ADD_DEV);
        if (rc < 0) {
                /* ublk_free_dev() also unregisters the device from the list. */
                SPDK_ERRLOG("UBLK can't add dev %d, rc %s\n", ublk->ublk_id, spdk_strerror(-rc));
                ublk_free_dev(ublk);
        }

        return rc;
}
    1940             : 
    1941             : static int
    1942           0 : ublk_start_dev(struct spdk_ublk_dev *ublk, bool is_recovering)
    1943             : {
    1944           0 :         int                     rc;
    1945           0 :         uint32_t                q_id;
    1946           0 :         struct spdk_thread      *ublk_thread;
    1947           0 :         char                    buf[64];
    1948             : 
    1949           0 :         snprintf(buf, 64, "%s%d", UBLK_BLK_CDEV, ublk->ublk_id);
    1950           0 :         ublk->cdev_fd = open(buf, O_RDWR);
    1951           0 :         if (ublk->cdev_fd < 0) {
    1952           0 :                 rc = ublk->cdev_fd;
    1953           0 :                 SPDK_ERRLOG("can't open %s, rc %d\n", buf, rc);
    1954           0 :                 return rc;
    1955             :         }
    1956             : 
    1957           0 :         for (q_id = 0; q_id < ublk->num_queues; q_id++) {
    1958           0 :                 rc = ublk_dev_queue_init(&ublk->queues[q_id]);
    1959           0 :                 if (rc) {
    1960           0 :                         return rc;
    1961             :                 }
    1962           0 :         }
    1963             : 
    1964           0 :         if (!is_recovering) {
    1965           0 :                 rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_START_DEV);
    1966           0 :                 if (rc < 0) {
    1967           0 :                         SPDK_ERRLOG("start dev %d failed, rc %s\n", ublk->ublk_id,
    1968             :                                     spdk_strerror(-rc));
    1969           0 :                         return rc;
    1970             :                 }
    1971           0 :         }
    1972             : 
    1973             :         /* Send queue to different spdk_threads for load balance */
    1974           0 :         for (q_id = 0; q_id < ublk->num_queues; q_id++) {
    1975           0 :                 ublk->queues[q_id].poll_group = &g_ublk_tgt.poll_groups[g_next_ublk_poll_group];
    1976           0 :                 ublk_thread = g_ublk_tgt.poll_groups[g_next_ublk_poll_group].ublk_thread;
    1977           0 :                 spdk_thread_send_msg(ublk_thread, ublk_queue_run, &ublk->queues[q_id]);
    1978           0 :                 g_next_ublk_poll_group++;
    1979           0 :                 if (g_next_ublk_poll_group == g_num_ublk_poll_groups) {
    1980           0 :                         g_next_ublk_poll_group = 0;
    1981           0 :                 }
    1982           0 :         }
    1983             : 
    1984           0 :         return 0;
    1985           0 : }
    1986             : 
    1987             : static int
    1988           0 : ublk_ctrl_start_recovery(struct spdk_ublk_dev *ublk)
    1989             : {
    1990           0 :         int                     rc;
    1991           0 :         uint32_t                i;
    1992             : 
    1993           0 :         if (ublk->ublk_id != ublk->dev_info.dev_id) {
    1994           0 :                 SPDK_ERRLOG("Invalid ublk ID\n");
    1995           0 :                 return -EINVAL;
    1996             :         }
    1997             : 
    1998           0 :         ublk->num_queues = ublk->dev_info.nr_hw_queues;
    1999           0 :         ublk->queue_depth = ublk->dev_info.queue_depth;
    2000           0 :         ublk->dev_info.ublksrv_pid = getpid();
    2001             : 
    2002           0 :         SPDK_DEBUGLOG(ublk, "Recovering ublk %d, num queues %u, queue depth %u, flags 0x%llx\n",
    2003             :                       ublk->ublk_id,
    2004             :                       ublk->num_queues, ublk->queue_depth, ublk->dev_info.flags);
    2005             : 
    2006           0 :         for (i = 0; i < ublk->num_queues; i++) {
    2007           0 :                 ublk->queues[i].ring.ring_fd = -1;
    2008           0 :         }
    2009             : 
    2010           0 :         ublk_info_param_init(ublk);
    2011           0 :         rc = ublk_ios_init(ublk);
    2012           0 :         if (rc != 0) {
    2013           0 :                 return rc;
    2014             :         }
    2015             : 
    2016           0 :         ublk->is_recovering = true;
    2017           0 :         return ublk_ctrl_cmd_submit(ublk, UBLK_CMD_START_USER_RECOVERY);
    2018           0 : }
    2019             : 
/*
 * Recover a ublk device that survived a previous process exit (requires
 * kernel user-recovery support).  Reopens the bdev, registers the device,
 * and kicks off the recovery sequence by fetching the kernel's device info
 * (UBLK_CMD_GET_DEV_INFO); ctrl_cb/cb_arg are invoked as control commands
 * complete.
 *
 * Must be called on the SPDK app thread.  Returns 0 on success or a negative
 * errno: -ENODEV (no ublk target), -ENOTSUP (no user-recovery support or
 * device limit reached), -EBUSY (ublk_id in use), -ENOMEM, or the error from
 * opening the bdev / submitting the control command.
 */
int
ublk_start_disk_recovery(const char *bdev_name, uint32_t ublk_id, ublk_ctrl_cb ctrl_cb,
                         void *cb_arg)
{
        int                     rc;
        struct spdk_bdev        *bdev;
        struct spdk_ublk_dev    *ublk = NULL;
        uint32_t                sector_per_block;

        assert(spdk_thread_is_app_thread(NULL));

        if (g_ublk_tgt.active == false) {
                SPDK_ERRLOG("NO ublk target exist\n");
                return -ENODEV;
        }

        if (!g_ublk_tgt.user_recovery) {
                SPDK_ERRLOG("User recovery is enabled with kernel version >= 6.4\n");
                return -ENOTSUP;
        }

        /* Reject duplicate IDs. */
        ublk = ublk_dev_find_by_id(ublk_id);
        if (ublk != NULL) {
                SPDK_DEBUGLOG(ublk, "ublk id %d is in use.\n", ublk_id);
                return -EBUSY;
        }

        if (g_ublk_tgt.num_ublk_devs >= g_ublks_max) {
                SPDK_DEBUGLOG(ublk, "Reached maximum number of supported devices: %u\n", g_ublks_max);
                return -ENOTSUP;
        }

        ublk = calloc(1, sizeof(*ublk));
        if (ublk == NULL) {
                return -ENOMEM;
        }
        ublk->ctrl_cb = ctrl_cb;
        ublk->cb_arg = cb_arg;
        ublk->cdev_fd = -1;     /* char device not opened until recovery proceeds */
        ublk->ublk_id = ublk_id;

        rc = spdk_bdev_open_ext(bdev_name, true, ublk_bdev_event_cb, ublk, &ublk->bdev_desc);
        if (rc != 0) {
                SPDK_ERRLOG("could not open bdev %s, error=%d\n", bdev_name, rc);
                free(ublk);
                return rc;
        }

        bdev = spdk_bdev_desc_get_bdev(ublk->bdev_desc);
        ublk->bdev = bdev;
        /* Cache log2 of 512-byte sectors per bdev block for block<->sector math. */
        sector_per_block = spdk_bdev_get_data_block_size(ublk->bdev) >> LINUX_SECTOR_SHIFT;
        ublk->sector_per_block_shift = spdk_u32log2(sector_per_block);

        SPDK_NOTICELOG("Recovering ublk %d with bdev %s\n", ublk->ublk_id, bdev_name);

        ublk_dev_list_register(ublk);
        /* Recovery continues in the GET_DEV_INFO completion path. */
        rc = ublk_ctrl_cmd_submit(ublk, UBLK_CMD_GET_DEV_INFO);
        if (rc < 0) {
                /* ublk_free_dev() also unregisters the device from the list. */
                ublk_free_dev(ublk);
        }

        return rc;
}
    2083             : 
/* Register the "ublk" and "ublk_io" log components used by the
 * SPDK_DEBUGLOG()/SPDK_INFOLOG() calls in this file.
 */
SPDK_LOG_REGISTER_COMPONENT(ublk)
SPDK_LOG_REGISTER_COMPONENT(ublk_io)

Generated by: LCOV version 1.15