LCOV - code coverage report
Current view: top level - lib/vhost - vhost_blk.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 74 848 8.7 %
Date: 2024-12-15 03:48:28 Functions: 12 69 17.4 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/virtio_blk.h>
       7             : 
       8             : #include "spdk/env.h"
       9             : #include "spdk/bdev.h"
      10             : #include "spdk/bdev_module.h"
      11             : #include "spdk/thread.h"
      12             : #include "spdk/likely.h"
      13             : #include "spdk/string.h"
      14             : #include "spdk/util.h"
      15             : #include "spdk/vhost.h"
      16             : #include "spdk/json.h"
      17             : 
      18             : #include "vhost_internal.h"
      19             : #include <rte_version.h>
      20             : 
      21             : /* Minimal set of features supported by every SPDK VHOST-BLK device */
      22             : #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
      23             :                 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
      24             :                 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
      25             :                 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
      26             :                 (1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
      27             :                 (1ULL << VIRTIO_BLK_F_MQ))
      28             : 
      29             : /* Not supported features */
      30             : #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
      31             :                 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
      32             :                 (1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
      33             : 
      34             : /* Protocol features supported by vhost-blk */
      35             : #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
      36             :                 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
      37             : 
      38             : #define VIRTIO_BLK_DEFAULT_TRANSPORT "vhost_user_blk"
      39             : 
      40             : struct spdk_vhost_user_blk_task {
      41             :         struct spdk_vhost_blk_task blk_task;
      42             :         struct spdk_vhost_blk_session *bvsession;
      43             :         struct spdk_vhost_virtqueue *vq;
      44             : 
      45             :         uint16_t req_idx;
      46             :         uint16_t num_descs;
      47             :         uint16_t buffer_id;
      48             :         uint16_t inflight_head;
      49             : 
      50             :         /* If set, the task is currently used for I/O processing. */
      51             :         bool used;
      52             : };
      53             : 
      54             : struct spdk_vhost_blk_dev {
      55             :         struct spdk_vhost_dev vdev;
      56             :         struct spdk_bdev *bdev;
      57             :         struct spdk_bdev_desc *bdev_desc;
      58             :         const struct spdk_virtio_blk_transport_ops *ops;
      59             : 
      60             :         bool readonly;
      61             : };
      62             : 
      63             : struct spdk_vhost_blk_session {
      64             :         /* The parent session must be the very first field in this struct */
      65             :         struct spdk_vhost_session vsession;
      66             :         struct spdk_vhost_blk_dev *bvdev;
      67             :         struct spdk_poller *requestq_poller;
      68             :         struct spdk_io_channel *io_channel;
      69             :         struct spdk_poller *stop_poller;
      70             : };
      71             : 
      72             : /* forward declaration */
      73             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend;
      74             : 
      75             : static void vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task,
      76             :                 void *cb_arg);
      77             : 
      78             : static int
      79           0 : vhost_user_process_blk_request(struct spdk_vhost_user_blk_task *user_task)
      80             : {
      81           0 :         struct spdk_vhost_blk_session *bvsession = user_task->bvsession;
      82           0 :         struct spdk_vhost_dev *vdev = &bvsession->bvdev->vdev;
      83             : 
      84           0 :         return virtio_blk_process_request(vdev, bvsession->io_channel, &user_task->blk_task,
      85             :                                           vhost_user_blk_request_finish, NULL);
      86             : }
      87             : 
      88             : static struct spdk_vhost_blk_dev *
      89           4 : to_blk_dev(struct spdk_vhost_dev *vdev)
      90             : {
      91           4 :         if (vdev == NULL) {
      92           0 :                 return NULL;
      93             :         }
      94             : 
      95           4 :         if (vdev->backend->type != VHOST_BACKEND_BLK) {
      96           0 :                 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name);
      97           0 :                 return NULL;
      98             :         }
      99             : 
     100           4 :         return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev);
     101             : }
     102             : 
     103             : struct spdk_bdev *
     104           0 : vhost_blk_get_bdev(struct spdk_vhost_dev *vdev)
     105             : {
     106           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     107             : 
     108           0 :         assert(bvdev != NULL);
     109             : 
     110           0 :         return bvdev->bdev;
     111             : }
     112             : 
     113             : static struct spdk_vhost_blk_session *
     114           0 : to_blk_session(struct spdk_vhost_session *vsession)
     115             : {
     116           0 :         assert(vsession->vdev->backend->type == VHOST_BACKEND_BLK);
     117           0 :         return (struct spdk_vhost_blk_session *)vsession;
     118             : }
     119             : 
     120             : static inline void
     121           0 : blk_task_inc_task_cnt(struct spdk_vhost_user_blk_task *task)
     122             : {
     123           0 :         task->bvsession->vsession.task_cnt++;
     124           0 : }
     125             : 
     126             : static inline void
     127           0 : blk_task_dec_task_cnt(struct spdk_vhost_user_blk_task *task)
     128             : {
     129           0 :         assert(task->bvsession->vsession.task_cnt > 0);
     130           0 :         task->bvsession->vsession.task_cnt--;
     131           0 : }
     132             : 
     133             : static void
     134           0 : blk_task_finish(struct spdk_vhost_user_blk_task *task)
     135             : {
     136           0 :         blk_task_dec_task_cnt(task);
     137           0 :         task->used = false;
     138           0 : }
     139             : 
     140             : static void
     141           0 : blk_task_init(struct spdk_vhost_user_blk_task *task)
     142             : {
     143           0 :         struct spdk_vhost_blk_task *blk_task = &task->blk_task;
     144             : 
     145           0 :         task->used = true;
     146           0 :         blk_task->iovcnt = SPDK_COUNTOF(blk_task->iovs);
     147           0 :         blk_task->status = NULL;
     148           0 :         blk_task->used_len = 0;
     149           0 :         blk_task->payload_size = 0;
     150           0 : }
     151             : 
     152             : static void
     153           0 : blk_task_enqueue(struct spdk_vhost_user_blk_task *task)
     154             : {
     155           0 :         if (task->vq->packed.packed_ring) {
     156           0 :                 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq,
     157           0 :                                              task->num_descs,
     158           0 :                                              task->buffer_id, task->blk_task.used_len,
     159           0 :                                              task->inflight_head);
     160             :         } else {
     161           0 :                 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq,
     162           0 :                                            task->req_idx, task->blk_task.used_len);
     163             :         }
     164           0 : }
     165             : 
     166             : static void
     167           0 : vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task, void *cb_arg)
     168             : {
     169             :         struct spdk_vhost_user_blk_task *user_task;
     170             : 
     171           0 :         user_task = SPDK_CONTAINEROF(task, struct spdk_vhost_user_blk_task, blk_task);
     172             : 
     173           0 :         blk_task_enqueue(user_task);
     174             : 
     175           0 :         SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d\n status: %" PRIu8"\n",
     176             :                       user_task, user_task->req_idx, status);
     177           0 :         blk_task_finish(user_task);
     178           0 : }
     179             : 
     180             : static void
     181           0 : blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task)
     182             : {
     183             : 
     184           0 :         if (task->status) {
     185           0 :                 *task->status = status;
     186             :         }
     187             : 
     188           0 :         task->cb(status, task, task->cb_arg);
     189           0 : }
     190             : 
     191             : /*
     192             :  * Process the task's descriptor chain and set up the data-related fields.
     193             :  * Return
     194             :  *   0 on success (total size of supplied buffers is stored in *length), -1 on failure
     195             :  *
     196             :  *   FIXME: Make this function report rd_cnt and wr_cnt
     197             :  */
     198             : static int
     199           0 : blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
     200             :                            struct spdk_vhost_virtqueue *vq,
     201             :                            uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     202             : {
     203           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     204           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     205           0 :         struct vring_desc *desc, *desc_table;
     206           0 :         uint16_t out_cnt = 0, cnt = 0;
     207           0 :         uint32_t desc_table_size, len = 0;
     208             :         uint32_t desc_handled_cnt;
     209             :         int rc;
     210             : 
     211           0 :         rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
     212           0 :         if (rc != 0) {
     213           0 :                 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     214           0 :                 return -1;
     215             :         }
     216             : 
     217           0 :         desc_handled_cnt = 0;
     218             :         while (1) {
     219             :                 /*
     220             :                  * Maximum cnt reached?
     221             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     222             :                  */
     223           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     224           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     225             :                                       vsession->name, req_idx);
     226           0 :                         return -1;
     227             :                 }
     228             : 
     229           0 :                 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) {
     230           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     231             :                                       vsession->name, req_idx, cnt);
     232           0 :                         return -1;
     233             :                 }
     234             : 
     235           0 :                 len += desc->len;
     236             : 
     237           0 :                 out_cnt += vhost_vring_desc_is_wr(desc);
     238             : 
     239           0 :                 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
     240           0 :                 if (rc != 0) {
     241           0 :                         SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n",
     242             :                                     vsession->name, req_idx);
     243           0 :                         return -1;
     244           0 :                 } else if (desc == NULL) {
     245           0 :                         break;
     246             :                 }
     247             : 
     248           0 :                 desc_handled_cnt++;
     249           0 :                 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
     250             :                         /* Break a cycle and report an error, if any. */
     251           0 :                         SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n",
     252             :                                     vsession->name, desc_table_size, desc_handled_cnt);
     253           0 :                         return -1;
     254             :                 }
     255             :         }
     256             : 
     257             :         /*
     258             :          * There must be at least two descriptors.
     259             :          * The first contains the request, so it must be readable.
     260             :          * The last contains the buffer for the response, so it must be writable.
     261             :          */
     262           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     263           0 :                 return -1;
     264             :         }
     265             : 
     266           0 :         *length = len;
     267           0 :         *iovs_cnt = cnt;
     268           0 :         return 0;
     269             : }
     270             : 
     271             : static int
     272           0 : blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
     273             :                            struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     274             :                            struct vring_packed_desc *desc_table, uint16_t desc_table_size,
     275             :                            struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     276             : {
     277           0 :         struct vring_packed_desc *desc;
     278           0 :         uint16_t cnt = 0, out_cnt = 0;
     279           0 :         uint32_t len = 0;
     280             : 
     281           0 :         if (desc_table == NULL) {
     282           0 :                 desc = &vq->vring.desc_packed[req_idx];
     283             :         } else {
     284           0 :                 req_idx = 0;
     285           0 :                 desc = desc_table;
     286             :         }
     287             : 
     288             :         while (1) {
     289             :                 /*
     290             :                  * Maximum cnt reached?
     291             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     292             :                  */
     293           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     294           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     295             :                                     vsession->name, req_idx);
     296           0 :                         return -EINVAL;
     297             :                 }
     298             : 
     299           0 :                 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
     300           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     301             :                                     vsession->name, req_idx, cnt);
     302           0 :                         return -EINVAL;
     303             :                 }
     304             : 
     305           0 :                 len += desc->len;
     306           0 :                 out_cnt += vhost_vring_packed_desc_is_wr(desc);
     307             : 
     308             :                 /* desc being NULL means we have reached the last desc of this request */
     309           0 :                 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
     310           0 :                 if (desc == NULL) {
     311           0 :                         break;
     312             :                 }
     313             :         }
     314             : 
     315             :         /*
     316             :          * There must be at least two descriptors.
     317             :          * The first contains the request, so it must be readable.
     318             :          * The last contains the buffer for the response, so it must be writable.
     319             :          */
     320           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     321           0 :                 return -EINVAL;
     322             :         }
     323             : 
     324           0 :         *length = len;
     325           0 :         *iovs_cnt = cnt;
     326             : 
     327           0 :         return 0;
     328             : }
     329             : 
     330             : static int
     331           0 : blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
     332             :                             struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     333             :                             struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     334             : {
     335           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     336           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     337           0 :         struct vring_packed_desc *desc = NULL, *desc_table;
     338           0 :         uint32_t desc_table_size;
     339             :         int rc;
     340             : 
     341           0 :         rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
     342             :                                       &desc_table, &desc_table_size);
     343           0 :         if (spdk_unlikely(rc != 0)) {
     344           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     345           0 :                 return rc;
     346             :         }
     347             : 
     348           0 :         return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     349             :                                           iovs, iovs_cnt, length);
     350             : }
     351             : 
     352             : static int
     353           0 : blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
     354             :                               struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     355             :                               struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     356             : {
     357           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     358           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     359           0 :         spdk_vhost_inflight_desc *inflight_desc;
     360           0 :         struct vring_packed_desc *desc_table;
     361           0 :         uint16_t out_cnt = 0, cnt = 0;
     362           0 :         uint32_t desc_table_size, len = 0;
     363           0 :         int rc = 0;
     364             : 
     365           0 :         rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
     366             :                                            req_idx, &inflight_desc, &desc_table, &desc_table_size);
     367           0 :         if (spdk_unlikely(rc != 0)) {
     368           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     369           0 :                 return rc;
     370             :         }
     371             : 
     372           0 :         if (desc_table != NULL) {
     373           0 :                 return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     374             :                                                   iovs, iovs_cnt, length);
     375             :         }
     376             : 
     377             :         while (1) {
     378             :                 /*
     379             :                  * Maximum cnt reached?
     380             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     381             :                  */
     382           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     383           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     384             :                                     vsession->name, req_idx);
     385           0 :                         return -EINVAL;
     386             :                 }
     387             : 
     388           0 :                 if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
     389           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     390             :                                     vsession->name, req_idx, cnt);
     391           0 :                         return -EINVAL;
     392             :                 }
     393             : 
     394           0 :                 len += inflight_desc->len;
     395           0 :                 out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);
     396             : 
     397             :                 /* A descriptor without F_NEXT is the last desc in the chain */
     398           0 :                 if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
     399           0 :                         break;
     400             :                 }
     401             : 
     402           0 :                 inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
     403             :         }
     404             : 
     405             :         /*
     406             :          * There must be at least two descriptors.
     407             :          * The first contains the request, so it must be readable.
     408             :          * The last contains the buffer for the response, so it must be writable.
     409             :          */
     410           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     411           0 :                 return -EINVAL;
     412             :         }
     413             : 
     414           0 :         *length = len;
     415           0 :         *iovs_cnt = cnt;
     416             : 
     417           0 :         return 0;
     418             : }
     419             : 
     420             : static void
     421           0 : blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     422             : {
     423           0 :         struct spdk_vhost_blk_task *task = cb_arg;
     424             : 
     425           0 :         spdk_bdev_free_io(bdev_io);
     426           0 :         blk_request_finish(success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR, task);
     427           0 : }
     428             : 
     429             : static void
     430           0 : blk_request_resubmit(void *arg)
     431             : {
     432           0 :         struct spdk_vhost_blk_task *task = arg;
     433           0 :         int rc = 0;
     434             : 
     435           0 :         rc = virtio_blk_process_request(task->bdev_io_wait_vdev, task->bdev_io_wait_ch, task,
     436             :                                         task->cb, task->cb_arg);
     437           0 :         if (rc == 0) {
     438           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task);
     439             :         } else {
     440           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task);
     441             :         }
     442           0 : }
     443             : 
     444             : static inline void
     445           0 : blk_request_queue_io(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     446             :                      struct spdk_vhost_blk_task *task)
     447             : {
     448             :         int rc;
     449           0 :         struct spdk_bdev *bdev = vhost_blk_get_bdev(vdev);
     450             : 
     451           0 :         task->bdev_io_wait.bdev = bdev;
     452           0 :         task->bdev_io_wait.cb_fn = blk_request_resubmit;
     453           0 :         task->bdev_io_wait.cb_arg = task;
     454           0 :         task->bdev_io_wait_ch = ch;
     455           0 :         task->bdev_io_wait_vdev = vdev;
     456             : 
     457           0 :         rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait);
     458           0 :         if (rc != 0) {
     459           0 :                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     460             :         }
     461           0 : }
     462             : 
     463             : int
     464           0 : virtio_blk_process_request(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     465             :                            struct spdk_vhost_blk_task *task, virtio_blk_request_cb cb, void *cb_arg)
     466             : {
     467           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     468           0 :         struct virtio_blk_outhdr req;
     469             :         struct virtio_blk_discard_write_zeroes *desc;
     470             :         struct iovec *iov;
     471             :         uint32_t type;
     472             :         uint64_t flush_bytes;
     473             :         uint32_t payload_len;
     474             :         uint16_t iovcnt;
     475             :         int rc;
     476             : 
     477           0 :         assert(bvdev != NULL);
     478             : 
     479           0 :         task->cb = cb;
     480           0 :         task->cb_arg = cb_arg;
     481             : 
     482           0 :         iov = &task->iovs[0];
     483           0 :         if (spdk_unlikely(iov->iov_len != sizeof(req))) {
     484           0 :                 SPDK_DEBUGLOG(vhost_blk,
     485             :                               "First descriptor size is %zu but expected %zu (task = %p).\n",
     486             :                               iov->iov_len, sizeof(req), task);
     487           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     488           0 :                 return -1;
     489             :         }
     490             : 
     491             :         /* Some SeaBIOS versions don't align the virtio_blk_outhdr on an 8-byte boundary, which
     492             :          * triggers ubsan errors.  So copy this small 16-byte structure to the stack to work around
     493             :          * this problem.
     494             :          */
     495           0 :         memcpy(&req, iov->iov_base, sizeof(req));
     496             : 
     497           0 :         iov = &task->iovs[task->iovcnt - 1];
     498           0 :         if (spdk_unlikely(iov->iov_len != 1)) {
     499           0 :                 SPDK_DEBUGLOG(vhost_blk,
     500             :                               "Last descriptor size is %zu but expected %d (task = %p).\n",
     501             :                               iov->iov_len, 1, task);
     502           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     503           0 :                 return -1;
     504             :         }
     505             : 
     506           0 :         payload_len = task->payload_size;
     507           0 :         task->status = iov->iov_base;
     508           0 :         payload_len -= sizeof(req) + sizeof(*task->status);
     509           0 :         iovcnt = task->iovcnt - 2;
     510             : 
     511           0 :         type = req.type;
     512             : #ifdef VIRTIO_BLK_T_BARRIER
     513             :         /* Don't care about barrier for now (as QEMU's virtio-blk does). */
     514           0 :         type &= ~VIRTIO_BLK_T_BARRIER;
     515             : #endif
     516             : 
     517           0 :         switch (type) {
     518           0 :         case VIRTIO_BLK_T_IN:
     519             :         case VIRTIO_BLK_T_OUT:
     520           0 :                 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) {
     521           0 :                         SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (task = %p).\n",
     522             :                                     type ? "WRITE" : "READ", task);
     523           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     524           0 :                         return -1;
     525             :                 }
     526             : 
     527           0 :                 if (type == VIRTIO_BLK_T_IN) {
     528           0 :                         task->used_len = payload_len + sizeof(*task->status);
     529           0 :                         rc = spdk_bdev_readv(bvdev->bdev_desc, ch,
     530           0 :                                              &task->iovs[1], iovcnt, req.sector * 512,
     531             :                                              payload_len, blk_request_complete_cb, task);
     532           0 :                 } else if (!bvdev->readonly) {
     533           0 :                         task->used_len = sizeof(*task->status);
     534           0 :                         rc = spdk_bdev_writev(bvdev->bdev_desc, ch,
     535           0 :                                               &task->iovs[1], iovcnt, req.sector * 512,
     536             :                                               payload_len, blk_request_complete_cb, task);
     537             :                 } else {
     538           0 :                         SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n");
     539           0 :                         rc = -1;
     540             :                 }
     541             : 
     542           0 :                 if (rc) {
     543           0 :                         if (rc == -ENOMEM) {
     544           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     545           0 :                                 blk_request_queue_io(vdev, ch, task);
     546             :                         } else {
     547           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     548           0 :                                 return -1;
     549             :                         }
     550             :                 }
     551           0 :                 break;
     552           0 :         case VIRTIO_BLK_T_DISCARD:
     553           0 :                 desc = task->iovs[1].iov_base;
     554           0 :                 if (payload_len != sizeof(*desc)) {
     555           0 :                         SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
     556           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     557           0 :                         return -1;
     558             :                 }
     559             : 
     560           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     561           0 :                         SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n");
     562           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     563           0 :                         return -1;
     564             :                 }
     565             : 
     566           0 :                 rc = spdk_bdev_unmap(bvdev->bdev_desc, ch,
     567           0 :                                      desc->sector * 512, desc->num_sectors * 512,
     568             :                                      blk_request_complete_cb, task);
     569           0 :                 if (rc) {
     570           0 :                         if (rc == -ENOMEM) {
     571           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     572           0 :                                 blk_request_queue_io(vdev, ch, task);
     573             :                         } else {
     574           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     575           0 :                                 return -1;
     576             :                         }
     577             :                 }
     578           0 :                 break;
     579           0 :         case VIRTIO_BLK_T_WRITE_ZEROES:
     580           0 :                 desc = task->iovs[1].iov_base;
     581           0 :                 if (payload_len != sizeof(*desc)) {
     582           0 :                         SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
     583           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     584           0 :                         return -1;
     585             :                 }
     586             : 
     587             :                 /* SPDK does not support unmapping the range as part of WRITE ZEROES. The kernel
     588             :                  * sets this flag by default without checking whether the unmap feature was
     589             :                  * negotiated, and the flag isn't mandatory, so just print a warning.
     590             :                  */
     591           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     592           0 :                         SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n",
     593             :                                      (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512);
     594             :                 }
     595             : 
     596           0 :                 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, ch,
     597           0 :                                             desc->sector * 512, desc->num_sectors * 512,
     598             :                                             blk_request_complete_cb, task);
     599           0 :                 if (rc) {
     600           0 :                         if (rc == -ENOMEM) {
     601           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     602           0 :                                 blk_request_queue_io(vdev, ch, task);
     603             :                         } else {
     604           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     605           0 :                                 return -1;
     606             :                         }
     607             :                 }
     608           0 :                 break;
     609           0 :         case VIRTIO_BLK_T_FLUSH:
     610           0 :                 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev);
     611           0 :                 if (req.sector != 0) {
     612           0 :                         SPDK_NOTICELOG("sector must be zero for flush command\n");
     613           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     614           0 :                         return -1;
     615             :                 }
     616           0 :                 rc = spdk_bdev_flush(bvdev->bdev_desc, ch,
     617             :                                      0, flush_bytes,
     618             :                                      blk_request_complete_cb, task);
     619           0 :                 if (rc) {
     620           0 :                         if (rc == -ENOMEM) {
     621           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     622           0 :                                 blk_request_queue_io(vdev, ch, task);
     623           0 :                         } else if (rc == -ENOTSUP) {
     624           0 :                                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     625           0 :                                 return -1;
     626             :                         } else {
     627           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     628           0 :                                 return -1;
     629             :                         }
     630             :                 }
     631           0 :                 break;
     632           0 :         case VIRTIO_BLK_T_GET_ID:
     633           0 :                 if (!iovcnt || !payload_len) {
     634           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     635           0 :                         return -1;
     636             :                 }
     637           0 :                 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len);
     638           0 :                 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_name(bvdev->bdev),
     639           0 :                                 task->used_len, ' ');
     640           0 :                 blk_request_finish(VIRTIO_BLK_S_OK, task);
     641           0 :                 break;
     642           0 :         default:
     643           0 :                 SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type);
     644           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     645           0 :                 return -1;
     646             :         }
     647             : 
     648           0 :         return 0;
     649             : }
     650             : 
                       /*
                        * Process one request of a split virtqueue.
                        *
                        * vq      - virtqueue the request belongs to; asserted not to be a packed ring.
                        * req_idx - index used to select the task in vq->tasks. It is not range-checked
                        *           here; the callers visible in this file (process_vq and
                        *           submit_inflight_desc) compare it against vq->vring.size first.
                        *
                        * Returns nothing. The outcome is reported through the task completion helpers
                        * (blk_task_enqueue / vhost_user_blk_request_finish) or by whatever
                        * vhost_user_process_blk_request() does with the task.
                        */
     651             : static void
     652           0 : process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     653             : {
     654             :         struct spdk_vhost_user_blk_task *task;
     655             :         struct spdk_vhost_blk_task *blk_task;
     656             :         int rc;
     657             : 
     658           0 :         assert(vq->packed.packed_ring == false);
     659             : 
     660           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[req_idx];
     661           0 :         blk_task = &task->blk_task;
                               /* The same index was handed over again while its task is still marked used:
                                * log it and enqueue the task with used_len = 0 instead of processing it.
                                */
     662           0 :         if (spdk_unlikely(task->used)) {
     663           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     664             :                             task->bvsession->vsession.name, req_idx);
     665           0 :                 blk_task->used_len = 0;
     666           0 :                 blk_task_enqueue(task);
     667           0 :                 return;
     668             :         }
     669             : 
     670           0 :         blk_task_inc_task_cnt(task);
     671             : 
     672           0 :         blk_task_init(task);
     673             : 
                               /* Fills blk_task->iovs, ->iovcnt and ->payload_size for this request. */
     674           0 :         rc = blk_iovs_split_queue_setup(task->bvsession, vq, task->req_idx,
     675           0 :                                         blk_task->iovs, &blk_task->iovcnt, &blk_task->payload_size);
     676             : 
     677           0 :         if (rc) {
     678           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     679             :                 /* Setup returned non-zero: complete the request as UNSUPP without submitting it. */
     680           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     681           0 :                 return;
     682             :         }
     683             : 
                               /* A non-zero return is only logged here; this function does not complete
                                * the task itself on that path.
                                */
     684           0 :         if (vhost_user_process_blk_request(task) == 0) {
     685           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     686             :                               req_idx);
     687             :         } else {
     688           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, req_idx);
     689             :         }
     690             : }
     691             : 
                       /*
                        * Process one request of a packed virtqueue.
                        *
                        * vq      - virtqueue the request belongs to; asserted to be a packed ring.
                        * req_idx - descriptor index of the request (process_packed_vq passes
                        *           vq->last_avail_idx). It is recorded in the task and in the inflight
                        *           region, but the task itself is selected by the buffer id returned
                        *           by vhost_vring_packed_desc_get_buffer_id().
                        *
                        * Returns nothing; the outcome is reported through the task completion helpers.
                        */
     692             : static void
     693           0 : process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     694             : {
     695             :         struct spdk_vhost_user_blk_task *task;
     696             :         struct spdk_vhost_blk_task *blk_task;
     697           0 :         uint16_t task_idx = req_idx, num_descs;
     698             :         int rc;
     699             : 
     700           0 :         assert(vq->packed.packed_ring);
     701             : 
     702             :         /* The packed ring uses the buffer_id as the task_idx to get the task struct.
     703             :          * The kernel driver uses vq->free_head to set the buffer_id, so the value
     704             :          * must be in the range of 0 ~ vring.size. The free_head value must be unique
     705             :          * among the outstanding requests.
     706             :          * We can't use the req_idx as the task_idx because the desc can be reused in
     707             :          * the next phase even when it's not completed in the previous phase. For example,
     708             :          * at phase 0, last_used_idx was 2 and desc0 was not completed. Then after moving to
     709             :          * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used
     710             :          * as task_idx because we will know task[0]->used is true at phase 1.
     711             :          * The split queue is quite different: the desc is inserted into the free list when
     712             :          * the device completes the request, and the driver gets the desc from the free list,
     713             :          * which ensures the req_idx is unique among the outstanding requests.
     714             :          */
     715           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
     716             : 
                               /* NOTE(review): task_idx indexes vq->tasks without a range check in this
                                * function - confirm the buffer id is validated elsewhere.
                                */
     717           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     718           0 :         blk_task = &task->blk_task;
     719           0 :         if (spdk_unlikely(task->used)) {
     720           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     721             :                             task->bvsession->vsession.name, task_idx);
     722           0 :                 blk_task->used_len = 0;
     723           0 :                 blk_task_enqueue(task);
     724           0 :                 return;
     725             :         }
     726             : 
     727           0 :         task->req_idx = req_idx;
     728           0 :         task->num_descs = num_descs;
     729           0 :         task->buffer_id = task_idx;
     730             : 
                               /* Record the descriptor range [req_idx, req_idx + num_descs - 1] (modulo the
                                * ring size) as inflight; the call stores its result in task->inflight_head.
                                */
     731           0 :         rte_vhost_set_inflight_desc_packed(task->bvsession->vsession.vid, vq->vring_idx,
     732           0 :                                            req_idx, (req_idx + num_descs - 1) % vq->vring.size,
     733             :                                            &task->inflight_head);
     734             : 
     735           0 :         blk_task_inc_task_cnt(task);
     736             : 
     737           0 :         blk_task_init(task);
     738             : 
     739           0 :         rc = blk_iovs_packed_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     740             :                                          &blk_task->iovcnt,
     741             :                                          &blk_task->payload_size);
     742           0 :         if (rc) {
     743           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     744             :                 /* Setup returned non-zero: complete the request as UNSUPP without submitting it. */
     745           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     746           0 :                 return;
     747             :         }
     748             : 
                               /* Note that the value printed as "req_idx" in these two messages is task_idx. */
     749           0 :         if (vhost_user_process_blk_request(task) == 0) {
     750           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     751             :                               task_idx);
     752             :         } else {
     753           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     754             :         }
     755             : }
     756             : 
                       /*
                        * Re-process one packed-ring request taken from the inflight resubmit list.
                        *
                        * vq      - virtqueue whose vring_inflight.inflight_packed->desc array is read.
                        * req_idx - index into that inflight descriptor array (the caller,
                        *           submit_inflight_desc, checks it against vq->vring.size).
                        *
                        * The task is selected by the id stored in the inflight entry at desc->last,
                        * and the number of descriptors comes from desc->num. Returns nothing.
                        */
     757             : static void
     758           0 : process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
     759             :                                  uint16_t req_idx)
     760             : {
     761           0 :         spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
     762           0 :         spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
     763             :         struct spdk_vhost_user_blk_task *task;
     764             :         struct spdk_vhost_blk_task *blk_task;
     765             :         uint16_t task_idx, num_descs;
     766             :         int rc;
     767             : 
                               /* NOTE(review): desc->last and the resulting id are used as array indexes
                                * without a range check in this function - confirm they are trusted.
                                */
     768           0 :         task_idx = desc_array[desc->last].id;
     769           0 :         num_descs = desc->num;
     770             :         /* In packed ring reconnection, we use the last_used_idx as the
     771             :          * initial value. So when we process the inflight descs we still
     772             :          * need to update the available ring index.
     773             :          */
     774           0 :         vq->last_avail_idx += num_descs;
                               /* Wrap around the ring and flip the avail phase when the index passes the end. */
     775           0 :         if (vq->last_avail_idx >= vq->vring.size) {
     776           0 :                 vq->last_avail_idx -= vq->vring.size;
     777           0 :                 vq->packed.avail_phase = !vq->packed.avail_phase;
     778             :         }
     779             : 
                               /* The avail index above has already been advanced, also on the
                                * "already pending" path below.
                                */
     780           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     781           0 :         blk_task = &task->blk_task;
     782           0 :         if (spdk_unlikely(task->used)) {
     783           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     784             :                             task->bvsession->vsession.name, task_idx);
     785           0 :                 blk_task->used_len = 0;
     786           0 :                 blk_task_enqueue(task);
     787           0 :                 return;
     788             :         }
     789             : 
     790           0 :         task->req_idx = req_idx;
     791           0 :         task->num_descs = num_descs;
     792           0 :         task->buffer_id = task_idx;
     793             :         /* It's for cleaning inflight entries */
     794           0 :         task->inflight_head = req_idx;
     795             : 
     796           0 :         blk_task_inc_task_cnt(task);
     797             : 
     798           0 :         blk_task_init(task);
     799             : 
     800           0 :         rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     801             :                                            &blk_task->iovcnt,
     802             :                                            &blk_task->payload_size);
     803           0 :         if (rc) {
     804           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     805             :                 /* Setup returned non-zero: complete the request as UNSUPP without submitting it. */
     806           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     807           0 :                 return;
     808             :         }
     809             : 
                               /* Note that the value printed as "req_idx" in these two messages is task_idx. */
     810           0 :         if (vhost_user_process_blk_request(task) == 0) {
     811           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     812             :                               task_idx);
     813             :         } else {
     814           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     815             :         }
     816             : }
     817             : 
                       /*
                        * Resubmit the requests listed in vq->vring_inflight.resubmit_inflight.
                        *
                        * bvsession - blk session owning the queue (used for its vsession).
                        * vq        - virtqueue whose resubmit list is drained.
                        *
                        * Returns 0 when there is no resubmit info, no list, or an empty list.
                        * Otherwise returns the number of entries that were in the list, and
                        * resets resubmit_num to 0 so the list is not processed again.
                        */
     818             : static int
     819           0 : submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
     820             :                      struct spdk_vhost_virtqueue *vq)
     821             : {
     822             :         struct spdk_vhost_session *vsession;
     823             :         spdk_vhost_resubmit_info *resubmit;
     824             :         spdk_vhost_resubmit_desc *resubmit_list;
     825             :         uint16_t req_idx;
     826             :         int i, resubmit_cnt;
     827             : 
     828           0 :         resubmit = vq->vring_inflight.resubmit_inflight;
     829           0 :         if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL ||
     830             :                         resubmit->resubmit_num == 0)) {
     831           0 :                 return 0;
     832             :         }
     833             : 
     834           0 :         resubmit_list = resubmit->resubmit_list;
     835           0 :         vsession = &bvsession->vsession;
     836             : 
                               /* The list is walked from its last entry down to its first. */
     837           0 :         for (i = resubmit->resubmit_num - 1; i >= 0; --i) {
     838           0 :                 req_idx = resubmit_list[i].index;
     839           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Start processing resubmit request idx %"PRIu16"======\n",
     840             :                               req_idx);
     841             : 
                                       /* Out-of-range index: log it, put it on the used ring with length 0
                                        * and move on to the next entry.
                                        */
     842           0 :                 if (spdk_unlikely(req_idx >= vq->vring.size)) {
     843           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     844             :                                     vsession->name, req_idx, vq->vring.size);
     845           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
     846           0 :                         continue;
     847             :                 }
     848             : 
     849           0 :                 if (vq->packed.packed_ring) {
     850           0 :                         process_packed_inflight_blk_task(vq, req_idx);
     851             :                 } else {
     852           0 :                         process_blk_task(vq, req_idx);
     853             :                 }
     854             :         }
                               /* Remember the count for the return value before clearing it. */
     855           0 :         resubmit_cnt = resubmit->resubmit_num;
     856           0 :         resubmit->resubmit_num = 0;
     857           0 :         return resubmit_cnt;
     858             : }
     859             : 
                       /*
                        * Process a split virtqueue: first resubmit inflight requests, then fetch up
                        * to SPDK_VHOST_VQ_MAX_SUBMISSIONS new request indexes from the avail ring
                        * and process each of them.
                        *
                        * Returns the number of new requests fetched from the avail ring; when that
                        * is zero, returns the number of resubmitted inflight requests instead.
                        */
     860             : static int
     861           0 : process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     862             : {
     863           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     864           0 :         uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS];
     865             :         uint16_t reqs_cnt, i;
     866           0 :         int resubmit_cnt = 0;
     867             : 
     868           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     869             : 
     870           0 :         reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
     871           0 :         if (!reqs_cnt) {
     872           0 :                 return resubmit_cnt;
     873             :         }
     874             : 
     875           0 :         for (i = 0; i < reqs_cnt; i++) {
     876           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     877             :                               reqs[i]);
     878             : 
                                       /* Out-of-range index: log it, put it on the used ring with length 0
                                        * and skip it. It still counts towards the returned reqs_cnt.
                                        */
     879           0 :                 if (spdk_unlikely(reqs[i] >= vq->vring.size)) {
     880           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     881             :                                     vsession->name, reqs[i], vq->vring.size);
     882           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
     883           0 :                         continue;
     884             :                 }
     885             : 
                                       /* Mark the descriptor as inflight before the request is processed. */
     886           0 :                 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]);
     887             : 
     888           0 :                 process_blk_task(vq, reqs[i]);
     889             :         }
     890             : 
     891           0 :         return reqs_cnt;
     892             : }
     893             : 
                       /*
                        * Process a packed virtqueue: first resubmit inflight requests, then process
                        * available requests one at a time, at most SPDK_VHOST_VQ_MAX_SUBMISSIONS per
                        * call.
                        *
                        * Returns the number of requests processed in the loop; when that is zero,
                        * returns the number of resubmitted inflight requests instead.
                        */
     894             : static int
     895           0 : process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     896             : {
     897           0 :         uint16_t i = 0;
     898           0 :         uint16_t count = 0;
     899           0 :         int resubmit_cnt = 0;
     900             : 
     901           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     902             : 
                               /* vq->last_avail_idx is re-read on every iteration; this loop does not
                                * modify it, so it is presumably advanced by the helpers called here
                                * - TODO confirm.
                                */
     903           0 :         while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
     904           0 :                vhost_vq_packed_ring_is_avail(vq)) {
     905           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     906             :                               vq->last_avail_idx);
     907           0 :                 count++;
     908           0 :                 process_packed_blk_task(vq, vq->last_avail_idx);
     909             :         }
     910             : 
     911           0 :         return count > 0 ? count : resubmit_cnt;
     912             : }
     913             : 
                       /*
                        * Process one virtqueue with the handler matching its ring layout (packed or
                        * split), then call vhost_session_vq_used_signal() for it.
                        *
                        * Returns the value returned by process_packed_vq() / process_vq(), i.e. a
                        * request count.
                        */
     914             : static int
     915           0 : _vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
     916             : {
     917           0 :         struct spdk_vhost_session *vsession = vq->vsession;
     918           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
     919             :         bool packed_ring;
     920           0 :         int rc = 0;
     921             : 
     922           0 :         packed_ring = vq->packed.packed_ring;
     923           0 :         if (packed_ring) {
     924           0 :                 rc = process_packed_vq(bvsession, vq);
     925             :         } else {
     926           0 :                 rc = process_vq(bvsession, vq);
     927             :         }
     928             : 
     929           0 :         vhost_session_vq_used_signal(vq);
     930             : 
     931           0 :         return rc;
     932             : 
     933             : }
     934             : 
                       /*
                        * void * callback wrapper around _vdev_vq_worker().
                        *
                        * arg - the struct spdk_vhost_virtqueue to process.
                        *
                        * Returns the request count from _vdev_vq_worker() unchanged (it is not
                        * mapped to SPDK_POLLER_BUSY / SPDK_POLLER_IDLE the way vdev_worker does).
                        */
     935             : static int
     936           0 : vdev_vq_worker(void *arg)
     937             : {
     938           0 :         struct spdk_vhost_virtqueue *vq = arg;
     939             : 
     940           0 :         return _vdev_vq_worker(vq);
     941             : }
     942             : 
                       /*
                        * void * callback that processes every virtqueue of a blk session.
                        *
                        * arg - the struct spdk_vhost_blk_session to process.
                        *
                        * Runs _vdev_vq_worker() on virtqueue[0 .. max_queues - 1] and sums the
                        * returned counts. Returns SPDK_POLLER_BUSY when the sum is positive,
                        * SPDK_POLLER_IDLE otherwise.
                        */
     943             : static int
     944           0 : vdev_worker(void *arg)
     945             : {
     946           0 :         struct spdk_vhost_blk_session *bvsession = arg;
     947           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     948             :         uint16_t q_idx;
     949           0 :         int rc = 0;
     950             : 
     951           0 :         for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
     952           0 :                 rc += _vdev_vq_worker(&vsession->virtqueue[q_idx]);
     953             :         }
     954             : 
     955           0 :         return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
     956             : }
     957             : 
                       /*
                        * "No bdev" handler for a split virtqueue: takes at most one request from
                        * the avail ring and completes it immediately, without submitting any I/O.
                        *
                        * If the request's iovecs can be set up, VIRTIO_BLK_S_IOERR is written to the
                        * first byte of the last iovec (presumably the request's status byte - TODO
                        * confirm). The request is then placed on the used ring with length 0 in
                        * either case.
                        */
     958             : static void
     959           0 : no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     960             : {
     961           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     962           0 :         struct iovec iovs[SPDK_VHOST_IOVS_MAX];
     963           0 :         uint32_t length;
     964           0 :         uint16_t iovcnt, req_idx;
     965             : 
                               /* Nothing to do unless exactly one request index was fetched. */
     966           0 :         if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) {
     967           0 :                 return;
     968             :         }
     969             : 
                               /* NOTE(review): unlike process_vq, req_idx is not compared against
                                * vq->vring.size here - confirm the setup helper validates it.
                                */
     970           0 :         iovcnt = SPDK_COUNTOF(iovs);
     971           0 :         if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) {
     972           0 :                 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR;
     973           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
     974             :         }
     975             : 
     976           0 :         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
     977             : }
     978             : 
                       /*
                        * "No bdev" handler for a packed virtqueue: if a request is available at
                        * vq->last_avail_idx, completes it immediately without submitting any I/O.
                        *
                        * The task is selected by the buffer id returned by
                        * vhost_vring_packed_desc_get_buffer_id(). If that task is already marked
                        * used, the request is only logged and handed to
                        * vhost_vq_packed_ring_enqueue() with the task's current fields. Otherwise
                        * the task is initialised and, when its iovecs can be set up,
                        * VIRTIO_BLK_S_IOERR is written to the first byte of the last iovec
                        * (presumably the request's status byte - TODO confirm) before the request
                        * is enqueued as completed.
                        */
     979             : static void
     980           0 : no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     981             : {
     982           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     983             :         struct spdk_vhost_user_blk_task *task;
     984             :         struct spdk_vhost_blk_task *blk_task;
     985           0 :         uint32_t length;
     986           0 :         uint16_t req_idx = vq->last_avail_idx;
     987           0 :         uint16_t task_idx, num_descs;
     988             : 
     989           0 :         if (!vhost_vq_packed_ring_is_avail(vq)) {
     990           0 :                 return;
     991             :         }
     992             : 
     993           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
     994           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     995           0 :         blk_task = &task->blk_task;
     996           0 :         if (spdk_unlikely(task->used)) {
     997           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     998             :                             vsession->name, req_idx);
     999           0 :                 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
    1000           0 :                                              task->buffer_id, blk_task->used_len,
    1001           0 :                                              task->inflight_head);
    1002           0 :                 return;
    1003             :         }
    1004             : 
    1005           0 :         task->req_idx = req_idx;
    1006           0 :         task->num_descs = num_descs;
    1007           0 :         task->buffer_id = task_idx;
    1008           0 :         blk_task_init(task);
    1009             : 
                               /* Only touch the iovecs when setup succeeded (returned 0), as the
                                * split-ring handler no_bdev_process_vq does. A non-zero return means
                                * the request is invalid and iovs/iovcnt cannot be relied on, so
                                * nothing is written in that case.
                                */
    1010           0 :         if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, blk_task->iovs, &blk_task->iovcnt,
    1011             :                                         &length) == 0) {
    1012           0 :                 *(volatile uint8_t *)(blk_task->iovs[blk_task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR;
    1013           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
    1014             :         }
    1015             : 
                               /* The task is finished here, so clear its used flag before enqueueing. */
    1016           0 :         task->used = false;
    1017           0 :         vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
    1018           0 :                                      task->buffer_id, blk_task->used_len,
    1019           0 :                                      task->inflight_head);
    1020             : }
    1021             : 
                       /*
                        * "No bdev" worker for one virtqueue: runs the no-bdev handler matching the
                        * ring layout (packed or split), then calls vhost_session_vq_used_signal().
                        *
                        * Once the session has no tasks left (task_cnt == 0) and still holds an I/O
                        * channel, the channel is released and the pointer cleared.
                        *
                        * Always returns SPDK_POLLER_BUSY.
                        */
    1022             : static int
    1023           0 : _no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
    1024             : {
    1025           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1026           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1027             :         bool packed_ring;
    1028             : 
    1029           0 :         packed_ring = vq->packed.packed_ring;
    1030           0 :         if (packed_ring) {
    1031           0 :                 no_bdev_process_packed_vq(bvsession, vq);
    1032             :         } else {
    1033           0 :                 no_bdev_process_vq(bvsession, vq);
    1034             :         }
    1035             : 
    1036           0 :         vhost_session_vq_used_signal(vq);
    1037             : 
                               /* Setting the pointer to NULL keeps later calls from releasing it again. */
    1038           0 :         if (vsession->task_cnt == 0 && bvsession->io_channel) {
    1039           0 :                 vhost_blk_put_io_channel(bvsession->io_channel);
    1040           0 :                 bvsession->io_channel = NULL;
    1041             :         }
    1042             : 
    1043           0 :         return SPDK_POLLER_BUSY;
    1044             : }
    1045             : 
    1046             : static int
    1047           0 : no_bdev_vdev_vq_worker(void *arg)
    1048             : {
    1049           0 :         struct spdk_vhost_virtqueue *vq = arg;
    1050             : 
    1051           0 :         return _no_bdev_vdev_vq_worker(vq);
    1052             : }
    1053             : 
    1054             : static int
    1055           0 : no_bdev_vdev_worker(void *arg)
    1056             : {
    1057           0 :         struct spdk_vhost_blk_session *bvsession = arg;
    1058           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1059             :         uint16_t q_idx;
    1060             : 
    1061           0 :         for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
    1062           0 :                 _no_bdev_vdev_vq_worker(&vsession->virtqueue[q_idx]);
    1063             :         }
    1064             : 
    1065           0 :         return SPDK_POLLER_BUSY;
    1066             : }
    1067             : 
    1068             : static void
    1069           0 : vhost_blk_session_unregister_interrupts(struct spdk_vhost_blk_session *bvsession)
    1070             : {
    1071           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1072             :         struct spdk_vhost_virtqueue *vq;
    1073             :         int i;
    1074             : 
    1075           0 :         SPDK_DEBUGLOG(vhost_blk, "unregister virtqueues interrupt\n");
    1076           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1077           0 :                 vq = &vsession->virtqueue[i];
    1078           0 :                 if (vq->intr == NULL) {
    1079           0 :                         break;
    1080             :                 }
    1081             : 
    1082           0 :                 SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n",
    1083             :                               i, vq->vring.kickfd);
    1084           0 :                 spdk_interrupt_unregister(&vq->intr);
    1085             :         }
    1086           0 : }
    1087             : 
    1088             : static void
    1089           0 : _vhost_blk_vq_register_interrupt(void *arg)
    1090             : {
    1091           0 :         struct spdk_vhost_virtqueue *vq = arg;
    1092           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1093           0 :         struct spdk_vhost_blk_dev *bvdev =  to_blk_dev(vsession->vdev);
    1094             : 
    1095           0 :         assert(bvdev != NULL);
    1096             : 
    1097           0 :         if (bvdev->bdev) {
    1098           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, vdev_vq_worker, vq, "vdev_vq_worker");
    1099             :         } else {
    1100           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1101             :                                                    "no_bdev_vdev_vq_worker");
    1102             :         }
    1103             : 
    1104           0 :         if (vq->intr == NULL) {
    1105           0 :                 SPDK_ERRLOG("Fail to register req notifier handler.\n");
    1106           0 :                 assert(false);
    1107             :         }
    1108           0 : }
    1109             : 
    1110             : static int
    1111           0 : vhost_blk_vq_enable(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq)
    1112             : {
    1113           0 :         if (spdk_interrupt_mode_is_enabled()) {
    1114           0 :                 spdk_thread_send_msg(vsession->vdev->thread, _vhost_blk_vq_register_interrupt, vq);
    1115             :         }
    1116             : 
    1117           0 :         return 0;
    1118             : }
    1119             : 
    1120             : static int
    1121           0 : vhost_blk_session_register_no_bdev_interrupts(struct spdk_vhost_blk_session *bvsession)
    1122             : {
    1123           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1124           0 :         struct spdk_vhost_virtqueue *vq = NULL;
    1125             :         int i;
    1126             : 
    1127           0 :         SPDK_DEBUGLOG(vhost_blk, "Register virtqueues interrupt\n");
    1128           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1129           0 :                 vq = &vsession->virtqueue[i];
    1130           0 :                 SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n",
    1131             :                               i, vq->vring.kickfd);
    1132           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1133             :                                                    "no_bdev_vdev_vq_worker");
    1134           0 :                 if (vq->intr == NULL) {
    1135           0 :                         goto err;
    1136             :                 }
    1137             : 
    1138             :         }
    1139             : 
    1140           0 :         return 0;
    1141             : 
    1142           0 : err:
    1143           0 :         vhost_blk_session_unregister_interrupts(bvsession);
    1144           0 :         return -1;
    1145             : }
    1146             : 
    1147             : static void
    1148           0 : vhost_blk_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1149             : {
    1150           0 :         struct spdk_vhost_blk_session *bvsession = cb_arg;
    1151             : 
    1152           0 :         vhost_user_session_set_interrupt_mode(&bvsession->vsession, interrupt_mode);
    1153           0 : }
    1154             : 
    1155             : static void
    1156           0 : bdev_event_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx)
    1157             : {
    1158           0 :         enum spdk_bdev_event_type type = (enum spdk_bdev_event_type)(uintptr_t)ctx;
    1159             :         struct spdk_vhost_blk_dev *bvdev;
    1160             : 
    1161           0 :         if (type == SPDK_BDEV_EVENT_REMOVE) {
    1162             :                 /* All sessions have been notified, time to close the bdev */
    1163           0 :                 bvdev = to_blk_dev(vdev);
    1164           0 :                 assert(bvdev != NULL);
    1165           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1166           0 :                 bvdev->bdev_desc = NULL;
    1167           0 :                 bvdev->bdev = NULL;
    1168             :         }
    1169           0 : }
    1170             : 
    1171             : static int
    1172           0 : vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev,
    1173             :                              struct spdk_vhost_session *vsession,
    1174             :                              void *ctx)
    1175             : {
    1176           0 :         SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid);
    1177             : #if RTE_VERSION >= RTE_VERSION_NUM(23, 03, 0, 0)
    1178           0 :         rte_vhost_backend_config_change(vsession->vid, false);
    1179             : #else
    1180             :         rte_vhost_slave_config_change(vsession->vid, false);
    1181             : #endif
    1182             : 
    1183           0 :         return 0;
    1184             : }
    1185             : 
    1186             : static void
    1187           0 : vhost_user_blk_resize_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1188             : {
    1189           0 :         vhost_user_dev_foreach_session(vdev, vhost_session_bdev_resize_cb,
    1190             :                                        cb, cb_arg);
    1191           0 : }
    1192             : 
    1193             : static int
    1194           0 : vhost_user_session_bdev_remove_cb(struct spdk_vhost_dev *vdev,
    1195             :                                   struct spdk_vhost_session *vsession,
    1196             :                                   void *ctx)
    1197             : {
    1198             :         struct spdk_vhost_blk_session *bvsession;
    1199             :         int rc;
    1200             : 
    1201           0 :         bvsession = to_blk_session(vsession);
    1202           0 :         if (bvsession->requestq_poller) {
    1203           0 :                 spdk_poller_unregister(&bvsession->requestq_poller);
    1204           0 :                 if (spdk_interrupt_mode_is_enabled()) {
    1205           0 :                         vhost_blk_session_unregister_interrupts(bvsession);
    1206           0 :                         rc = vhost_blk_session_register_no_bdev_interrupts(bvsession);
    1207           0 :                         if (rc) {
    1208           0 :                                 SPDK_ERRLOG("%s: Interrupt register failed\n", vsession->name);
    1209           0 :                                 return rc;
    1210             :                         }
    1211             :                 }
    1212             : 
    1213           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
    1214           0 :                 spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
    1215             :                                                bvsession);
    1216             :         }
    1217             : 
    1218           0 :         return 0;
    1219             : }
    1220             : 
    1221             : static void
    1222           0 : vhost_user_bdev_remove_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1223             : {
    1224           0 :         SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n",
    1225             :                      vdev->name);
    1226             : 
    1227           0 :         vhost_user_dev_foreach_session(vdev, vhost_user_session_bdev_remove_cb,
    1228             :                                        cb, cb_arg);
    1229           0 : }
    1230             : 
    1231             : static void
    1232           0 : vhost_user_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_vhost_dev *vdev,
    1233             :                          bdev_event_cb_complete cb, void *cb_arg)
    1234             : {
    1235           0 :         switch (type) {
    1236           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1237           0 :                 vhost_user_bdev_remove_cb(vdev, cb, cb_arg);
    1238           0 :                 break;
    1239           0 :         case SPDK_BDEV_EVENT_RESIZE:
    1240           0 :                 vhost_user_blk_resize_cb(vdev, cb, cb_arg);
    1241           0 :                 break;
    1242           0 :         default:
    1243           0 :                 assert(false);
    1244             :                 return;
    1245             :         }
    1246             : }
    1247             : 
    1248             : static void
    1249           0 : bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
    1250             :               void *event_ctx)
    1251             : {
    1252           0 :         struct spdk_vhost_dev *vdev = (struct spdk_vhost_dev *)event_ctx;
    1253           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1254             : 
    1255           0 :         assert(bvdev != NULL);
    1256             : 
    1257           0 :         SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n",
    1258             :                       type,
    1259             :                       bdev->name);
    1260             : 
    1261           0 :         switch (type) {
    1262           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1263             :         case SPDK_BDEV_EVENT_RESIZE:
    1264           0 :                 bvdev->ops->bdev_event(type, vdev, bdev_event_cpl_cb, (void *)type);
    1265           0 :                 break;
    1266           0 :         default:
    1267           0 :                 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
    1268           0 :                 break;
    1269             :         }
    1270           0 : }
    1271             : 
    1272             : static void
    1273           0 : free_task_pool(struct spdk_vhost_blk_session *bvsession)
    1274             : {
    1275           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1276             :         struct spdk_vhost_virtqueue *vq;
    1277             :         uint16_t i;
    1278             : 
    1279           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1280           0 :                 vq = &vsession->virtqueue[i];
    1281           0 :                 if (vq->tasks == NULL) {
    1282           0 :                         continue;
    1283             :                 }
    1284             : 
    1285           0 :                 spdk_free(vq->tasks);
    1286           0 :                 vq->tasks = NULL;
    1287             :         }
    1288           0 : }
    1289             : 
    1290             : static int
    1291           0 : alloc_vq_task_pool(struct spdk_vhost_session *vsession, uint16_t qid)
    1292             : {
    1293           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1294             :         struct spdk_vhost_virtqueue *vq;
    1295             :         struct spdk_vhost_user_blk_task *task;
    1296             :         uint32_t task_cnt;
    1297             :         uint32_t j;
    1298             : 
    1299           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
    1300           0 :                 return -EINVAL;
    1301             :         }
    1302             : 
    1303           0 :         vq = &vsession->virtqueue[qid];
    1304           0 :         if (vq->vring.desc == NULL) {
    1305           0 :                 return 0;
    1306             :         }
    1307             : 
    1308           0 :         task_cnt = vq->vring.size;
    1309           0 :         if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) {
    1310             :                 /* sanity check */
    1311           0 :                 SPDK_ERRLOG("%s: virtqueue %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n",
    1312             :                             vsession->name, qid, task_cnt, SPDK_VHOST_MAX_VQ_SIZE);
    1313           0 :                 return -1;
    1314             :         }
    1315           0 :         vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_user_blk_task) * task_cnt,
    1316             :                                  SPDK_CACHE_LINE_SIZE, NULL,
    1317             :                                  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    1318           0 :         if (vq->tasks == NULL) {
    1319           0 :                 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n",
    1320             :                             vsession->name, task_cnt, qid);
    1321           0 :                 return -1;
    1322             :         }
    1323             : 
    1324           0 :         for (j = 0; j < task_cnt; j++) {
    1325           0 :                 task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[j];
    1326           0 :                 task->bvsession = bvsession;
    1327           0 :                 task->req_idx = j;
    1328           0 :                 task->vq = vq;
    1329             :         }
    1330             : 
    1331           0 :         return 0;
    1332             : }
    1333             : 
    1334             : static int
    1335           0 : vhost_blk_start(struct spdk_vhost_dev *vdev,
    1336             :                 struct spdk_vhost_session *vsession, void *unused)
    1337             : {
    1338           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1339             :         struct spdk_vhost_blk_dev *bvdev;
    1340             :         int i;
    1341             : 
    1342             :         /* return if start is already in progress */
    1343           0 :         if (bvsession->requestq_poller) {
    1344           0 :                 SPDK_INFOLOG(vhost, "%s: start in progress\n", vsession->name);
    1345           0 :                 return -EINPROGRESS;
    1346             :         }
    1347             : 
    1348             :         /* validate all I/O queues are in a contiguous index range */
    1349           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1350             :                 /* vring.desc and vring.desc_packed are in a union struct
    1351             :                  * so q->vring.desc can replace q->vring.desc_packed.
    1352             :                  */
    1353           0 :                 if (vsession->virtqueue[i].vring.desc == NULL) {
    1354           0 :                         SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i);
    1355           0 :                         return -1;
    1356             :                 }
    1357             :         }
    1358             : 
    1359           0 :         bvdev = to_blk_dev(vdev);
    1360           0 :         assert(bvdev != NULL);
    1361           0 :         bvsession->bvdev = bvdev;
    1362             : 
    1363           0 :         if (bvdev->bdev) {
    1364           0 :                 bvsession->io_channel = vhost_blk_get_io_channel(vdev);
    1365           0 :                 if (!bvsession->io_channel) {
    1366           0 :                         free_task_pool(bvsession);
    1367           0 :                         SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name);
    1368           0 :                         return -1;
    1369             :                 }
    1370             :         }
    1371             : 
    1372           0 :         if (bvdev->bdev) {
    1373           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(vdev_worker, bvsession, 0);
    1374             :         } else {
    1375           0 :                 bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
    1376             :         }
    1377           0 :         SPDK_INFOLOG(vhost, "%s: started poller on lcore %d\n",
    1378             :                      vsession->name, spdk_env_get_current_core());
    1379             : 
    1380           0 :         spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
    1381             :                                        bvsession);
    1382             : 
    1383           0 :         return 0;
    1384             : }
    1385             : 
    1386             : static int
    1387           0 : destroy_session_poller_cb(void *arg)
    1388             : {
    1389           0 :         struct spdk_vhost_blk_session *bvsession = arg;
    1390           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1391           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
    1392             :         int i;
    1393             : 
    1394           0 :         if (vsession->task_cnt > 0 || (pthread_mutex_trylock(&user_dev->lock) != 0)) {
    1395           0 :                 assert(vsession->stop_retry_count > 0);
    1396           0 :                 vsession->stop_retry_count--;
    1397           0 :                 if (vsession->stop_retry_count == 0) {
    1398           0 :                         SPDK_ERRLOG("%s: Timedout when destroy session (task_cnt %d)\n", vsession->name,
    1399             :                                     vsession->task_cnt);
    1400           0 :                         spdk_poller_unregister(&bvsession->stop_poller);
    1401           0 :                         vhost_user_session_stop_done(vsession, -ETIMEDOUT);
    1402             :                 }
    1403             : 
    1404           0 :                 return SPDK_POLLER_BUSY;
    1405             :         }
    1406             : 
    1407           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1408           0 :                 vsession->virtqueue[i].next_event_time = 0;
    1409           0 :                 vhost_vq_used_signal(vsession, &vsession->virtqueue[i]);
    1410             :         }
    1411             : 
    1412           0 :         SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n",
    1413             :                      vsession->name, spdk_env_get_current_core());
    1414             : 
    1415           0 :         if (bvsession->io_channel) {
    1416           0 :                 vhost_blk_put_io_channel(bvsession->io_channel);
    1417           0 :                 bvsession->io_channel = NULL;
    1418             :         }
    1419             : 
    1420           0 :         free_task_pool(bvsession);
    1421           0 :         spdk_poller_unregister(&bvsession->stop_poller);
    1422           0 :         vhost_user_session_stop_done(vsession, 0);
    1423             : 
    1424           0 :         pthread_mutex_unlock(&user_dev->lock);
    1425           0 :         return SPDK_POLLER_BUSY;
    1426             : }
    1427             : 
    1428             : static int
    1429           0 : vhost_blk_stop(struct spdk_vhost_dev *vdev,
    1430             :                struct spdk_vhost_session *vsession, void *unused)
    1431             : {
    1432           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1433             : 
    1434             :         /* return if stop is already in progress */
    1435           0 :         if (bvsession->stop_poller) {
    1436           0 :                 return -EINPROGRESS;
    1437             :         }
    1438             : 
    1439           0 :         spdk_poller_unregister(&bvsession->requestq_poller);
    1440           0 :         vhost_blk_session_unregister_interrupts(bvsession);
    1441             : 
    1442           0 :         bvsession->vsession.stop_retry_count = (SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC * 1000 *
    1443             :                                                 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
    1444           0 :         bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb,
    1445             :                                  bvsession, SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
    1446           0 :         return 0;
    1447             : }
    1448             : 
    1449             : static void
    1450           0 : vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1451             : {
    1452             :         struct spdk_vhost_blk_dev *bvdev;
    1453             : 
    1454           0 :         bvdev = to_blk_dev(vdev);
    1455           0 :         assert(bvdev != NULL);
    1456             : 
    1457           0 :         spdk_json_write_named_object_begin(w, "block");
    1458             : 
    1459           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1460             : 
    1461           0 :         spdk_json_write_name(w, "bdev");
    1462           0 :         if (bvdev->bdev) {
    1463           0 :                 spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev));
    1464             :         } else {
    1465           0 :                 spdk_json_write_null(w);
    1466             :         }
    1467           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1468             : 
    1469           0 :         spdk_json_write_object_end(w);
    1470           0 : }
    1471             : 
    1472             : static void
    1473           0 : vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1474             : {
    1475             :         struct spdk_vhost_blk_dev *bvdev;
    1476             : 
    1477           0 :         bvdev = to_blk_dev(vdev);
    1478           0 :         assert(bvdev != NULL);
    1479             : 
    1480           0 :         if (!bvdev->bdev) {
    1481           0 :                 return;
    1482             :         }
    1483             : 
    1484           0 :         spdk_json_write_object_begin(w);
    1485           0 :         spdk_json_write_named_string(w, "method", "vhost_create_blk_controller");
    1486             : 
    1487           0 :         spdk_json_write_named_object_begin(w, "params");
    1488           0 :         spdk_json_write_named_string(w, "ctrlr", vdev->name);
    1489           0 :         spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev));
    1490           0 :         spdk_json_write_named_string(w, "cpumask",
    1491             :                                      spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread)));
    1492           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1493           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1494           0 :         spdk_json_write_object_end(w);
    1495             : 
    1496           0 :         spdk_json_write_object_end(w);
    1497             : }
    1498             : 
    1499             : static int vhost_blk_destroy(struct spdk_vhost_dev *dev);
    1500             : 
    1501             : static int
    1502           0 : vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
    1503             :                      uint32_t len)
    1504             : {
    1505           0 :         struct virtio_blk_config blkcfg;
    1506             :         struct spdk_bdev *bdev;
    1507             :         uint32_t blk_size;
    1508             :         uint64_t blkcnt;
    1509             : 
    1510           0 :         memset(&blkcfg, 0, sizeof(blkcfg));
    1511           0 :         bdev = vhost_blk_get_bdev(vdev);
    1512           0 :         if (bdev == NULL) {
    1513             :                 /* We can't just return -1 here as this GET_CONFIG message might
    1514             :                  * be caused by a QEMU VM reboot. Returning -1 will indicate an
    1515             :                  * error to QEMU, who might then decide to terminate itself.
    1516             :                  * We don't want that. A simple reboot shouldn't break the system.
    1517             :                  *
    1518             :                  * Presenting a block device with block size 0 and block count 0
    1519             :                  * doesn't cause any problems on QEMU side and the virtio-pci
    1520             :                  * device is even still available inside the VM, but there will
    1521             :                  * be no block device created for it - the kernel drivers will
    1522             :                  * silently reject it.
    1523             :                  */
    1524           0 :                 blk_size = 0;
    1525           0 :                 blkcnt = 0;
    1526             :         } else {
    1527           0 :                 blk_size = spdk_bdev_get_block_size(bdev);
    1528           0 :                 blkcnt = spdk_bdev_get_num_blocks(bdev);
    1529           0 :                 if (spdk_bdev_get_buf_align(bdev) > 1) {
    1530           0 :                         blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE;
    1531           0 :                         blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 2 - 1);
    1532             :                 } else {
    1533           0 :                         blkcfg.size_max = 131072;
    1534             :                         /*  -2 for REQ and RESP and -1 for region boundary splitting */
    1535           0 :                         blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
    1536             :                 }
    1537             :         }
    1538             : 
    1539           0 :         blkcfg.blk_size = blk_size;
    1540             :         /* minimum I/O size in blocks */
    1541           0 :         blkcfg.min_io_size = 1;
    1542             :         /* expressed in 512 Bytes sectors */
    1543           0 :         blkcfg.capacity = (blkcnt * blk_size) / 512;
    1544             :         /* QEMU can overwrite this value when started */
    1545           0 :         blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;
    1546             : 
    1547           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1548             :                 /* 16MiB, expressed in 512 Bytes */
    1549           0 :                 blkcfg.max_discard_sectors = 32768;
    1550           0 :                 blkcfg.max_discard_seg = 1;
    1551           0 :                 blkcfg.discard_sector_alignment = blk_size / 512;
    1552             :         }
    1553           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1554           0 :                 blkcfg.max_write_zeroes_sectors = 32768;
    1555           0 :                 blkcfg.max_write_zeroes_seg = 1;
    1556             :         }
    1557             : 
    1558           0 :         memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));
    1559             : 
    1560           0 :         return 0;
    1561             : }
    1562             : 
    1563             : static int
    1564           0 : vhost_blk_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
    1565             :                          uint32_t iops_threshold)
    1566             : {
    1567           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1568             : 
    1569           0 :         assert(bvdev != NULL);
    1570             : 
    1571           0 :         return bvdev->ops->set_coalescing(vdev, delay_base_us, iops_threshold);
    1572             : }
    1573             : 
    1574             : static void
    1575           0 : vhost_blk_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
    1576             :                          uint32_t *iops_threshold)
    1577             : {
    1578           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1579             : 
    1580           0 :         assert(bvdev != NULL);
    1581             : 
    1582           0 :         bvdev->ops->get_coalescing(vdev, delay_base_us, iops_threshold);
    1583           0 : }
    1584             : 
    1585             : static const struct spdk_vhost_user_dev_backend vhost_blk_user_device_backend = {
    1586             :         .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session),
    1587             :         .start_session =  vhost_blk_start,
    1588             :         .stop_session = vhost_blk_stop,
    1589             :         .alloc_vq_tasks = alloc_vq_task_pool,
    1590             :         .enable_vq = vhost_blk_vq_enable,
    1591             : };
    1592             : 
    1593             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
    1594             :         .type = VHOST_BACKEND_BLK,
    1595             :         .vhost_get_config = vhost_blk_get_config,
    1596             :         .dump_info_json = vhost_blk_dump_info_json,
    1597             :         .write_config_json = vhost_blk_write_config_json,
    1598             :         .remove_device = vhost_blk_destroy,
    1599             :         .set_coalescing = vhost_blk_set_coalescing,
    1600             :         .get_coalescing = vhost_blk_get_coalescing,
    1601             : };
    1602             : 
    1603             : int
    1604           1 : virtio_blk_construct_ctrlr(struct spdk_vhost_dev *vdev, const char *address,
    1605             :                            struct spdk_cpuset *cpumask, const struct spdk_json_val *params,
    1606             :                            const struct spdk_vhost_user_dev_backend *user_backend)
    1607             : {
    1608           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1609             : 
    1610           1 :         assert(bvdev != NULL);
    1611             : 
    1612           1 :         return bvdev->ops->create_ctrlr(vdev, cpumask, address, params, (void *)user_backend);
    1613             : }
    1614             : 
    1615             : int
    1616           1 : spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
    1617             :                          const char *transport, const struct spdk_json_val *params)
    1618             : {
    1619           1 :         struct spdk_vhost_blk_dev *bvdev = NULL;
    1620             :         struct spdk_vhost_dev *vdev;
    1621             :         struct spdk_bdev *bdev;
    1622           1 :         const char *transport_name = VIRTIO_BLK_DEFAULT_TRANSPORT;
    1623           1 :         int ret = 0;
    1624             : 
    1625           1 :         bvdev = calloc(1, sizeof(*bvdev));
    1626           1 :         if (bvdev == NULL) {
    1627           0 :                 ret = -ENOMEM;
    1628           0 :                 goto out;
    1629             :         }
    1630             : 
    1631           1 :         if (transport != NULL) {
    1632           0 :                 transport_name = transport;
    1633             :         }
    1634             : 
    1635           1 :         bvdev->ops = virtio_blk_get_transport_ops(transport_name);
    1636           1 :         if (!bvdev->ops) {
    1637           0 :                 ret = -EINVAL;
    1638           0 :                 SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
    1639           0 :                 goto out;
    1640             :         }
    1641             : 
    1642           1 :         ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc);
    1643           1 :         if (ret != 0) {
    1644           0 :                 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n",
    1645             :                             name, dev_name, ret);
    1646           0 :                 goto out;
    1647             :         }
    1648           1 :         bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc);
    1649             : 
    1650           1 :         vdev = &bvdev->vdev;
    1651           1 :         vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE;
    1652           1 :         vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES;
    1653           1 :         vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES;
    1654             : 
    1655           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1656           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD);
    1657             :         }
    1658           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1659           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
    1660             :         }
    1661             : 
    1662           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
    1663           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH);
    1664             :         }
    1665             : 
    1666           1 :         bvdev->bdev = bdev;
    1667           1 :         bvdev->readonly = false;
    1668           1 :         ret = vhost_dev_register(vdev, name, cpumask, params, &vhost_blk_device_backend,
    1669             :                                  &vhost_blk_user_device_backend, false);
    1670           1 :         if (ret != 0) {
    1671           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1672           0 :                 goto out;
    1673             :         }
    1674             : 
    1675           1 :         SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name);
    1676           1 : out:
    1677           1 :         if (ret != 0 && bvdev) {
    1678           0 :                 free(bvdev);
    1679             :         }
    1680           1 :         return ret;
    1681             : }
    1682             : 
    1683             : int
    1684           1 : virtio_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    1685             : {
    1686           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1687             : 
    1688           1 :         assert(bvdev != NULL);
    1689             : 
    1690           1 :         return bvdev->ops->destroy_ctrlr(vdev);
    1691             : }
    1692             : 
    1693             : static int
    1694           1 : vhost_blk_destroy(struct spdk_vhost_dev *vdev)
    1695             : {
    1696           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1697             :         int rc;
    1698             : 
    1699           1 :         assert(bvdev != NULL);
    1700             : 
    1701           1 :         rc = vhost_dev_unregister(&bvdev->vdev);
    1702           1 :         if (rc != 0) {
    1703           0 :                 return rc;
    1704             :         }
    1705             : 
    1706           1 :         if (bvdev->bdev_desc) {
    1707           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1708           0 :                 bvdev->bdev_desc = NULL;
    1709             :         }
    1710           1 :         bvdev->bdev = NULL;
    1711             : 
    1712           1 :         free(bvdev);
    1713           1 :         return 0;
    1714             : }
    1715             : 
    1716             : struct spdk_io_channel *
    1717           0 : vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev)
    1718             : {
    1719           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1720             : 
    1721           0 :         assert(bvdev != NULL);
    1722             : 
    1723           0 :         return spdk_bdev_get_io_channel(bvdev->bdev_desc);
    1724             : }
    1725             : 
    1726             : void
    1727           0 : vhost_blk_put_io_channel(struct spdk_io_channel *ch)
    1728             : {
    1729           0 :         spdk_put_io_channel(ch);
    1730           0 : }
    1731             : 
    1732             : static struct spdk_virtio_blk_transport *
    1733           1 : vhost_user_blk_create(const struct spdk_json_val *params)
    1734             : {
    1735             :         int ret;
    1736             :         struct spdk_virtio_blk_transport *vhost_user_blk;
    1737             : 
    1738           1 :         vhost_user_blk = calloc(1, sizeof(*vhost_user_blk));
    1739           1 :         if (!vhost_user_blk) {
    1740           0 :                 return NULL;
    1741             :         }
    1742             : 
    1743           1 :         ret = vhost_user_init();
    1744           1 :         if (ret != 0) {
    1745           0 :                 free(vhost_user_blk);
    1746           0 :                 return NULL;
    1747             :         }
    1748             : 
    1749           1 :         return vhost_user_blk;
    1750             : }
    1751             : 
    1752             : static int
    1753           1 : vhost_user_blk_destroy(struct spdk_virtio_blk_transport *transport,
    1754             :                        spdk_vhost_fini_cb cb_fn)
    1755             : {
    1756           1 :         vhost_user_fini(cb_fn);
    1757           1 :         free(transport);
    1758           1 :         return 0;
    1759             : }
    1760             : 
    1761             : struct rpc_vhost_blk {
    1762             :         bool readonly;
    1763             :         bool packed_ring;
    1764             : };
    1765             : 
    1766             : static const struct spdk_json_object_decoder rpc_construct_vhost_blk[] = {
    1767             :         {"readonly", offsetof(struct rpc_vhost_blk, readonly), spdk_json_decode_bool, true},
    1768             :         {"packed_ring", offsetof(struct rpc_vhost_blk, packed_ring), spdk_json_decode_bool, true},
    1769             : };
    1770             : 
    1771             : static int
    1772           1 : vhost_user_blk_create_ctrlr(struct spdk_vhost_dev *vdev, struct spdk_cpuset *cpumask,
    1773             :                             const char *address, const struct spdk_json_val *params, void *custom_opts)
    1774             : {
    1775           1 :         struct rpc_vhost_blk req = {0};
    1776           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1777             : 
    1778           1 :         assert(bvdev != NULL);
    1779             : 
    1780           1 :         if (spdk_json_decode_object_relaxed(params, rpc_construct_vhost_blk,
    1781             :                                             SPDK_COUNTOF(rpc_construct_vhost_blk),
    1782             :                                             &req)) {
    1783           0 :                 SPDK_DEBUGLOG(vhost_blk, "spdk_json_decode_object failed\n");
    1784           0 :                 return -EINVAL;
    1785             :         }
    1786             : 
    1787           1 :         if (req.packed_ring) {
    1788           0 :                 vdev->virtio_features |= (uint64_t)req.packed_ring << VIRTIO_F_RING_PACKED;
    1789             :         }
    1790           1 :         if (req.readonly) {
    1791           0 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO);
    1792           0 :                 bvdev->readonly = req.readonly;
    1793             :         }
    1794             : 
    1795           1 :         return vhost_user_dev_create(vdev, address, cpumask, custom_opts, false);
    1796             : }
    1797             : 
    1798             : static int
    1799           1 : vhost_user_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    1800             : {
    1801           1 :         return vhost_user_dev_unregister(vdev);
    1802             : }
    1803             : 
    1804             : static void
    1805           0 : vhost_user_blk_dump_opts(struct spdk_virtio_blk_transport *transport, struct spdk_json_write_ctx *w)
    1806             : {
    1807           0 :         assert(w != NULL);
    1808             : 
    1809           0 :         spdk_json_write_named_string(w, "name", transport->ops->name);
    1810           0 : }
    1811             : 
    1812             : static const struct spdk_virtio_blk_transport_ops vhost_user_blk = {
    1813             :         .name = "vhost_user_blk",
    1814             : 
    1815             :         .dump_opts = vhost_user_blk_dump_opts,
    1816             : 
    1817             :         .create = vhost_user_blk_create,
    1818             :         .destroy = vhost_user_blk_destroy,
    1819             : 
    1820             :         .create_ctrlr = vhost_user_blk_create_ctrlr,
    1821             :         .destroy_ctrlr = vhost_user_blk_destroy_ctrlr,
    1822             : 
    1823             :         .bdev_event = vhost_user_bdev_event_cb,
    1824             :         .set_coalescing = vhost_user_set_coalescing,
    1825             :         .get_coalescing = vhost_user_get_coalescing,
    1826             : };
    1827             : 
    1828           1 : SPDK_VIRTIO_BLK_TRANSPORT_REGISTER(vhost_user_blk, &vhost_user_blk);
    1829             : 
    1830           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk)
    1831           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data)

Generated by: LCOV version 1.15