LCOV - code coverage report
Current view: top level - lib/vhost - vhost_blk.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 74 986 7.5 %
Date: 2024-07-13 09:25:29 Functions: 12 78 15.4 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation. All rights reserved.
       3             :  *   All rights reserved.
       4             :  */
       5             : 
       6             : #include <linux/virtio_blk.h>
       7             : 
       8             : #include "spdk/env.h"
       9             : #include "spdk/bdev.h"
      10             : #include "spdk/bdev_module.h"
      11             : #include "spdk/thread.h"
      12             : #include "spdk/likely.h"
      13             : #include "spdk/string.h"
      14             : #include "spdk/util.h"
      15             : #include "spdk/vhost.h"
      16             : #include "spdk/json.h"
      17             : 
      18             : #include "vhost_internal.h"
      19             : #include <rte_version.h>
      20             : 
      21             : /* Minimal set of features supported by every SPDK VHOST-BLK device */
      22             : #define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
      23             :                 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
      24             :                 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
      25             :                 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER)  | \
      26             :                 (1ULL << VIRTIO_BLK_F_SCSI)     | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
      27             :                 (1ULL << VIRTIO_BLK_F_MQ))
      28             : 
      29             : /* Not supported features */
      30             : #define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
      31             :                 (1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
      32             :                 (1ULL << VIRTIO_BLK_F_BARRIER)  | (1ULL << VIRTIO_BLK_F_SCSI))
      33             : 
      34             : /* Vhost-blk support protocol features */
      35             : #define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
      36             :                 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
      37             : 
      38             : #define VIRTIO_BLK_DEFAULT_TRANSPORT "vhost_user_blk"
      39             : 
      40             : struct spdk_vhost_user_blk_task {
      41             :         struct spdk_vhost_blk_task blk_task;
      42             :         struct spdk_vhost_blk_session *bvsession;
      43             :         struct spdk_vhost_virtqueue *vq;
      44             : 
      45             :         uint16_t req_idx;
      46             :         uint16_t num_descs;
      47             :         uint16_t buffer_id;
      48             :         uint16_t inflight_head;
      49             : 
      50             :         /* If set, the task is currently used for I/O processing. */
      51             :         bool used;
      52             : };
      53             : 
      54             : struct spdk_vhost_blk_dev {
      55             :         struct spdk_vhost_dev vdev;
      56             :         struct spdk_bdev *bdev;
      57             :         struct spdk_bdev_desc *bdev_desc;
      58             :         const struct spdk_virtio_blk_transport_ops *ops;
      59             : 
      60             :         bool readonly;
      61             :         /* Next poll group index to be assigned */
      62             :         uint32_t next_pg_index;
      63             : };
      64             : 
      65             : struct vhost_user_pg_vq_info {
      66             :         struct vhost_user_poll_group *pg;
      67             :         struct spdk_vhost_virtqueue *vq;
      68             :         struct spdk_vhost_session *vsession;
      69             : 
      70             :         TAILQ_ENTRY(vhost_user_pg_vq_info) link;
      71             : };
      72             : 
      73             : struct vhost_user_poll_group {
      74             :         struct spdk_vhost_dev *vdev;
      75             :         struct spdk_vhost_session *vsession;
      76             : 
      77             :         struct spdk_thread *thread;
      78             :         struct spdk_poller *requestq_poller;
      79             :         struct spdk_io_channel *io_channel;
      80             : 
      81             :         int task_cnt;
      82             : 
      83             :         TAILQ_HEAD(, vhost_user_pg_vq_info) vqs;
      84             : 
      85             :         struct spdk_poller *stop_poller;
      86             :         uint32_t stop_retry_count;
      87             : };
      88             : 
      89             : struct spdk_vhost_blk_session {
      90             :         /* The parent session must be the very first field in this struct */
      91             :         struct spdk_vhost_session vsession;
      92             :         struct spdk_vhost_blk_dev *bvdev;
      93             :         struct spdk_poller *stop_poller;
      94             : 
      95             :         struct spdk_thread *thread;
      96             :         struct vhost_user_poll_group *poll_groups;
      97             :         uint32_t num_poll_groups;
      98             : 
      99             :         uint32_t num_stopped_poll_groups;
     100             : };
     101             : 
     102             : /* forward declaration */
     103             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend;
     104             : 
     105             : static void vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task,
     106             :                 void *cb_arg);
     107             : 
     108             : static void session_stop_poll_groups(struct spdk_vhost_blk_session *bvsession);
     109             : 
     110             : static int
     111           0 : vhost_user_process_blk_request(struct spdk_vhost_user_blk_task *user_task)
     112             : {
     113           0 :         struct spdk_vhost_blk_session *bvsession = user_task->bvsession;
     114           0 :         struct spdk_vhost_dev *vdev = &bvsession->bvdev->vdev;
     115           0 :         struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)user_task->vq->poll_group;
     116             : 
     117           0 :         return virtio_blk_process_request(vdev, pg->io_channel, &user_task->blk_task,
     118             :                                           vhost_user_blk_request_finish, NULL);
     119             : }
     120             : 
     121             : static struct spdk_vhost_blk_dev *
     122           4 : to_blk_dev(struct spdk_vhost_dev *vdev)
     123             : {
     124           4 :         if (vdev == NULL) {
     125           0 :                 return NULL;
     126             :         }
     127             : 
     128           4 :         if (vdev->backend->type != VHOST_BACKEND_BLK) {
     129           0 :                 SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name);
     130           0 :                 return NULL;
     131             :         }
     132             : 
     133           4 :         return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev);
     134             : }
     135             : 
     136             : struct spdk_bdev *
     137           0 : vhost_blk_get_bdev(struct spdk_vhost_dev *vdev)
     138             : {
     139           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     140             : 
     141           0 :         assert(bvdev != NULL);
     142             : 
     143           0 :         return bvdev->bdev;
     144             : }
     145             : 
     146             : static struct spdk_vhost_blk_session *
     147           0 : to_blk_session(struct spdk_vhost_session *vsession)
     148             : {
     149           0 :         assert(vsession->vdev->backend->type == VHOST_BACKEND_BLK);
     150           0 :         return (struct spdk_vhost_blk_session *)vsession;
     151             : }
     152             : 
     153             : static inline void
     154           0 : blk_task_inc_task_cnt(struct spdk_vhost_user_blk_task *task)
     155             : {
     156           0 :         struct spdk_vhost_virtqueue *vq = task->vq;
     157           0 :         struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;
     158             : 
     159           0 :         pg->task_cnt++;
     160           0 : }
     161             : 
     162             : static inline void
     163           0 : blk_task_dec_task_cnt(struct spdk_vhost_user_blk_task *task)
     164             : {
     165           0 :         struct spdk_vhost_virtqueue *vq = task->vq;
     166           0 :         struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;
     167             : 
     168           0 :         assert(pg->task_cnt > 0);
     169           0 :         pg->task_cnt--;
     170           0 : }
     171             : 
     172             : static void
     173           0 : blk_task_finish(struct spdk_vhost_user_blk_task *task)
     174             : {
     175           0 :         blk_task_dec_task_cnt(task);
     176           0 :         task->used = false;
     177           0 : }
     178             : 
     179             : static void
     180           0 : blk_task_init(struct spdk_vhost_user_blk_task *task)
     181             : {
     182           0 :         struct spdk_vhost_blk_task *blk_task = &task->blk_task;
     183             : 
     184           0 :         task->used = true;
     185           0 :         blk_task->iovcnt = SPDK_COUNTOF(blk_task->iovs);
     186           0 :         blk_task->status = NULL;
     187           0 :         blk_task->used_len = 0;
     188           0 :         blk_task->payload_size = 0;
     189           0 : }
     190             : 
     191             : static void
     192           0 : blk_task_enqueue(struct spdk_vhost_user_blk_task *task)
     193             : {
     194           0 :         if (task->vq->packed.packed_ring) {
     195           0 :                 vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq,
     196           0 :                                              task->num_descs,
     197           0 :                                              task->buffer_id, task->blk_task.used_len,
     198           0 :                                              task->inflight_head);
     199             :         } else {
     200           0 :                 vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq,
     201           0 :                                            task->req_idx, task->blk_task.used_len);
     202             :         }
     203           0 : }
     204             : 
     205             : static void
     206           0 : vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task, void *cb_arg)
     207             : {
     208             :         struct spdk_vhost_user_blk_task *user_task;
     209             : 
     210           0 :         user_task = SPDK_CONTAINEROF(task, struct spdk_vhost_user_blk_task, blk_task);
     211             : 
     212           0 :         blk_task_enqueue(user_task);
     213             : 
     214           0 :         SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d\n status: %" PRIu8"\n",
     215             :                       user_task, user_task->req_idx, status);
     216           0 :         blk_task_finish(user_task);
     217           0 : }
     218             : 
     219             : static void
     220           0 : blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task)
     221             : {
     222             : 
     223           0 :         if (task->status) {
     224           0 :                 *task->status = status;
     225             :         }
     226             : 
     227           0 :         task->cb(status, task, task->cb_arg);
     228           0 : }
     229             : 
     230             : /*
     231             :  * Process task's descriptor chain and setup data related fields.
     232             :  * Return
     233             :  *   total size of supplied buffers
     234             :  *
     235             :  *   FIXME: Make this function return to rd_cnt and wr_cnt
     236             :  */
     237             : static int
     238           0 : blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
     239             :                            struct spdk_vhost_virtqueue *vq,
     240             :                            uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     241             : {
     242           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     243           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     244           0 :         struct vring_desc *desc, *desc_table;
     245           0 :         uint16_t out_cnt = 0, cnt = 0;
     246           0 :         uint32_t desc_table_size, len = 0;
     247             :         uint32_t desc_handled_cnt;
     248             :         int rc;
     249             : 
     250           0 :         rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
     251           0 :         if (rc != 0) {
     252           0 :                 SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     253           0 :                 return -1;
     254             :         }
     255             : 
     256           0 :         desc_handled_cnt = 0;
     257             :         while (1) {
     258             :                 /*
     259             :                  * Maximum cnt reached?
     260             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     261             :                  */
     262           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     263           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     264             :                                       vsession->name, req_idx);
     265           0 :                         return -1;
     266             :                 }
     267             : 
     268           0 :                 if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) {
     269           0 :                         SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     270             :                                       vsession->name, req_idx, cnt);
     271           0 :                         return -1;
     272             :                 }
     273             : 
     274           0 :                 len += desc->len;
     275             : 
     276           0 :                 out_cnt += vhost_vring_desc_is_wr(desc);
     277             : 
     278           0 :                 rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
     279           0 :                 if (rc != 0) {
     280           0 :                         SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n",
     281             :                                     vsession->name, req_idx);
     282           0 :                         return -1;
     283           0 :                 } else if (desc == NULL) {
     284           0 :                         break;
     285             :                 }
     286             : 
     287           0 :                 desc_handled_cnt++;
     288           0 :                 if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
     289             :                         /* Break a cycle and report an error, if any. */
     290           0 :                         SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n",
     291             :                                     vsession->name, desc_table_size, desc_handled_cnt);
     292           0 :                         return -1;
     293             :                 }
     294             :         }
     295             : 
     296             :         /*
     297             :          * There must be least two descriptors.
     298             :          * First contain request so it must be readable.
     299             :          * Last descriptor contain buffer for response so it must be writable.
     300             :          */
     301           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     302           0 :                 return -1;
     303             :         }
     304             : 
     305           0 :         *length = len;
     306           0 :         *iovs_cnt = cnt;
     307           0 :         return 0;
     308             : }
     309             : 
     310             : static int
     311           0 : blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
     312             :                            struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     313             :                            struct vring_packed_desc *desc_table, uint16_t desc_table_size,
     314             :                            struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     315             : {
     316           0 :         struct vring_packed_desc *desc;
     317           0 :         uint16_t cnt = 0, out_cnt = 0;
     318           0 :         uint32_t len = 0;
     319             : 
     320           0 :         if (desc_table == NULL) {
     321           0 :                 desc = &vq->vring.desc_packed[req_idx];
     322             :         } else {
     323           0 :                 req_idx = 0;
     324           0 :                 desc = desc_table;
     325             :         }
     326             : 
     327             :         while (1) {
     328             :                 /*
     329             :                  * Maximum cnt reached?
     330             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     331             :                  */
     332           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     333           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     334             :                                     vsession->name, req_idx);
     335           0 :                         return -EINVAL;
     336             :                 }
     337             : 
     338           0 :                 if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
     339           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     340             :                                     vsession->name, req_idx, cnt);
     341           0 :                         return -EINVAL;
     342             :                 }
     343             : 
     344           0 :                 len += desc->len;
     345           0 :                 out_cnt += vhost_vring_packed_desc_is_wr(desc);
     346             : 
     347             :                 /* desc is NULL means we reach the last desc of this request */
     348           0 :                 vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
     349           0 :                 if (desc == NULL) {
     350           0 :                         break;
     351             :                 }
     352             :         }
     353             : 
     354             :         /*
     355             :          * There must be least two descriptors.
     356             :          * First contain request so it must be readable.
     357             :          * Last descriptor contain buffer for response so it must be writable.
     358             :          */
     359           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     360           0 :                 return -EINVAL;
     361             :         }
     362             : 
     363           0 :         *length = len;
     364           0 :         *iovs_cnt = cnt;
     365             : 
     366           0 :         return 0;
     367             : }
     368             : 
     369             : static int
     370           0 : blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
     371             :                             struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     372             :                             struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     373             : {
     374           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     375           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     376           0 :         struct vring_packed_desc *desc = NULL, *desc_table;
     377           0 :         uint32_t desc_table_size;
     378             :         int rc;
     379             : 
     380           0 :         rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
     381             :                                       &desc_table, &desc_table_size);
     382           0 :         if (spdk_unlikely(rc != 0)) {
     383           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     384           0 :                 return rc;
     385             :         }
     386             : 
     387           0 :         return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     388             :                                           iovs, iovs_cnt, length);
     389             : }
     390             : 
     391             : static int
     392           0 : blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
     393             :                               struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     394             :                               struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
     395             : {
     396           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     397           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     398           0 :         spdk_vhost_inflight_desc *inflight_desc;
     399           0 :         struct vring_packed_desc *desc_table;
     400           0 :         uint16_t out_cnt = 0, cnt = 0;
     401           0 :         uint32_t desc_table_size, len = 0;
     402           0 :         int rc = 0;
     403             : 
     404           0 :         rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
     405             :                                            req_idx, &inflight_desc, &desc_table, &desc_table_size);
     406           0 :         if (spdk_unlikely(rc != 0)) {
     407           0 :                 SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
     408           0 :                 return rc;
     409             :         }
     410             : 
     411           0 :         if (desc_table != NULL) {
     412           0 :                 return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
     413             :                                                   iovs, iovs_cnt, length);
     414             :         }
     415             : 
     416             :         while (1) {
     417             :                 /*
     418             :                  * Maximum cnt reached?
     419             :                  * Should not happen if request is well formatted, otherwise this is a BUG.
     420             :                  */
     421           0 :                 if (spdk_unlikely(cnt == *iovs_cnt)) {
     422           0 :                         SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
     423             :                                     vsession->name, req_idx);
     424           0 :                         return -EINVAL;
     425             :                 }
     426             : 
     427           0 :                 if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
     428           0 :                         SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
     429             :                                     vsession->name, req_idx, cnt);
     430           0 :                         return -EINVAL;
     431             :                 }
     432             : 
     433           0 :                 len += inflight_desc->len;
     434           0 :                 out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);
     435             : 
     436             :                 /* Without F_NEXT means it's the last desc */
     437           0 :                 if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
     438           0 :                         break;
     439             :                 }
     440             : 
     441           0 :                 inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
     442             :         }
     443             : 
     444             :         /*
     445             :          * There must be least two descriptors.
     446             :          * First contain request so it must be readable.
     447             :          * Last descriptor contain buffer for response so it must be writable.
     448             :          */
     449           0 :         if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
     450           0 :                 return -EINVAL;
     451             :         }
     452             : 
     453           0 :         *length = len;
     454           0 :         *iovs_cnt = cnt;
     455             : 
     456           0 :         return 0;
     457             : }
     458             : 
     459             : static void
     460           0 : blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
     461             : {
     462           0 :         struct spdk_vhost_blk_task *task = cb_arg;
     463             : 
     464           0 :         spdk_bdev_free_io(bdev_io);
     465           0 :         blk_request_finish(success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR, task);
     466           0 : }
     467             : 
     468             : static void
     469           0 : blk_request_resubmit(void *arg)
     470             : {
     471           0 :         struct spdk_vhost_blk_task *task = arg;
     472           0 :         int rc = 0;
     473             : 
     474           0 :         rc = virtio_blk_process_request(task->bdev_io_wait_vdev, task->bdev_io_wait_ch, task,
     475             :                                         task->cb, task->cb_arg);
     476           0 :         if (rc == 0) {
     477           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task);
     478             :         } else {
     479           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task);
     480             :         }
     481           0 : }
     482             : 
     483             : static inline void
     484           0 : blk_request_queue_io(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     485             :                      struct spdk_vhost_blk_task *task)
     486             : {
     487             :         int rc;
     488           0 :         struct spdk_bdev *bdev = vhost_blk_get_bdev(vdev);
     489             : 
     490           0 :         task->bdev_io_wait.bdev = bdev;
     491           0 :         task->bdev_io_wait.cb_fn = blk_request_resubmit;
     492           0 :         task->bdev_io_wait.cb_arg = task;
     493           0 :         task->bdev_io_wait_ch = ch;
     494           0 :         task->bdev_io_wait_vdev = vdev;
     495             : 
     496           0 :         rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait);
     497           0 :         if (rc != 0) {
     498           0 :                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     499             :         }
     500           0 : }
     501             : 
     502             : int
     503           0 : virtio_blk_process_request(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
     504             :                            struct spdk_vhost_blk_task *task, virtio_blk_request_cb cb, void *cb_arg)
     505             : {
     506           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
     507           0 :         struct virtio_blk_outhdr req;
     508             :         struct virtio_blk_discard_write_zeroes *desc;
     509             :         struct iovec *iov;
     510             :         uint32_t type;
     511             :         uint64_t flush_bytes;
     512             :         uint32_t payload_len;
     513             :         uint16_t iovcnt;
     514             :         int rc;
     515             : 
     516           0 :         assert(bvdev != NULL);
     517             : 
     518           0 :         task->cb = cb;
     519           0 :         task->cb_arg = cb_arg;
     520             : 
     521           0 :         iov = &task->iovs[0];
     522           0 :         if (spdk_unlikely(iov->iov_len != sizeof(req))) {
     523           0 :                 SPDK_DEBUGLOG(vhost_blk,
     524             :                               "First descriptor size is %zu but expected %zu (task = %p).\n",
     525             :                               iov->iov_len, sizeof(req), task);
     526           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     527           0 :                 return -1;
     528             :         }
     529             : 
     530             :         /* Some SeaBIOS versions don't align the virtio_blk_outhdr on an 8-byte boundary, which
     531             :          * triggers ubsan errors.  So copy this small 16-byte structure to the stack to workaround
     532             :          * this problem.
     533             :          */
     534           0 :         memcpy(&req, iov->iov_base, sizeof(req));
     535             : 
     536           0 :         iov = &task->iovs[task->iovcnt - 1];
     537           0 :         if (spdk_unlikely(iov->iov_len != 1)) {
     538           0 :                 SPDK_DEBUGLOG(vhost_blk,
     539             :                               "Last descriptor size is %zu but expected %d (task = %p).\n",
     540             :                               iov->iov_len, 1, task);
     541           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     542           0 :                 return -1;
     543             :         }
     544             : 
     545           0 :         payload_len = task->payload_size;
     546           0 :         task->status = iov->iov_base;
     547           0 :         payload_len -= sizeof(req) + sizeof(*task->status);
     548           0 :         iovcnt = task->iovcnt - 2;
     549             : 
     550           0 :         type = req.type;
     551             : #ifdef VIRTIO_BLK_T_BARRIER
     552             :         /* Don't care about barrier for now (as QEMU's virtio-blk does). */
     553           0 :         type &= ~VIRTIO_BLK_T_BARRIER;
     554             : #endif
     555             : 
     556           0 :         switch (type) {
     557           0 :         case VIRTIO_BLK_T_IN:
     558             :         case VIRTIO_BLK_T_OUT:
     559           0 :                 if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) {
     560           0 :                         SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (task = %p).\n",
     561             :                                     type ? "WRITE" : "READ", task);
     562           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     563           0 :                         return -1;
     564             :                 }
     565             : 
     566           0 :                 if (type == VIRTIO_BLK_T_IN) {
     567           0 :                         task->used_len = payload_len + sizeof(*task->status);
     568           0 :                         rc = spdk_bdev_readv(bvdev->bdev_desc, ch,
     569           0 :                                              &task->iovs[1], iovcnt, req.sector * 512,
     570             :                                              payload_len, blk_request_complete_cb, task);
     571           0 :                 } else if (!bvdev->readonly) {
     572           0 :                         task->used_len = sizeof(*task->status);
     573           0 :                         rc = spdk_bdev_writev(bvdev->bdev_desc, ch,
     574           0 :                                               &task->iovs[1], iovcnt, req.sector * 512,
     575             :                                               payload_len, blk_request_complete_cb, task);
     576             :                 } else {
     577           0 :                         SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n");
     578           0 :                         rc = -1;
     579             :                 }
     580             : 
     581           0 :                 if (rc) {
     582           0 :                         if (rc == -ENOMEM) {
     583           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     584           0 :                                 blk_request_queue_io(vdev, ch, task);
     585             :                         } else {
     586           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     587           0 :                                 return -1;
     588             :                         }
     589             :                 }
     590           0 :                 break;
     591           0 :         case VIRTIO_BLK_T_DISCARD:
     592           0 :                 desc = task->iovs[1].iov_base;
     593           0 :                 if (payload_len != sizeof(*desc)) {
     594           0 :                         SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
     595           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     596           0 :                         return -1;
     597             :                 }
     598             : 
     599           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     600           0 :                         SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n");
     601           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     602           0 :                         return -1;
     603             :                 }
     604             : 
     605           0 :                 rc = spdk_bdev_unmap(bvdev->bdev_desc, ch,
     606           0 :                                      desc->sector * 512, desc->num_sectors * 512,
     607             :                                      blk_request_complete_cb, task);
     608           0 :                 if (rc) {
     609           0 :                         if (rc == -ENOMEM) {
     610           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     611           0 :                                 blk_request_queue_io(vdev, ch, task);
     612             :                         } else {
     613           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     614           0 :                                 return -1;
     615             :                         }
     616             :                 }
     617           0 :                 break;
     618           0 :         case VIRTIO_BLK_T_WRITE_ZEROES:
     619           0 :                 desc = task->iovs[1].iov_base;
     620           0 :                 if (payload_len != sizeof(*desc)) {
     621           0 :                         SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
     622           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     623           0 :                         return -1;
     624             :                 }
     625             : 
     626             :                 /* Unmap this range, SPDK doesn't support it, kernel will enable this flag by default
     627             :                  * without checking unmap feature is negotiated or not, the flag isn't mandatory, so
     628             :                  * just print a warning.
     629             :                  */
     630           0 :                 if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
     631           0 :                         SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n",
     632             :                                      (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512);
     633             :                 }
     634             : 
     635           0 :                 rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, ch,
     636           0 :                                             desc->sector * 512, desc->num_sectors * 512,
     637             :                                             blk_request_complete_cb, task);
     638           0 :                 if (rc) {
     639           0 :                         if (rc == -ENOMEM) {
     640           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     641           0 :                                 blk_request_queue_io(vdev, ch, task);
     642             :                         } else {
     643           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     644           0 :                                 return -1;
     645             :                         }
     646             :                 }
     647           0 :                 break;
     648           0 :         case VIRTIO_BLK_T_FLUSH:
     649           0 :                 flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev);
     650           0 :                 if (req.sector != 0) {
     651           0 :                         SPDK_NOTICELOG("sector must be zero for flush command\n");
     652           0 :                         blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     653           0 :                         return -1;
     654             :                 }
     655           0 :                 rc = spdk_bdev_flush(bvdev->bdev_desc, ch,
     656             :                                      0, flush_bytes,
     657             :                                      blk_request_complete_cb, task);
     658           0 :                 if (rc) {
     659           0 :                         if (rc == -ENOMEM) {
     660           0 :                                 SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
     661           0 :                                 blk_request_queue_io(vdev, ch, task);
     662             :                         } else {
     663           0 :                                 blk_request_finish(VIRTIO_BLK_S_IOERR, task);
     664           0 :                                 return -1;
     665             :                         }
     666             :                 }
     667           0 :                 break;
     668           0 :         case VIRTIO_BLK_T_GET_ID:
     669           0 :                 if (!iovcnt || !payload_len) {
     670           0 :                         blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     671           0 :                         return -1;
     672             :                 }
     673           0 :                 task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len);
     674           0 :                 spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_name(bvdev->bdev),
     675           0 :                                 task->used_len, ' ');
     676           0 :                 blk_request_finish(VIRTIO_BLK_S_OK, task);
     677           0 :                 break;
     678           0 :         default:
     679           0 :                 SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type);
     680           0 :                 blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
     681           0 :                 return -1;
     682             :         }
     683             : 
     684           0 :         return 0;
     685             : }
     686             : 
     687             : static void
     688           0 : process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     689             : {
                               /* Process one request taken from a split (non-packed) virtqueue.
                                *
                                * req_idx is used directly as the index into vq->tasks. The callers in
                                * this file range-check it against vq->vring.size before calling.
                                * The task's iovecs are filled by blk_iovs_split_queue_setup() and the
                                * task is then handed to vhost_user_process_blk_request().
                                */
     690             :         struct spdk_vhost_user_blk_task *task;
     691             :         struct spdk_vhost_blk_task *blk_task;
     692             :         int rc;
     693             : 
     694           0 :         assert(vq->packed.packed_ring == false);
     695             : 
     696           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[req_idx];
     697           0 :         blk_task = &task->blk_task;
                               /* The slot is still marked used: do not process it again, just pass it
                                * to blk_task_enqueue() with a used length of 0.
                                */
     698           0 :         if (spdk_unlikely(task->used)) {
     699           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     700             :                             task->bvsession->vsession.name, req_idx);
     701           0 :                 blk_task->used_len = 0;
     702           0 :                 blk_task_enqueue(task);
     703           0 :                 return;
     704             :         }
     705             : 
     706           0 :         blk_task_inc_task_cnt(task);
     707             : 
     708           0 :         blk_task_init(task);
     709             : 
     710           0 :         rc = blk_iovs_split_queue_setup(task->bvsession, vq, task->req_idx,
     711           0 :                                         blk_task->iovs, &blk_task->iovcnt, &blk_task->payload_size);
     712             : 
     713           0 :         if (rc) {
     714           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     715             :                 /* iov setup failed; finish the request with UNSUPP status. */
     716           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     717           0 :                 return;
     718             :         }
     719             : 
                               /* A return value of 0 is logged as "submitted", anything else as "failed". */
     720           0 :         if (vhost_user_process_blk_request(task) == 0) {
     721           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     722             :                               req_idx);
     723             :         } else {
     724           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, req_idx);
     725             :         }
     726             : }
     727             : 
     728             : static void
     729           0 : process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
     730             : {
                               /* Process one request from a packed virtqueue.
                                *
                                * req_idx is the descriptor position (callers in this file pass
                                * vq->last_avail_idx). The task slot is selected by the buffer id
                                * returned from vhost_vring_packed_desc_get_buffer_id(), not by req_idx.
                                */
     731             :         struct spdk_vhost_user_blk_task *task;
     732             :         struct spdk_vhost_blk_task *blk_task;
     733           0 :         uint16_t task_idx = req_idx, num_descs;
     734             :         int rc;
     735             : 
     736           0 :         assert(vq->packed.packed_ring);
     737             : 
     738             :         /* Packed ring used the buffer_id as the task_idx to get task struct.
     739             :          * In kernel driver, it uses the vq->free_head to set the buffer_id so the value
     740             :          * must be in the range of 0 ~ vring.size. The free_head value must be unique
     741             :          * in the outstanding requests.
     742             :          * We can't use the req_idx as the task_idx because the desc can be reused in
     743             :          * the next phase even when it's not completed in the previous phase. For example,
     744             :          * At phase 0, last_used_idx was 2 and desc0 was not completed. Then after moving to
     745             :          * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used
     746             :          * as task_idx because we will know task[0]->used is true at phase 1.
     747             :          * The split queue is quite different, the desc would insert into the free list when
     748             :          * device completes the request, the driver gets the desc from the free list which
     749             :          * ensures the req_idx is unique in the outstanding requests.
     750             :          */
     751           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
     752             : 
     753           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     754           0 :         blk_task = &task->blk_task;
                               /* The slot is still marked used: pass it to blk_task_enqueue() with a
                                * used length of 0 instead of processing it again.
                                */
     755           0 :         if (spdk_unlikely(task->used)) {
     756           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     757             :                             task->bvsession->vsession.name, task_idx);
     758           0 :                 blk_task->used_len = 0;
     759           0 :                 blk_task_enqueue(task);
     760           0 :                 return;
     761             :         }
     762             : 
     763           0 :         task->req_idx = req_idx;
     764           0 :         task->num_descs = num_descs;
     765           0 :         task->buffer_id = task_idx;
     766             : 
                               /* Register the descriptor range as inflight: head is req_idx, last is
                                * (req_idx + num_descs - 1) wrapped by the ring size. &task->inflight_head
                                * is passed as the output argument.
                                */
     767           0 :         rte_vhost_set_inflight_desc_packed(task->bvsession->vsession.vid, vq->vring_idx,
     768           0 :                                            req_idx, (req_idx + num_descs - 1) % vq->vring.size,
     769             :                                            &task->inflight_head);
     770             : 
     771           0 :         blk_task_inc_task_cnt(task);
     772             : 
     773           0 :         blk_task_init(task);
     774             : 
     775           0 :         rc = blk_iovs_packed_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     776             :                                          &blk_task->iovcnt,
     777             :                                          &blk_task->payload_size);
     778           0 :         if (rc) {
     779           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     780             :                 /* iov setup failed; finish the request with UNSUPP status. */
     781           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     782           0 :                 return;
     783             :         }
     784             : 
                               /* NOTE: the value printed under the "req_idx" label below is task_idx
                                * (the buffer id), not the req_idx parameter.
                                */
     785           0 :         if (vhost_user_process_blk_request(task) == 0) {
     786           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     787             :                               task_idx);
     788             :         } else {
     789           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     790             :         }
     791             : }
     792             : 
     793             : static void
     794           0 : process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
     795             :                                  uint16_t req_idx)
     796             : {
                               /* Resubmit one request recorded in the packed-ring inflight area.
                                *
                                * req_idx indexes vq->vring_inflight.inflight_packed->desc. The task
                                * slot is chosen by the id stored in the inflight entry at desc->last,
                                * and the descriptor count comes from desc->num.
                                */
     797           0 :         spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
     798           0 :         spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
     799             :         struct spdk_vhost_user_blk_task *task;
     800             :         struct spdk_vhost_blk_task *blk_task;
     801             :         uint16_t task_idx, num_descs;
     802             :         int rc;
     803             : 
     804           0 :         task_idx = desc_array[desc->last].id;
     805           0 :         num_descs = desc->num;
     806             :         /* In packed ring reconnection, we use the last_used_idx as the
     807             :          * initial value. So when we process the inflight descs we still
     808             :          * need to update the available ring index.
     809             :          */
     810           0 :         vq->last_avail_idx += num_descs;
                               /* Wrap the index at the ring size and flip the avail phase on wrap. */
     811           0 :         if (vq->last_avail_idx >= vq->vring.size) {
     812           0 :                 vq->last_avail_idx -= vq->vring.size;
     813           0 :                 vq->packed.avail_phase = !vq->packed.avail_phase;
     814             :         }
     815             : 
     816           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
     817           0 :         blk_task = &task->blk_task;
                               /* The slot is still marked used: pass it to blk_task_enqueue() with a
                                * used length of 0 instead of processing it again.
                                */
     818           0 :         if (spdk_unlikely(task->used)) {
     819           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
     820             :                             task->bvsession->vsession.name, task_idx);
     821           0 :                 blk_task->used_len = 0;
     822           0 :                 blk_task_enqueue(task);
     823           0 :                 return;
     824             :         }
     825             : 
     826           0 :         task->req_idx = req_idx;
     827           0 :         task->num_descs = num_descs;
     828           0 :         task->buffer_id = task_idx;
     829             :         /* It's for cleaning inflight entries */
     830           0 :         task->inflight_head = req_idx;
     831             : 
     832           0 :         blk_task_inc_task_cnt(task);
     833             : 
     834           0 :         blk_task_init(task);
     835             : 
     836           0 :         rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
     837             :                                            &blk_task->iovcnt,
     838             :                                            &blk_task->payload_size);
     839           0 :         if (rc) {
     840           0 :                 SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
     841             :                 /* iov setup failed; finish the request with UNSUPP status. */
     842           0 :                 vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
     843           0 :                 return;
     844             :         }
     845             : 
                               /* NOTE: the value printed under the "req_idx" label below is task_idx,
                                * not the req_idx parameter.
                                */
     846           0 :         if (vhost_user_process_blk_request(task) == 0) {
     847           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
     848             :                               task_idx);
     849             :         } else {
     850           0 :                 SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
     851             :         }
     852             : }
     853             : 
     854             : static int
     855           0 : submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
     856             :                      struct spdk_vhost_virtqueue *vq)
     857             : {
                               /* Resubmit every request listed in vq->vring_inflight.resubmit_inflight.
                                *
                                * Returns 0 when there is nothing to resubmit (no resubmit info, no list,
                                * or resubmit_num == 0). Otherwise returns the resubmit_num value seen on
                                * entry and resets resubmit_num to 0, so the list is consumed only once.
                                */
     858             :         struct spdk_vhost_session *vsession;
     859             :         spdk_vhost_resubmit_info *resubmit;
     860             :         spdk_vhost_resubmit_desc *resubmit_list;
     861             :         uint16_t req_idx;
     862             :         int i, resubmit_cnt;
     863             : 
     864           0 :         resubmit = vq->vring_inflight.resubmit_inflight;
     865           0 :         if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL ||
     866             :                         resubmit->resubmit_num == 0)) {
     867           0 :                 return 0;
     868             :         }
     869             : 
     870           0 :         resubmit_list = resubmit->resubmit_list;
     871           0 :         vsession = &bvsession->vsession;
     872             : 
                               /* The list is walked from its last entry down to entry 0. */
     873           0 :         for (i = resubmit->resubmit_num - 1; i >= 0; --i) {
     874           0 :                 req_idx = resubmit_list[i].index;
     875           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Start processing resubmit request idx %"PRIu16"======\n",
     876             :                               req_idx);
     877             : 
                                       /* Out-of-range index: log it, enqueue it to the used ring with
                                        * length 0 and move on to the next entry.
                                        */
     878           0 :                 if (spdk_unlikely(req_idx >= vq->vring.size)) {
     879           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     880             :                                     vsession->name, req_idx, vq->vring.size);
     881           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
     882           0 :                         continue;
     883             :                 }
     884             : 
     885           0 :                 if (vq->packed.packed_ring) {
     886           0 :                         process_packed_inflight_blk_task(vq, req_idx);
     887             :                 } else {
     888           0 :                         process_blk_task(vq, req_idx);
     889             :                 }
     890             :         }
     891           0 :         resubmit_cnt = resubmit->resubmit_num;
     892           0 :         resubmit->resubmit_num = 0;
     893           0 :         return resubmit_cnt;
     894             : }
     895             : 
     896             : static int
     897           0 : process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     898             : {
                               /* Poll one split virtqueue: first resubmit any inflight requests, then
                                * fetch up to SPDK_VHOST_VQ_MAX_SUBMISSIONS new request indexes from the
                                * avail ring and process each of them.
                                *
                                * Returns the number of new requests fetched, or, when none were
                                * fetched, the count returned by submit_inflight_desc().
                                */
     899           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
     900           0 :         uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS];
     901             :         uint16_t reqs_cnt, i;
     902           0 :         int resubmit_cnt = 0;
     903             : 
     904           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     905             : 
     906           0 :         reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
     907           0 :         if (!reqs_cnt) {
     908           0 :                 return resubmit_cnt;
     909             :         }
     910             : 
     911           0 :         for (i = 0; i < reqs_cnt; i++) {
     912           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     913             :                               reqs[i]);
     914             : 
                                       /* Out-of-range index: log it, enqueue it to the used ring with
                                        * length 0 and skip it.
                                        */
     915           0 :                 if (spdk_unlikely(reqs[i] >= vq->vring.size)) {
     916           0 :                         SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
     917             :                                     vsession->name, reqs[i], vq->vring.size);
     918           0 :                         vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
     919           0 :                         continue;
     920             :                 }
     921             : 
                                       /* Mark the descriptor as inflight before the request is processed. */
     922           0 :                 rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]);
     923             : 
     924           0 :                 process_blk_task(vq, reqs[i]);
     925             :         }
     926             : 
     927           0 :         return reqs_cnt;
     928             : }
     929             : 
     930             : static int
     931           0 : process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     932             : {
                               /* Poll one packed virtqueue: first resubmit any inflight requests, then
                                * process available descriptors, at most SPDK_VHOST_VQ_MAX_SUBMISSIONS
                                * per call.
                                *
                                * Returns the number of new requests processed, or, when that is 0,
                                * the count returned by submit_inflight_desc().
                                */
     933           0 :         uint16_t i = 0;
     934           0 :         uint16_t count = 0;
     935           0 :         int resubmit_cnt = 0;
     936             : 
     937           0 :         resubmit_cnt = submit_inflight_desc(bvsession, vq);
     938             : 
                               /* Each iteration passes the current vq->last_avail_idx as the request
                                * position. This loop does not modify last_avail_idx itself.
                                * NOTE(review): presumably it is advanced inside the called helpers - confirm.
                                */
     939           0 :         while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
     940           0 :                vhost_vq_packed_ring_is_avail(vq)) {
     941           0 :                 SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
     942             :                               vq->last_avail_idx);
     943           0 :                 count++;
     944           0 :                 process_packed_blk_task(vq, vq->last_avail_idx);
     945             :         }
     946             : 
     947           0 :         return count > 0 ? count : resubmit_cnt;
     948             : }
     949             : 
     950             : static int
     951           0 : _vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
     952             : {
                               /* Poll a single virtqueue: dispatch to the packed or split processing
                                * routine according to vq->packed.packed_ring, then call
                                * vhost_session_vq_used_signal() for the queue.
                                *
                                * Returns the value returned by process_packed_vq() / process_vq().
                                */
     953           0 :         struct spdk_vhost_session *vsession = vq->vsession;
     954           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
     955             :         bool packed_ring;
     956           0 :         int rc = 0;
     957             : 
     958           0 :         packed_ring = vq->packed.packed_ring;
     959           0 :         if (packed_ring) {
     960           0 :                 rc = process_packed_vq(bvsession, vq);
     961             :         } else {
     962           0 :                 rc = process_vq(bvsession, vq);
     963             :         }
     964             : 
     965           0 :         vhost_session_vq_used_signal(vq);
     966             : 
     967           0 :         return rc;
     968             : 
     969             : }
     970             : 
     971             : static int
     972           0 : vdev_vq_worker(void *arg)
     973             : {
                               /* void*-argument wrapper around _vdev_vq_worker(); arg is used as a
                                * struct spdk_vhost_virtqueue pointer and the result is returned unchanged.
                                */
     974           0 :         struct spdk_vhost_virtqueue *vq = arg;
     975             : 
     976           0 :         return _vdev_vq_worker(vq);
     977             : }
     978             : 
     979             : static int
     980           0 : vdev_worker(void *arg)
     981             : {
                               /* Poll every virtqueue that belongs to the poll group passed in arg.
                                *
                                * Returns SPDK_POLLER_BUSY when at least one virtqueue reported work,
                                * SPDK_POLLER_IDLE otherwise.
                                */
     982           0 :         struct vhost_user_poll_group *pg = arg;
     983             :         struct vhost_user_pg_vq_info *vq_info;
     984             :         struct spdk_vhost_virtqueue *vq;
     985           0 :         int rc = 0;
     986             : 
     987           0 :         TAILQ_FOREACH(vq_info, &pg->vqs, link) {
     988           0 :                 vq = vq_info->vq;
     989           0 :                 assert(vq->poll_group == pg);
                                       /* Accumulate instead of overwriting, so the busy/idle result
                                        * reflects all queues and not only the last one in the list.
                                        */
     990           0 :                 rc += _vdev_vq_worker(vq);
     991             :         }
     992             : 
     993           0 :         return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
     994             : }
     995             : 
     996             : static void
     997           0 : no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
     998             : {
                               /* Split-ring handler used by the no-bdev worker: take at most one
                                * request from the avail ring and complete it without doing any I/O.
                                */
     999           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1000           0 :         struct iovec iovs[SPDK_VHOST_IOVS_MAX];
    1001           0 :         uint32_t length;
    1002           0 :         uint16_t iovcnt, req_idx;
    1003             : 
    1004           0 :         if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) {
    1005           0 :                 return;
    1006             :         }
    1007             : 
    1008           0 :         iovcnt = SPDK_COUNTOF(iovs);
                               /* When the iovecs were set up successfully, write VIRTIO_BLK_S_IOERR
                                * into the first byte of the last iovec (used as the status byte).
                                */
    1009           0 :         if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) {
    1010           0 :                 *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR;
    1011           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
    1012             :         }
    1013             : 
                               /* The request is returned through the used ring with length 0 whether
                                * or not the status byte could be written.
                                */
    1014           0 :         vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
    1015             : }
    1016             : 
    1017             : static void
    1018           0 : no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
    1019             : {
                               /* Packed-ring handler used by the no-bdev worker: take at most one
                                * available request and complete it with VIRTIO_BLK_S_IOERR without
                                * doing any I/O.
                                */
    1020           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1021             :         struct spdk_vhost_user_blk_task *task;
    1022             :         struct spdk_vhost_blk_task *blk_task;
    1023           0 :         uint32_t length;
    1024           0 :         uint16_t req_idx = vq->last_avail_idx;
    1025           0 :         uint16_t task_idx, num_descs;
    1026             : 
    1027           0 :         if (!vhost_vq_packed_ring_is_avail(vq)) {
    1028           0 :                 return;
    1029             :         }
    1030             : 
                               /* The task slot is selected by the buffer id, not by req_idx. */
    1031           0 :         task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
    1032           0 :         task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
    1033           0 :         blk_task = &task->blk_task;
                               /* The slot is still marked used: return it through the packed ring with
                                * the values currently stored in the task and do nothing else.
                                */
    1034           0 :         if (spdk_unlikely(task->used)) {
    1035           0 :                 SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
    1036             :                             vsession->name, req_idx);
    1037           0 :                 vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
    1038           0 :                                              task->buffer_id, blk_task->used_len,
    1039           0 :                                              task->inflight_head);
    1040           0 :                 return;
    1041             :         }
    1042             : 
    1043           0 :         task->req_idx = req_idx;
    1044           0 :         task->num_descs = num_descs;
    1045           0 :         task->buffer_id = task_idx;
    1046           0 :         blk_task_init(task);
    1047             : 
                               /* Write the status byte only when the iovecs were set up successfully
                                * (return value 0), as no_bdev_process_vq() does for split rings. On
                                * failure iovs/iovcnt must not be trusted for the status write.
                                */
    1048           0 :         if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, blk_task->iovs, &blk_task->iovcnt,
    1049             :                                         &length) == 0) {
    1050           0 :                 *(volatile uint8_t *)(blk_task->iovs[blk_task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR;
    1051           0 :                 SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
    1052             :         }
    1053             : 
                               /* NOTE(review): task->inflight_head is not assigned in this function;
                                * the value passed below is whatever the task already holds - confirm.
                                */
    1054           0 :         task->used = false;
    1055           0 :         vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
    1056           0 :                                      task->buffer_id, blk_task->used_len,
    1057           0 :                                      task->inflight_head);
    1058             : }
    1059             : 
    1060             : static int
    1061           0 : _no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
    1062             : {
    1063           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1064           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1065           0 :         struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;
    1066             : 
    1067             :         bool packed_ring;
    1068             : 
    1069           0 :         packed_ring = vq->packed.packed_ring;
    1070           0 :         if (packed_ring) {
    1071           0 :                 no_bdev_process_packed_vq(bvsession, vq);
    1072             :         } else {
    1073           0 :                 no_bdev_process_vq(bvsession, vq);
    1074             :         }
    1075             : 
    1076           0 :         vhost_session_vq_used_signal(vq);
    1077             : 
    1078           0 :         if (pg->task_cnt == 0 && pg->io_channel) {
    1079           0 :                 vhost_blk_put_io_channel(pg->io_channel);
    1080           0 :                 pg->io_channel = NULL;
    1081             :         }
    1082             : 
    1083           0 :         return SPDK_POLLER_BUSY;
    1084             : }
    1085             : 
    1086             : static int
    1087           0 : no_bdev_vdev_vq_worker(void *arg)
    1088             : {
    1089           0 :         struct spdk_vhost_virtqueue *vq = arg;
    1090             : 
    1091           0 :         return _no_bdev_vdev_vq_worker(vq);
    1092             : }
    1093             : 
    1094             : static int
    1095           0 : no_bdev_vdev_worker(void *arg)
    1096             : {
    1097           0 :         struct vhost_user_poll_group *pg = arg;
    1098             :         struct vhost_user_pg_vq_info *vq_info;
    1099           0 :         int rc = 0;
    1100             : 
    1101           0 :         TAILQ_FOREACH(vq_info, &pg->vqs, link) {
    1102           0 :                 rc = _no_bdev_vdev_vq_worker(vq_info->vq);
    1103             :         }
    1104             : 
    1105           0 :         return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
    1106             : }
    1107             : 
    1108             : static void
    1109           0 : vhost_blk_pg_unregister_interrupts(struct vhost_user_poll_group *pg)
    1110             : {
    1111             :         struct vhost_user_pg_vq_info *vq_info;
    1112             :         struct spdk_vhost_virtqueue *vq;
    1113             : 
    1114           0 :         TAILQ_FOREACH(vq_info, &pg->vqs, link) {
    1115           0 :                 vq = vq_info->vq;
    1116           0 :                 if (vq->intr == NULL) {
    1117           0 :                         break;
    1118             :                 }
    1119             : 
    1120           0 :                 SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n",
    1121             :                               vq->vring_idx, vq->vring.kickfd);
    1122           0 :                 spdk_interrupt_unregister(&vq->intr);
    1123             :         }
    1124           0 : }
    1125             : 
    1126             : static void
    1127           0 : vhost_blk_vq_register_interrupt(struct spdk_vhost_virtqueue *vq)
    1128             : {
    1129           0 :         struct spdk_vhost_session *vsession = vq->vsession;
    1130           0 :         struct spdk_vhost_blk_dev *bvdev =  to_blk_dev(vsession->vdev);
    1131             : 
    1132           0 :         assert(bvdev != NULL);
    1133             : 
    1134           0 :         if (bvdev->bdev) {
    1135           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, vdev_vq_worker, vq, "vdev_vq_worker");
    1136             :         } else {
    1137           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1138             :                                                    "no_bdev_vdev_vq_worker");
    1139             :         }
    1140             : 
    1141           0 :         if (vq->intr == NULL) {
    1142           0 :                 SPDK_ERRLOG("Fail to register req notifier handler.\n");
    1143           0 :                 assert(false);
    1144             :         }
    1145           0 : }
    1146             : 
    1147             : static void
    1148           0 : add_vq_to_poll_group(void *arg)
    1149             : {
    1150           0 :         struct vhost_user_pg_vq_info *vq_info = arg;
    1151           0 :         struct vhost_user_poll_group *pg = vq_info->pg;
    1152             : 
    1153           0 :         SPDK_DEBUGLOG(vhost_blk, "%s: vring %u is added to pg %p, thread %s, lcore %u\n",
    1154             :                       pg->vsession->name,
    1155             :                       vq_info->vq->vring_idx, pg, spdk_thread_get_name(spdk_get_thread()), spdk_env_get_current_core());
    1156             : 
    1157           0 :         TAILQ_INSERT_TAIL(&pg->vqs, vq_info, link);
    1158             : 
    1159           0 :         if (spdk_interrupt_mode_is_enabled()) {
    1160           0 :                 vhost_blk_vq_register_interrupt(vq_info->vq);
    1161             :         }
    1162           0 : }
    1163             : 
    1164             : static struct vhost_user_poll_group *
    1165           0 : get_optimal_poll_group(struct spdk_vhost_blk_session *bvsession)
    1166             : {
    1167             :         struct vhost_user_poll_group *pg;
    1168             :         struct spdk_vhost_blk_dev *bvdev;
    1169             : 
    1170           0 :         if (bvsession->bvdev == NULL) {
    1171           0 :                 return NULL;
    1172             :         }
    1173             : 
    1174             :         /* round robin */
    1175           0 :         bvdev = bvsession->bvdev;
    1176           0 :         if (bvdev->next_pg_index >= bvsession->num_poll_groups) {
    1177           0 :                 bvdev->next_pg_index = 0;
    1178             :         }
    1179             : 
    1180           0 :         pg = &bvsession->poll_groups[bvdev->next_pg_index];
    1181           0 :         bvdev->next_pg_index++;
    1182             : 
    1183           0 :         return pg;
    1184             : }
    1185             : 
    1186             : static int
    1187           0 : vhost_blk_vq_enable(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq)
    1188             : {
    1189           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1190             :         struct spdk_vhost_dev *vdev;
    1191             :         struct spdk_vhost_user_dev *user_dev;
    1192             :         struct vhost_user_pg_vq_info *vq_info;
    1193             : 
    1194           0 :         vdev = vsession->vdev;
    1195           0 :         user_dev = to_user_dev(vdev);
    1196             : 
    1197           0 :         SPDK_DEBUGLOG(vhost_blk, "%s: enable vq %u\n", vsession->name, vq->vring_idx);
    1198             : 
    1199           0 :         pthread_mutex_lock(&user_dev->lock);
    1200           0 :         if (vsession->started || vsession->starting) {
    1201           0 :                 pthread_mutex_unlock(&user_dev->lock);
    1202           0 :                 vq_info = calloc(1, sizeof(*vq_info));
    1203           0 :                 if (!vq_info) {
    1204           0 :                         SPDK_ERRLOG("Failed to allocate vq_info\n");
    1205           0 :                         return -ENOMEM;
    1206             :                 }
    1207           0 :                 vq_info->vq = vq;
    1208           0 :                 vq_info->pg = get_optimal_poll_group(bvsession);
    1209           0 :                 if (vq_info->pg == NULL) {
    1210           0 :                         free(vq_info);
    1211           0 :                         return -EFAULT;
    1212             :                 }
    1213           0 :                 vq->poll_group = (void *)vq_info->pg;
    1214           0 :                 spdk_thread_send_msg(vq_info->pg->thread, add_vq_to_poll_group, vq_info);
    1215           0 :                 return 0;
    1216             :         }
    1217           0 :         pthread_mutex_unlock(&user_dev->lock);
    1218             : 
    1219           0 :         return 0;
    1220             : }
    1221             : 
    1222             : static int
    1223           0 : vhost_blk_pg_register_no_bdev_interrupts(struct vhost_user_poll_group *pg)
    1224             : {
    1225             :         struct vhost_user_pg_vq_info *vq_info;
    1226             :         struct spdk_vhost_virtqueue *vq;
    1227             : 
    1228           0 :         TAILQ_FOREACH(vq_info, &pg->vqs, link) {
    1229           0 :                 vq = vq_info->vq;
    1230           0 :                 SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n",
    1231             :                               vq->vring_idx, vq->vring.kickfd);
    1232           0 :                 vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
    1233             :                                                    "no_bdev_vdev_vq_worker");
    1234           0 :                 if (vq->intr == NULL) {
    1235           0 :                         goto err;
    1236             :                 }
    1237             : 
    1238             :         }
    1239             : 
    1240           0 :         return 0;
    1241             : 
    1242           0 : err:
    1243           0 :         vhost_blk_pg_unregister_interrupts(pg);
    1244           0 :         return -1;
    1245             : }
    1246             : 
    1247             : static void
    1248           0 : vhost_blk_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
    1249             : {
    1250           0 :         struct spdk_vhost_blk_session *bvsession = cb_arg;
    1251             : 
    1252           0 :         vhost_user_session_set_interrupt_mode(&bvsession->vsession, interrupt_mode);
    1253           0 : }
    1254             : 
    1255             : static void
    1256           0 : bdev_event_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx)
    1257             : {
    1258           0 :         enum spdk_bdev_event_type type = (enum spdk_bdev_event_type)(uintptr_t)ctx;
    1259             :         struct spdk_vhost_blk_dev *bvdev;
    1260             : 
    1261           0 :         if (type == SPDK_BDEV_EVENT_REMOVE) {
    1262             :                 /* All sessions have been notified, time to close the bdev */
    1263           0 :                 bvdev = to_blk_dev(vdev);
    1264           0 :                 assert(bvdev != NULL);
    1265           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1266           0 :                 bvdev->bdev_desc = NULL;
    1267           0 :                 bvdev->bdev = NULL;
    1268             :         }
    1269           0 : }
    1270             : 
    1271             : static int
    1272           0 : vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev,
    1273             :                              struct spdk_vhost_session *vsession,
    1274             :                              void *ctx)
    1275             : {
    1276           0 :         SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid);
    1277             : #if RTE_VERSION >= RTE_VERSION_NUM(23, 03, 0, 0)
    1278             :         rte_vhost_backend_config_change(vsession->vid, false);
    1279             : #else
    1280           0 :         rte_vhost_slave_config_change(vsession->vid, false);
    1281             : #endif
    1282             : 
    1283           0 :         return 0;
    1284             : }
    1285             : 
    1286             : static void
    1287           0 : vhost_user_blk_resize_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1288             : {
    1289           0 :         vhost_user_dev_foreach_session(vdev, vhost_session_bdev_resize_cb,
    1290             :                                        cb, cb_arg);
    1291           0 : }
    1292             : 
    1293             : static void
    1294           0 : _vhost_user_session_bdev_remove_cb(void *arg)
    1295             : {
    1296           0 :         struct vhost_user_poll_group *pg = arg;
    1297           0 :         struct spdk_vhost_session *vsession = pg->vsession;
    1298           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1299             :         int rc;
    1300             : 
    1301           0 :         if (pg->requestq_poller == NULL) {
    1302           0 :                 return;
    1303             :         }
    1304             : 
    1305           0 :         spdk_poller_unregister(&pg->requestq_poller);
    1306           0 :         if (spdk_interrupt_mode_is_enabled()) {
    1307           0 :                 vhost_blk_pg_unregister_interrupts(pg);
    1308           0 :                 rc = vhost_blk_pg_register_no_bdev_interrupts(pg);
    1309           0 :                 if (rc) {
    1310           0 :                         SPDK_ERRLOG("Interrupt register failed\n");
    1311           0 :                         return;
    1312             :                 }
    1313             :         }
    1314             : 
    1315           0 :         pg->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, pg, 0);
    1316           0 :         spdk_poller_register_interrupt(pg->requestq_poller, vhost_blk_poller_set_interrupt_mode, bvsession);
    1317             : }
    1318             : 
    1319             : static int
    1320           0 : vhost_user_session_bdev_remove_cb(struct spdk_vhost_dev *vdev,
    1321             :                                   struct spdk_vhost_session *vsession,
    1322             :                                   void *ctx)
    1323             : {
    1324           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1325             :         struct vhost_user_poll_group *pg;
    1326             :         uint32_t i;
    1327             : 
    1328           0 :         for (i = 0; i < bvsession->num_poll_groups; i++) {
    1329           0 :                 pg = &bvsession->poll_groups[i];
    1330           0 :                 spdk_thread_send_msg(pg->thread, _vhost_user_session_bdev_remove_cb, pg);
    1331             :         }
    1332             : 
    1333           0 :         return 0;
    1334             : }
    1335             : 
    1336             : static void
    1337           0 : vhost_user_bdev_remove_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
    1338             : {
    1339           0 :         SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n",
    1340             :                      vdev->name);
    1341             : 
    1342           0 :         vhost_user_dev_foreach_session(vdev, vhost_user_session_bdev_remove_cb,
    1343             :                                        cb, cb_arg);
    1344           0 : }
    1345             : 
    1346             : static void
    1347           0 : vhost_user_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_vhost_dev *vdev,
    1348             :                          bdev_event_cb_complete cb, void *cb_arg)
    1349             : {
    1350           0 :         switch (type) {
    1351           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1352           0 :                 vhost_user_bdev_remove_cb(vdev, cb, cb_arg);
    1353           0 :                 break;
    1354           0 :         case SPDK_BDEV_EVENT_RESIZE:
    1355           0 :                 vhost_user_blk_resize_cb(vdev, cb, cb_arg);
    1356           0 :                 break;
    1357           0 :         default:
    1358           0 :                 assert(false);
    1359             :                 return;
    1360             :         }
    1361           0 : }
    1362             : 
    1363             : static void
    1364           0 : bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
    1365             :               void *event_ctx)
    1366             : {
    1367           0 :         struct spdk_vhost_dev *vdev = (struct spdk_vhost_dev *)event_ctx;
    1368           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1369             : 
    1370           0 :         assert(bvdev != NULL);
    1371             : 
    1372           0 :         SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n",
    1373             :                       type,
    1374             :                       bdev->name);
    1375             : 
    1376           0 :         switch (type) {
    1377           0 :         case SPDK_BDEV_EVENT_REMOVE:
    1378             :         case SPDK_BDEV_EVENT_RESIZE:
    1379           0 :                 bvdev->ops->bdev_event(type, vdev, bdev_event_cpl_cb, (void *)type);
    1380           0 :                 break;
    1381           0 :         default:
    1382           0 :                 SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
    1383           0 :                 break;
    1384             :         }
    1385           0 : }
    1386             : 
    1387             : static void
    1388           0 : free_task_pool(struct spdk_vhost_blk_session *bvsession)
    1389             : {
    1390           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1391             :         struct spdk_vhost_virtqueue *vq;
    1392             :         uint16_t i;
    1393             : 
    1394           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1395           0 :                 vq = &vsession->virtqueue[i];
    1396           0 :                 if (vq->tasks == NULL) {
    1397           0 :                         continue;
    1398             :                 }
    1399             : 
    1400           0 :                 spdk_free(vq->tasks);
    1401           0 :                 vq->tasks = NULL;
    1402             :         }
    1403           0 : }
    1404             : 
    1405             : static int
    1406           0 : alloc_vq_task_pool(struct spdk_vhost_session *vsession, uint16_t qid)
    1407             : {
    1408           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1409             :         struct spdk_vhost_virtqueue *vq;
    1410             :         struct spdk_vhost_user_blk_task *task;
    1411             :         uint32_t task_cnt;
    1412             :         uint32_t j;
    1413             : 
    1414           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
    1415           0 :                 return -EINVAL;
    1416             :         }
    1417             : 
    1418           0 :         vq = &vsession->virtqueue[qid];
    1419           0 :         if (vq->vring.desc == NULL) {
    1420           0 :                 return 0;
    1421             :         }
    1422             : 
    1423           0 :         task_cnt = vq->vring.size;
    1424           0 :         if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) {
    1425             :                 /* sanity check */
    1426           0 :                 SPDK_ERRLOG("%s: virtqueue %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n",
    1427             :                             vsession->name, qid, task_cnt, SPDK_VHOST_MAX_VQ_SIZE);
    1428           0 :                 return -1;
    1429             :         }
    1430           0 :         vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_user_blk_task) * task_cnt,
    1431             :                                  SPDK_CACHE_LINE_SIZE, NULL,
    1432             :                                  SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    1433           0 :         if (vq->tasks == NULL) {
    1434           0 :                 SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n",
    1435             :                             vsession->name, task_cnt, qid);
    1436           0 :                 return -1;
    1437             :         }
    1438             : 
    1439           0 :         for (j = 0; j < task_cnt; j++) {
    1440           0 :                 task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[j];
    1441           0 :                 task->bvsession = bvsession;
    1442           0 :                 task->req_idx = j;
    1443           0 :                 task->vq = vq;
    1444             :         }
    1445             : 
    1446           0 :         return 0;
    1447             : }
    1448             : 
    1449             : static void
    1450           0 : session_start_poll_group(void *args)
    1451             : {
    1452             :         struct vhost_user_pg_vq_info *vq_info;
    1453           0 :         struct vhost_user_poll_group *pg = args;
    1454           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(pg->vdev);
    1455           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(pg->vsession);
    1456             : 
    1457           0 :         if (bvdev->bdev) {
    1458           0 :                 pg->io_channel = vhost_blk_get_io_channel(pg->vdev);
    1459           0 :                 SPDK_DEBUGLOG(vhost_blk, "%s: pg %p, pg io channel %p, thread %s, lcore %u\n",
    1460             :                               bvsession->vsession.name, pg,
    1461             :                               pg->io_channel, spdk_thread_get_name(spdk_get_thread()), spdk_env_get_current_core());
    1462           0 :                 if (!pg->io_channel) {
    1463           0 :                         SPDK_ERRLOG("%s: I/O channel allocation failed\n", bvsession->vsession.name);
    1464           0 :                         return;
    1465             :                 }
    1466             :         }
    1467             : 
    1468           0 :         if (spdk_interrupt_mode_is_enabled()) {
    1469           0 :                 TAILQ_FOREACH(vq_info, &pg->vqs, link) {
    1470           0 :                         vhost_blk_vq_register_interrupt(vq_info->vq);
    1471             :                 }
    1472             :         }
    1473             : 
    1474           0 :         if (bvdev->bdev) {
    1475           0 :                 pg->requestq_poller = SPDK_POLLER_REGISTER(vdev_worker, pg, 0);
    1476             :         } else {
    1477           0 :                 pg->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, pg, 0);
    1478             :         }
    1479           0 :         SPDK_INFOLOG(vhost, "%s: poller started on lcore %d\n",
    1480             :                      bvsession->vsession.name, spdk_env_get_current_core());
    1481             : 
    1482           0 :         spdk_poller_register_interrupt(pg->requestq_poller, vhost_blk_poller_set_interrupt_mode, bvsession);
    1483             : }
    1484             : 
    1485             : static int
    1486           0 : session_start_poll_groups(struct spdk_vhost_dev *vdev, struct spdk_vhost_session *vsession)
    1487             : {
    1488           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1489             :         struct vhost_user_poll_group *pg;
    1490             :         struct vhost_user_pg_vq_info *vq_info;
    1491             :         struct spdk_cpuset *cpumask;
    1492           0 :         char thread_name[128];
    1493           0 :         uint32_t i, index = 0;
    1494           0 :         int rc = 0;
    1495             : 
    1496           0 :         bvsession->thread = vdev->thread;
    1497           0 :         cpumask = spdk_thread_get_cpumask(vdev->thread);
    1498             :         /* If no cpumask is input by user, we still start one thread for the device */
    1499           0 :         if (vdev->use_default_cpumask) {
    1500           0 :                 bvsession->num_poll_groups = 1;
    1501             :         } else {
    1502           0 :                 bvsession->num_poll_groups = spdk_cpuset_count(cpumask);
    1503             :         }
    1504           0 :         bvsession->poll_groups = calloc(bvsession->num_poll_groups, sizeof(struct vhost_user_poll_group));
    1505           0 :         if (!bvsession->poll_groups) {
    1506           0 :                 SPDK_ERRLOG("Failed to allocate poll groups\n");
    1507           0 :                 return -ENOMEM;
    1508             :         }
    1509             : 
    1510           0 :         for (i = 0; i < bvsession->num_poll_groups; i++) {
    1511           0 :                 pg = &bvsession->poll_groups[i];
    1512           0 :                 TAILQ_INIT(&pg->vqs);
    1513             :         }
    1514             : 
    1515           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1516           0 :                 vq_info = calloc(1, sizeof(*vq_info));
    1517           0 :                 if (!vq_info) {
    1518           0 :                         SPDK_ERRLOG("Failed to allocate vq_info\n");
    1519           0 :                         rc = -ENOMEM;
    1520           0 :                         goto err;
    1521             :                 }
    1522           0 :                 vq_info->vq = &vsession->virtqueue[i];
    1523           0 :                 vq_info->vsession = vsession;
    1524             : 
    1525           0 :                 pg = get_optimal_poll_group(bvsession);
    1526           0 :                 if (pg == NULL) {
    1527           0 :                         free(vq_info);
    1528           0 :                         rc = -EFAULT;
    1529           0 :                         goto err;
    1530             :                 }
    1531           0 :                 vq_info->pg = pg;
    1532           0 :                 vq_info->vq->poll_group = pg;
    1533             : 
    1534           0 :                 SPDK_DEBUGLOG(vhost_blk, "%s: vring %u is added to pg %p\n", vsession->name, i, pg);
    1535           0 :                 TAILQ_INSERT_TAIL(&pg->vqs, vq_info, link);
    1536             :         }
    1537             : 
    1538           0 :         SPDK_ENV_FOREACH_CORE(i) {
    1539           0 :                 if (!spdk_cpuset_get_cpu(cpumask, i)) {
    1540           0 :                         continue;
    1541             :                 }
    1542             : 
    1543           0 :                 snprintf(thread_name, sizeof(thread_name), "%s.%u_%u", vdev->name, vsession->vid, i);
    1544           0 :                 pg = &bvsession->poll_groups[index];
    1545           0 :                 pg->vdev = vdev;
    1546           0 :                 pg->vsession = vsession;
    1547           0 :                 pg->thread = spdk_thread_create(thread_name, cpumask);
    1548           0 :                 if (!pg->thread) {
    1549           0 :                         SPDK_ERRLOG("Failed to create %s session %d poll groups\n", vdev->name, vsession->vid);
    1550           0 :                         rc = -EFAULT;
    1551           0 :                         goto err;
    1552             :                 }
    1553           0 :                 spdk_thread_send_msg(pg->thread, session_start_poll_group, pg);
    1554           0 :                 index++;
    1555           0 :                 if (index == bvsession->num_poll_groups) {
    1556           0 :                         break;
    1557             :                 }
    1558             :         }
    1559             : 
    1560           0 :         return 0;
    1561             : 
    1562           0 : err:
    1563           0 :         session_stop_poll_groups(bvsession);
    1564           0 :         return rc;
    1565             : }
    1566             : 
    1567             : static int
    1568           0 : vhost_blk_start(struct spdk_vhost_dev *vdev,
    1569             :                 struct spdk_vhost_session *vsession, void *unused)
    1570             : {
    1571           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1572             :         struct spdk_vhost_blk_dev *bvdev;
    1573             :         int i;
    1574             : 
    1575             :         /* return if start is already in progress */
    1576           0 :         if (vsession->started || vsession->starting) {
    1577           0 :                 SPDK_INFOLOG(vhost, "%s: is starting or started\n", vsession->name);
    1578           0 :                 return -EINPROGRESS;
    1579             :         }
    1580             : 
    1581             :         /* validate all I/O queues are in a contiguous index range */
    1582           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1583             :                 /* vring.desc and vring.desc_packed are in a union struct
    1584             :                  * so q->vring.desc can replace q->vring.desc_packed.
    1585             :                  */
    1586           0 :                 if (vsession->virtqueue[i].vring.desc == NULL) {
    1587           0 :                         SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i);
    1588           0 :                         return -1;
    1589             :                 }
    1590             :         }
    1591             : 
    1592           0 :         bvdev = to_blk_dev(vdev);
    1593           0 :         assert(bvdev != NULL);
    1594           0 :         bvsession->bvdev = bvdev;
    1595             : 
    1596           0 :         return session_start_poll_groups(vdev, vsession);
    1597             : }
    1598             : 
    1599             : static void
    1600           0 : session_stop_poll_group_done(void *arg)
    1601             : {
    1602           0 :         struct spdk_vhost_blk_session *bvession = arg;
    1603             : 
    1604           0 :         bvession->num_stopped_poll_groups++;
    1605           0 : }
    1606             : 
    1607             : static int
    1608           0 : pg_stop_poller_cb(void *args)
    1609             : {
    1610           0 :         struct vhost_user_poll_group *pg = args;
    1611             :         struct spdk_vhost_blk_session *bvsession;
    1612             :         struct vhost_user_pg_vq_info *vq_info, *tmp;
    1613             : 
    1614           0 :         if (!pg->task_cnt) {
    1615           0 :                 TAILQ_FOREACH_SAFE(vq_info, &pg->vqs, link, tmp) {
    1616           0 :                         TAILQ_REMOVE(&pg->vqs, vq_info, link);
    1617           0 :                         vq_info->vq->next_event_time = 0;
    1618           0 :                         vhost_vq_used_signal(pg->vsession, vq_info->vq);
    1619           0 :                         free(vq_info);
    1620             :                 }
    1621           0 :                 goto done;
    1622             :         }
    1623             : 
    1624           0 :         pg->stop_retry_count--;
    1625           0 :         if (pg->stop_retry_count) {
    1626           0 :                 return SPDK_POLLER_IDLE;
    1627             :         }
    1628             : 
    1629           0 : done:
    1630           0 :         SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n",
    1631             :                      pg->vsession->name, spdk_env_get_current_core());
    1632             : 
    1633           0 :         spdk_poller_unregister(&pg->stop_poller);
    1634           0 :         if (pg->io_channel) {
    1635           0 :                 vhost_blk_put_io_channel(pg->io_channel);
    1636           0 :                 pg->io_channel = NULL;
    1637             :         }
    1638             : 
    1639           0 :         bvsession = to_blk_session(pg->vsession);
    1640           0 :         spdk_thread_exit(pg->thread);
    1641           0 :         spdk_thread_send_msg(bvsession->thread, session_stop_poll_group_done, bvsession);
    1642             : 
    1643           0 :         return SPDK_POLLER_BUSY;
    1644             : }
    1645             : 
    1646             : static void
    1647           0 : session_stop_poll_group(void *args)
    1648             : {
    1649           0 :         struct vhost_user_poll_group *pg = args;
    1650             : 
    1651           0 :         spdk_poller_unregister(&pg->requestq_poller);
    1652           0 :         vhost_blk_pg_unregister_interrupts(pg);
    1653             : 
    1654             :         /* Timeout value should be less than SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC */
    1655           0 :         pg->stop_retry_count = (SPDK_VHOST_SESSION_STOP_TIMEOUT_IN_SEC * 1000 *
    1656             :                                 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
    1657           0 :         pg->stop_poller = SPDK_POLLER_REGISTER(pg_stop_poller_cb, pg,
    1658             :                                                SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
    1659           0 : }
    1660             : 
    1661             : static void
    1662           0 : session_stop_poll_groups(struct spdk_vhost_blk_session *bvsession)
    1663             : {
    1664             :         uint32_t i;
    1665             :         struct vhost_user_poll_group *pg;
    1666             : 
    1667           0 :         bvsession->num_stopped_poll_groups = 0;
    1668           0 :         for (i = 0; i < bvsession->num_poll_groups; i++) {
    1669           0 :                 pg = &bvsession->poll_groups[i];
    1670           0 :                 if (pg->thread) {
    1671           0 :                         spdk_thread_send_msg(pg->thread, session_stop_poll_group, pg);
    1672             :                 }
    1673             :         }
    1674           0 : }
    1675             : 
    1676             : static int
    1677           0 : destroy_session_poller_cb(void *arg)
    1678             : {
    1679           0 :         struct spdk_vhost_blk_session *bvsession = arg;
    1680           0 :         struct spdk_vhost_session *vsession = &bvsession->vsession;
    1681           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
    1682             : 
    1683           0 :         if ((bvsession->num_stopped_poll_groups != bvsession->num_poll_groups) ||
    1684           0 :             (pthread_mutex_trylock(&user_dev->lock) != 0)) {
    1685           0 :                 assert(vsession->stop_retry_count > 0);
    1686           0 :                 vsession->stop_retry_count--;
    1687           0 :                 if (vsession->stop_retry_count == 0) {
    1688           0 :                         SPDK_ERRLOG("%s: Timedout when destroy session (number of stopped pg %d)\n", vsession->name,
    1689             :                                     bvsession->num_stopped_poll_groups);
    1690           0 :                         spdk_poller_unregister(&bvsession->stop_poller);
    1691           0 :                         vhost_user_session_stop_done(vsession, -ETIMEDOUT);
    1692             :                 }
    1693             : 
    1694           0 :                 return SPDK_POLLER_BUSY;
    1695             :         }
    1696             : 
    1697           0 :         SPDK_DEBUGLOG(vhost_blk, "%s: session stoppped\n", vsession->name);
    1698           0 :         free(bvsession->poll_groups);
    1699           0 :         free_task_pool(bvsession);
    1700           0 :         spdk_poller_unregister(&bvsession->stop_poller);
    1701           0 :         vhost_user_session_stop_done(vsession, 0);
    1702             : 
    1703           0 :         pthread_mutex_unlock(&user_dev->lock);
    1704           0 :         return SPDK_POLLER_BUSY;
    1705             : }
    1706             : 
    1707             : static int
    1708           0 : vhost_blk_stop(struct spdk_vhost_dev *vdev,
    1709             :                struct spdk_vhost_session *vsession, void *unused)
    1710             : {
    1711           0 :         struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
    1712             : 
    1713             :         /* return if stop is already in progress */
    1714           0 :         if (bvsession->stop_poller) {
    1715           0 :                 return -EINPROGRESS;
    1716             :         }
    1717             : 
    1718           0 :         session_stop_poll_groups(bvsession);
    1719             : 
    1720           0 :         bvsession->vsession.stop_retry_count = (SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC * 1000 *
    1721             :                                                 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
    1722           0 :         bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb,
    1723             :                                  bvsession, SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
    1724           0 :         return 0;
    1725             : }
    1726             : 
    1727             : static void
    1728           0 : vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1729             : {
    1730             :         struct spdk_vhost_blk_dev *bvdev;
    1731             : 
    1732           0 :         bvdev = to_blk_dev(vdev);
    1733           0 :         assert(bvdev != NULL);
    1734             : 
    1735           0 :         spdk_json_write_named_object_begin(w, "block");
    1736             : 
    1737           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1738             : 
    1739           0 :         spdk_json_write_name(w, "bdev");
    1740           0 :         if (bvdev->bdev) {
    1741           0 :                 spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev));
    1742             :         } else {
    1743           0 :                 spdk_json_write_null(w);
    1744             :         }
    1745           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1746             : 
    1747           0 :         spdk_json_write_object_end(w);
    1748           0 : }
    1749             : 
    1750             : static void
    1751           0 : vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    1752             : {
    1753             :         struct spdk_vhost_blk_dev *bvdev;
    1754             : 
    1755           0 :         bvdev = to_blk_dev(vdev);
    1756           0 :         assert(bvdev != NULL);
    1757             : 
    1758           0 :         if (!bvdev->bdev) {
    1759           0 :                 return;
    1760             :         }
    1761             : 
    1762           0 :         spdk_json_write_object_begin(w);
    1763           0 :         spdk_json_write_named_string(w, "method", "vhost_create_blk_controller");
    1764             : 
    1765           0 :         spdk_json_write_named_object_begin(w, "params");
    1766           0 :         spdk_json_write_named_string(w, "ctrlr", vdev->name);
    1767           0 :         spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev));
    1768           0 :         spdk_json_write_named_string(w, "cpumask",
    1769             :                                      spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread)));
    1770           0 :         spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
    1771           0 :         spdk_json_write_named_string(w, "transport", bvdev->ops->name);
    1772           0 :         spdk_json_write_object_end(w);
    1773             : 
    1774           0 :         spdk_json_write_object_end(w);
    1775             : }
    1776             : 
    1777             : static int vhost_blk_destroy(struct spdk_vhost_dev *dev);
    1778             : 
    1779             : static int
    1780           0 : vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
    1781             :                      uint32_t len)
    1782             : {
    1783           0 :         struct virtio_blk_config blkcfg;
    1784             :         struct spdk_bdev *bdev;
    1785             :         uint32_t blk_size;
    1786             :         uint64_t blkcnt;
    1787             : 
    1788           0 :         memset(&blkcfg, 0, sizeof(blkcfg));
    1789           0 :         bdev = vhost_blk_get_bdev(vdev);
    1790           0 :         if (bdev == NULL) {
    1791             :                 /* We can't just return -1 here as this GET_CONFIG message might
    1792             :                  * be caused by a QEMU VM reboot. Returning -1 will indicate an
    1793             :                  * error to QEMU, who might then decide to terminate itself.
    1794             :                  * We don't want that. A simple reboot shouldn't break the system.
    1795             :                  *
    1796             :                  * Presenting a block device with block size 0 and block count 0
    1797             :                  * doesn't cause any problems on QEMU side and the virtio-pci
    1798             :                  * device is even still available inside the VM, but there will
    1799             :                  * be no block device created for it - the kernel drivers will
    1800             :                  * silently reject it.
    1801             :                  */
    1802           0 :                 blk_size = 0;
    1803           0 :                 blkcnt = 0;
    1804             :         } else {
    1805           0 :                 blk_size = spdk_bdev_get_block_size(bdev);
    1806           0 :                 blkcnt = spdk_bdev_get_num_blocks(bdev);
    1807           0 :                 if (spdk_bdev_get_buf_align(bdev) > 1) {
    1808           0 :                         blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE;
    1809           0 :                         blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 2 - 1);
    1810             :                 } else {
    1811           0 :                         blkcfg.size_max = 131072;
    1812             :                         /*  -2 for REQ and RESP and -1 for region boundary splitting */
    1813           0 :                         blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
    1814             :                 }
    1815             :         }
    1816             : 
    1817           0 :         blkcfg.blk_size = blk_size;
    1818             :         /* minimum I/O size in blocks */
    1819           0 :         blkcfg.min_io_size = 1;
    1820             :         /* expressed in 512 Bytes sectors */
    1821           0 :         blkcfg.capacity = (blkcnt * blk_size) / 512;
    1822             :         /* QEMU can overwrite this value when started */
    1823           0 :         blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;
    1824             : 
    1825           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1826             :                 /* 16MiB, expressed in 512 Bytes */
    1827           0 :                 blkcfg.max_discard_sectors = 32768;
    1828           0 :                 blkcfg.max_discard_seg = 1;
    1829           0 :                 blkcfg.discard_sector_alignment = blk_size / 512;
    1830             :         }
    1831           0 :         if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1832           0 :                 blkcfg.max_write_zeroes_sectors = 32768;
    1833           0 :                 blkcfg.max_write_zeroes_seg = 1;
    1834             :         }
    1835             : 
    1836           0 :         memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));
    1837             : 
    1838           0 :         return 0;
    1839             : }
    1840             : 
    1841             : static int
    1842           0 : vhost_blk_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
    1843             :                          uint32_t iops_threshold)
    1844             : {
    1845           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1846             : 
    1847           0 :         assert(bvdev != NULL);
    1848             : 
    1849           0 :         return bvdev->ops->set_coalescing(vdev, delay_base_us, iops_threshold);
    1850             : }
    1851             : 
    1852             : static void
    1853           0 : vhost_blk_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
    1854             :                          uint32_t *iops_threshold)
    1855             : {
    1856           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1857             : 
    1858           0 :         assert(bvdev != NULL);
    1859             : 
    1860           0 :         bvdev->ops->get_coalescing(vdev, delay_base_us, iops_threshold);
    1861           0 : }
    1862             : 
    1863             : static const struct spdk_vhost_user_dev_backend vhost_blk_user_device_backend = {
    1864             :         .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session),
    1865             :         .start_session =  vhost_blk_start,
    1866             :         .stop_session = vhost_blk_stop,
    1867             :         .alloc_vq_tasks = alloc_vq_task_pool,
    1868             :         .enable_vq = vhost_blk_vq_enable,
    1869             : };
    1870             : 
    1871             : static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
    1872             :         .type = VHOST_BACKEND_BLK,
    1873             :         .vhost_get_config = vhost_blk_get_config,
    1874             :         .dump_info_json = vhost_blk_dump_info_json,
    1875             :         .write_config_json = vhost_blk_write_config_json,
    1876             :         .remove_device = vhost_blk_destroy,
    1877             :         .set_coalescing = vhost_blk_set_coalescing,
    1878             :         .get_coalescing = vhost_blk_get_coalescing,
    1879             : };
    1880             : 
    1881             : int
    1882           1 : virtio_blk_construct_ctrlr(struct spdk_vhost_dev *vdev, const char *address,
    1883             :                            struct spdk_cpuset *cpumask, const struct spdk_json_val *params,
    1884             :                            const struct spdk_vhost_user_dev_backend *user_backend)
    1885             : {
    1886           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1887             : 
    1888           1 :         assert(bvdev != NULL);
    1889             : 
    1890           1 :         return bvdev->ops->create_ctrlr(vdev, cpumask, address, params, (void *)user_backend);
    1891             : }
    1892             : 
    1893             : int
    1894           1 : spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
    1895             :                          const char *transport, const struct spdk_json_val *params)
    1896             : {
    1897           1 :         struct spdk_vhost_blk_dev *bvdev = NULL;
    1898             :         struct spdk_vhost_dev *vdev;
    1899             :         struct spdk_bdev *bdev;
    1900           1 :         const char *transport_name = VIRTIO_BLK_DEFAULT_TRANSPORT;
    1901           1 :         int ret = 0;
    1902             : 
    1903           1 :         bvdev = calloc(1, sizeof(*bvdev));
    1904           1 :         if (bvdev == NULL) {
    1905           0 :                 ret = -ENOMEM;
    1906           0 :                 goto out;
    1907             :         }
    1908             : 
    1909           1 :         if (transport != NULL) {
    1910           0 :                 transport_name = transport;
    1911             :         }
    1912             : 
    1913           1 :         bvdev->ops = virtio_blk_get_transport_ops(transport_name);
    1914           1 :         if (!bvdev->ops) {
    1915           0 :                 ret = -EINVAL;
    1916           0 :                 SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
    1917           0 :                 goto out;
    1918             :         }
    1919             : 
    1920           1 :         ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc);
    1921           1 :         if (ret != 0) {
    1922           0 :                 SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n",
    1923             :                             name, dev_name, ret);
    1924           0 :                 goto out;
    1925             :         }
    1926           1 :         bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc);
    1927             : 
    1928           1 :         vdev = &bvdev->vdev;
    1929           1 :         vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE;
    1930           1 :         vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES;
    1931           1 :         vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES;
    1932             : 
    1933           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
    1934           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD);
    1935             :         }
    1936           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
    1937           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
    1938             :         }
    1939             : 
    1940           1 :         if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
    1941           1 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH);
    1942             :         }
    1943             : 
    1944           1 :         bvdev->bdev = bdev;
    1945           1 :         bvdev->readonly = false;
    1946           1 :         ret = vhost_dev_register(vdev, name, cpumask, params, &vhost_blk_device_backend,
    1947             :                                  &vhost_blk_user_device_backend, false);
    1948           1 :         if (ret != 0) {
    1949           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1950           0 :                 goto out;
    1951             :         }
    1952             : 
    1953           1 :         SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name);
    1954           1 : out:
    1955           1 :         if (ret != 0 && bvdev) {
    1956           0 :                 free(bvdev);
    1957             :         }
    1958           1 :         return ret;
    1959             : }
    1960             : 
    1961             : int
    1962           1 : virtio_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    1963             : {
    1964           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1965             : 
    1966           1 :         assert(bvdev != NULL);
    1967             : 
    1968           1 :         return bvdev->ops->destroy_ctrlr(vdev);
    1969             : }
    1970             : 
    1971             : static int
    1972           1 : vhost_blk_destroy(struct spdk_vhost_dev *vdev)
    1973             : {
    1974           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1975             :         int rc;
    1976             : 
    1977           1 :         assert(bvdev != NULL);
    1978             : 
    1979           1 :         rc = vhost_dev_unregister(&bvdev->vdev);
    1980           1 :         if (rc != 0) {
    1981           0 :                 return rc;
    1982             :         }
    1983             : 
    1984           1 :         if (bvdev->bdev_desc) {
    1985           0 :                 spdk_bdev_close(bvdev->bdev_desc);
    1986           0 :                 bvdev->bdev_desc = NULL;
    1987             :         }
    1988           1 :         bvdev->bdev = NULL;
    1989             : 
    1990           1 :         free(bvdev);
    1991           1 :         return 0;
    1992             : }
    1993             : 
    1994             : struct spdk_io_channel *
    1995           0 : vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev)
    1996             : {
    1997           0 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    1998             : 
    1999           0 :         assert(bvdev != NULL);
    2000             : 
    2001           0 :         return spdk_bdev_get_io_channel(bvdev->bdev_desc);
    2002             : }
    2003             : 
    2004             : void
    2005           0 : vhost_blk_put_io_channel(struct spdk_io_channel *ch)
    2006             : {
    2007           0 :         spdk_put_io_channel(ch);
    2008           0 : }
    2009             : 
    2010             : static struct spdk_virtio_blk_transport *
    2011           1 : vhost_user_blk_create(const struct spdk_json_val *params)
    2012             : {
    2013             :         int ret;
    2014             :         struct spdk_virtio_blk_transport *vhost_user_blk;
    2015             : 
    2016           1 :         vhost_user_blk = calloc(1, sizeof(*vhost_user_blk));
    2017           1 :         if (!vhost_user_blk) {
    2018           0 :                 return NULL;
    2019             :         }
    2020             : 
    2021           1 :         ret = vhost_user_init();
    2022           1 :         if (ret != 0) {
    2023           0 :                 free(vhost_user_blk);
    2024           0 :                 return NULL;
    2025             :         }
    2026             : 
    2027           1 :         return vhost_user_blk;
    2028             : }
    2029             : 
    2030             : static int
    2031           1 : vhost_user_blk_destroy(struct spdk_virtio_blk_transport *transport,
    2032             :                        spdk_vhost_fini_cb cb_fn)
    2033             : {
    2034           1 :         vhost_user_fini(cb_fn);
    2035           1 :         free(transport);
    2036           1 :         return 0;
    2037             : }
    2038             : 
    2039             : struct rpc_vhost_blk {
    2040             :         bool readonly;
    2041             :         bool packed_ring;
    2042             : };
    2043             : 
    2044             : static const struct spdk_json_object_decoder rpc_construct_vhost_blk[] = {
    2045             :         {"readonly", offsetof(struct rpc_vhost_blk, readonly), spdk_json_decode_bool, true},
    2046             :         {"packed_ring", offsetof(struct rpc_vhost_blk, packed_ring), spdk_json_decode_bool, true},
    2047             : };
    2048             : 
    2049             : static int
    2050           1 : vhost_user_blk_create_ctrlr(struct spdk_vhost_dev *vdev, struct spdk_cpuset *cpumask,
    2051             :                             const char *address, const struct spdk_json_val *params, void *custom_opts)
    2052             : {
    2053           1 :         struct rpc_vhost_blk req = {0};
    2054           1 :         struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
    2055             : 
    2056           1 :         assert(bvdev != NULL);
    2057             : 
    2058           1 :         if (spdk_json_decode_object_relaxed(params, rpc_construct_vhost_blk,
    2059             :                                             SPDK_COUNTOF(rpc_construct_vhost_blk),
    2060             :                                             &req)) {
    2061           0 :                 SPDK_DEBUGLOG(vhost_blk, "spdk_json_decode_object failed\n");
    2062           0 :                 return -EINVAL;
    2063             :         }
    2064             : 
    2065           1 :         if (req.packed_ring) {
    2066           0 :                 vdev->virtio_features |= (uint64_t)req.packed_ring << VIRTIO_F_RING_PACKED;
    2067             :         }
    2068           1 :         if (req.readonly) {
    2069           0 :                 vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO);
    2070           0 :                 bvdev->readonly = req.readonly;
    2071             :         }
    2072             : 
    2073           1 :         return vhost_user_dev_create(vdev, address, cpumask, custom_opts, false);
    2074             : }
    2075             : 
    2076             : static int
    2077           1 : vhost_user_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
    2078             : {
    2079           1 :         return vhost_user_dev_unregister(vdev);
    2080             : }
    2081             : 
    2082             : static void
    2083           0 : vhost_user_blk_dump_opts(struct spdk_virtio_blk_transport *transport, struct spdk_json_write_ctx *w)
    2084             : {
    2085           0 :         assert(w != NULL);
    2086             : 
    2087           0 :         spdk_json_write_named_string(w, "name", transport->ops->name);
    2088           0 : }
    2089             : 
    2090             : static const struct spdk_virtio_blk_transport_ops vhost_user_blk = {
    2091             :         .name = "vhost_user_blk",
    2092             : 
    2093             :         .dump_opts = vhost_user_blk_dump_opts,
    2094             : 
    2095             :         .create = vhost_user_blk_create,
    2096             :         .destroy = vhost_user_blk_destroy,
    2097             : 
    2098             :         .create_ctrlr = vhost_user_blk_create_ctrlr,
    2099             :         .destroy_ctrlr = vhost_user_blk_destroy_ctrlr,
    2100             : 
    2101             :         .bdev_event = vhost_user_bdev_event_cb,
    2102             :         .set_coalescing = vhost_user_set_coalescing,
    2103             :         .get_coalescing = vhost_user_get_coalescing,
    2104             : };
    2105             : 
    2106           1 : SPDK_VIRTIO_BLK_TRANSPORT_REGISTER(vhost_user_blk, &vhost_user_blk);
    2107             : 
    2108           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk)
    2109           1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data)

Generated by: LCOV version 1.15