LCOV - code coverage report
Current view: top level - lib/vhost - rte_vhost_user.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 242 1138 21.3 %
Date: 2024-12-01 05:20:32 Functions: 20 74 27.0 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/env.h"
      10             : #include "spdk/likely.h"
      11             : #include "spdk/string.h"
      12             : #include "spdk/util.h"
      13             : #include "spdk/memory.h"
      14             : #include "spdk/barrier.h"
      15             : #include "spdk/vhost.h"
      16             : #include "vhost_internal.h"
      17             : #include <rte_version.h>
      18             : 
      19             : #include "spdk_internal/vhost_user.h"
      20             : 
      21             : /* Path to folder where character device will be created. Can be set by user. */
      22             : static char g_vhost_user_dev_dirname[PATH_MAX] = "";
      23             : 
      24             : static struct spdk_thread *g_vhost_user_init_thread;
      25             : 
/*
 * Context carried with an event dispatched between the vhost init thread
 * and a session's thread (see the cb_fn/cpl_fn fields below).
 */
struct vhost_session_fn_ctx {
        /** Device pointer obtained before enqueueing the event */
        struct spdk_vhost_dev *vdev;

        /** ID of the session to send event to. */
        uint32_t vsession_id;

        /** User provided function to be executed on session's thread. */
        spdk_vhost_session_fn cb_fn;

        /**
         * User provided function to be called on the init thread
         * after iterating through all sessions.
         */
        spdk_vhost_dev_fn cpl_fn;

        /** Custom user context */
        void *user_ctx;
};

/* Defined later in this file; forward-declared for use by earlier code. */
static int vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
                unsigned timeout_sec, const char *errmsg);
      48             : 
      49             : void *
      50           0 : vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
      51             : {
      52           0 :         void *vva;
      53           0 :         uint64_t newlen;
      54             : 
      55           0 :         newlen = len;
      56           0 :         vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
      57           0 :         if (newlen != len) {
      58           0 :                 return NULL;
      59             :         }
      60             : 
      61           0 :         return vva;
      62             : 
      63           0 : }
      64             : 
/*
 * Dirty-log the device-writable buffers of request req_id so that a live
 * migration destination observes the device's writes.  No-op unless the
 * VHOST_F_LOG_ALL feature was negotiated (i.e. logging is active).
 */
static void
vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
                   uint16_t req_id)
{
        struct vring_desc *desc, *desc_table;
        uint32_t desc_table_size;
        int rc;

        if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
                return;
        }

        rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
        if (spdk_unlikely(rc != 0)) {
                SPDK_ERRLOG("Can't log used ring descriptors!\n");
                return;
        }

        /* Walk the entire descriptor chain, logging each writable buffer. */
        do {
                if (vhost_vring_desc_is_wr(desc)) {
                        /* To be honest, only pages really touched should be logged, but
                         * doing so would require tracking those changes in each backed.
                         * Also backend most likely will touch all/most of those pages so
                         * for lets assume we touched all pages passed to as writeable buffers. */
                        rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
                }
                vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
        } while (desc);
}
      94             : 
      95             : static void
      96           7 : vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
      97             :                           struct spdk_vhost_virtqueue *virtqueue,
      98             :                           uint16_t idx)
      99             : {
     100           7 :         uint64_t offset, len;
     101             : 
     102           7 :         if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
     103           7 :                 return;
     104             :         }
     105             : 
     106           0 :         if (spdk_unlikely(virtqueue->packed.packed_ring)) {
     107           0 :                 offset = idx * sizeof(struct vring_packed_desc);
     108           0 :                 len = sizeof(struct vring_packed_desc);
     109           0 :         } else {
     110           0 :                 offset = offsetof(struct vring_used, ring[idx]);
     111           0 :                 len = sizeof(virtqueue->vring.used->ring[idx]);
     112             :         }
     113             : 
     114           0 :         rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
     115           7 : }
     116             : 
     117             : static void
     118           0 : vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
     119             :                          struct spdk_vhost_virtqueue *virtqueue)
     120             : {
     121           0 :         uint64_t offset, len;
     122           0 :         uint16_t vq_idx;
     123             : 
     124           0 :         if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
     125           0 :                 return;
     126             :         }
     127             : 
     128           0 :         offset = offsetof(struct vring_used, idx);
     129           0 :         len = sizeof(virtqueue->vring.used->idx);
     130           0 :         vq_idx = virtqueue - vsession->virtqueue;
     131             : 
     132           0 :         rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
     133           0 : }
     134             : 
     135             : /*
     136             :  * Get available requests from avail ring.
     137             :  */
     138             : uint16_t
     139           4 : vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
     140             :                         uint16_t reqs_len)
     141             : {
     142           4 :         struct rte_vhost_vring *vring = &virtqueue->vring;
     143           4 :         struct vring_avail *avail = vring->avail;
     144           4 :         uint16_t size_mask = vring->size - 1;
     145           4 :         uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
     146           4 :         uint16_t count, i;
     147           4 :         int rc;
     148           4 :         uint64_t u64_value;
     149             : 
     150           4 :         spdk_smp_rmb();
     151             : 
     152           4 :         if (virtqueue->vsession && spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
     153             :                 /* Read to clear vring's kickfd */
     154           0 :                 rc = read(vring->kickfd, &u64_value, sizeof(u64_value));
     155           0 :                 if (rc < 0) {
     156           0 :                         SPDK_ERRLOG("failed to acknowledge kickfd: %s.\n", spdk_strerror(errno));
     157           0 :                         return -errno;
     158             :                 }
     159           0 :         }
     160             : 
     161           4 :         count = avail_idx - last_idx;
     162           4 :         if (spdk_likely(count == 0)) {
     163           0 :                 return 0;
     164             :         }
     165             : 
     166           4 :         if (spdk_unlikely(count > vring->size)) {
     167             :                 /* TODO: the queue is unrecoverably broken and should be marked so.
     168             :                  * For now we will fail silently and report there are no new avail entries.
     169             :                  */
     170           1 :                 return 0;
     171             :         }
     172             : 
     173           3 :         count = spdk_min(count, reqs_len);
     174             : 
     175           3 :         virtqueue->last_avail_idx += count;
     176             :         /* Check whether there are unprocessed reqs in vq, then kick vq manually */
     177           3 :         if (virtqueue->vsession && spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
     178             :                 /* If avail_idx is larger than virtqueue's last_avail_idx, then there is unprocessed reqs.
     179             :                  * avail_idx should get updated here from memory, in case of race condition with guest.
     180             :                  */
     181           0 :                 avail_idx = * (volatile uint16_t *) &avail->idx;
     182           0 :                 if (avail_idx > virtqueue->last_avail_idx) {
     183             :                         /* Write to notify vring's kickfd */
     184           0 :                         rc = write(vring->kickfd, &u64_value, sizeof(u64_value));
     185           0 :                         if (rc < 0) {
     186           0 :                                 SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
     187           0 :                                 return -errno;
     188             :                         }
     189           0 :                 }
     190           0 :         }
     191             : 
     192          19 :         for (i = 0; i < count; i++) {
     193          16 :                 reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
     194          16 :         }
     195             : 
     196           3 :         SPDK_DEBUGLOG(vhost_ring,
     197             :                       "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
     198             :                       last_idx, avail_idx, count);
     199             : 
     200           3 :         return count;
     201           4 : }
     202             : 
     203             : static bool
     204           0 : vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
     205             : {
     206           0 :         return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
     207             : }
     208             : 
     209             : static bool
     210           7 : vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
     211             : {
     212           7 :         return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
     213             : }
     214             : 
     215             : static bool
     216           0 : vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
     217             : {
     218           0 :         return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
     219             : }
     220             : 
/*
 * Resolve request req_idx in a split ring into its first descriptor and the
 * descriptor table that holds the rest of the chain.
 *
 * For an indirect descriptor, the guest-physical indirect table is mapped
 * via vhost_gpa_to_vva() and returned as both *desc_table and *desc (its
 * first entry); otherwise the vring's own descriptor array is returned.
 *
 * Returns 0 on success, -1 on an out-of-range index or an unmappable
 * indirect table.
 */
int
vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
                  uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
                  uint32_t *desc_table_size)
{
        if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
                return -1;
        }

        *desc = &virtqueue->vring.desc[req_idx];

        if (vhost_vring_desc_is_indirect(*desc)) {
                /* The indirect table's length determines the chain's size. */
                *desc_table_size = (*desc)->len / sizeof(**desc);
                *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
                                               sizeof(**desc) * *desc_table_size);
                *desc = *desc_table;
                if (*desc == NULL) {
                        return -1;
                }

                return 0;
        }

        *desc_table = virtqueue->vring.desc;
        *desc_table_size = virtqueue->vring.size;

        return 0;
}
     249             : 
     250             : static bool
     251           0 : vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
     252             :                 uint64_t addr, uint32_t len,
     253             :                 struct vring_packed_desc **desc_table,
     254             :                 uint32_t *desc_table_size)
     255             : {
     256           0 :         *desc_table_size = len / sizeof(struct vring_packed_desc);
     257             : 
     258           0 :         *desc_table = vhost_gpa_to_vva(vsession, addr, len);
     259           0 :         if (spdk_unlikely(*desc_table == NULL)) {
     260           0 :                 return false;
     261             :         }
     262             : 
     263           0 :         return true;
     264           0 : }
     265             : 
/*
 * Resolve request req_idx in a packed ring into its first descriptor.
 *
 * For an indirect descriptor, the indirect table is mapped and returned via
 * *desc_table / *desc_table_size with *desc pointing at its first entry.
 * For a direct descriptor, *desc_table is NULL and the caller walks the
 * chain via the F_NEXT flag instead.
 *
 * Returns 0 on success, -1 when an indirect table cannot be mapped.
 */
int
vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
                         struct spdk_vhost_virtqueue *virtqueue,
                         uint16_t req_idx, struct vring_packed_desc **desc,
                         struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
{
        *desc =  &virtqueue->vring.desc_packed[req_idx];

        /* In packed ring when the desc is non-indirect we get next desc
         * by judging (desc->flag & VRING_DESC_F_NEXT) != 0. When the desc
         * is indirect we get next desc by idx and desc_table_size. It's
         * different from split ring.
         */
        if (vhost_vring_packed_desc_is_indirect(*desc)) {
                if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
                                desc_table, desc_table_size)) {
                        return -1;
                }

                *desc = *desc_table;
        } else {
                *desc_table = NULL;
                *desc_table_size  = 0;
        }

        return 0;
}
     293             : 
/*
 * Resolve request req_idx from an inflight descriptor array (used when
 * resubmitting in-flight IOs, e.g. after a reconnect).
 *
 * For an indirect inflight descriptor the packed descriptor table is mapped
 * into *desc_table and *desc is set to NULL; otherwise *desc points at the
 * inflight descriptor and *desc_table is NULL.
 *
 * Returns 0 on success, -1 when the indirect table cannot be mapped.
 */
int
vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
                              spdk_vhost_inflight_desc *desc_array,
                              uint16_t req_idx, spdk_vhost_inflight_desc **desc,
                              struct vring_packed_desc  **desc_table, uint32_t *desc_table_size)
{
        *desc = &desc_array[req_idx];

        if (vhost_inflight_packed_desc_is_indirect(*desc)) {
                if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
                                desc_table, desc_table_size)) {
                        return -1;
                }

                /* This desc is the inflight desc not the packed desc.
                 * When set the F_INDIRECT the table entry should be the packed desc
                 * so set the inflight desc NULL.
                 */
                *desc = NULL;
        } else {
                /* When not set the F_INDIRECT means there is no packed desc table */
                *desc_table = NULL;
                *desc_table_size = 0;
        }

        return 0;
}
     321             : 
/*
 * Send a used-ring interrupt (vring call) to the guest if this virtqueue
 * has completed requests that were not signalled yet.
 *
 * Returns 1 when an interrupt was delivered, 0 when there was nothing to
 * signal or the call could not be delivered (e.g. non-blocking call would
 * block on DPDK >= 22.11).
 */
int
vhost_vq_used_signal(struct spdk_vhost_session *vsession,
                     struct spdk_vhost_virtqueue *virtqueue)
{
        if (virtqueue->used_req_cnt == 0) {
                return 0;
        }

        SPDK_DEBUGLOG(vhost_ring,
                      "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
                      virtqueue - vsession->virtqueue, virtqueue->last_used_idx);

#if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
        if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
#else
        if (rte_vhost_vring_call_nonblock(vsession->vid, virtqueue->vring_idx) == 0) {
#endif
                /* interrupt signalled: move the signalled requests into the
                 * coalescing statistics counter. */
                virtqueue->req_cnt += virtqueue->used_req_cnt;
                virtqueue->used_req_cnt = 0;
                return 1;
        } else {
                /* interrupt not signalled */
                return 0;
        }
}
     348             : 
     349             : static void
     350           0 : session_vq_io_stats_update(struct spdk_vhost_session *vsession,
     351             :                            struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
     352             : {
     353           0 :         uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
     354           0 :         uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
     355           0 :         int32_t irq_delay;
     356           0 :         uint32_t req_cnt;
     357             : 
     358           0 :         req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
     359           0 :         if (req_cnt <= io_threshold) {
     360           0 :                 return;
     361             :         }
     362             : 
     363           0 :         irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
     364           0 :         virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);
     365             : 
     366           0 :         virtqueue->req_cnt = 0;
     367           0 :         virtqueue->next_event_time = now;
     368           0 : }
     369             : 
     370             : static void
     371           0 : check_session_vq_io_stats(struct spdk_vhost_session *vsession,
     372             :                           struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
     373             : {
     374           0 :         if (now < vsession->next_stats_check_time) {
     375           0 :                 return;
     376             :         }
     377             : 
     378           0 :         vsession->next_stats_check_time = now + vsession->stats_check_interval;
     379           0 :         session_vq_io_stats_update(vsession, virtqueue, now);
     380           0 : }
     381             : 
/*
 * Check whether the driver asked the device not to send used-ring
 * interrupts for this queue: VRING_PACKED_EVENT_FLAG_DISABLE in the packed
 * ring's driver event area, or VRING_AVAIL_F_NO_INTERRUPT in the split
 * ring's avail flags.
 */
static inline bool
vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
{
        /* Full barrier before reading the driver-owned flags.
         * NOTE(review): presumably this orders our earlier used-ring updates
         * against the driver's flag update — confirm against the callers. */
        spdk_smp_mb();

        if (spdk_unlikely(vq->packed.packed_ring)) {
                if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
                        return true;
                }
        } else {
                if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
                        return true;
                }
        }

        return false;
}
     399             : 
/*
 * Signal the guest for this virtqueue, applying the session's adaptive
 * interrupt-coalescing policy when enabled (coalescing_delay_time_base != 0).
 */
void
vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue)
{
        struct spdk_vhost_session *vsession = virtqueue->vsession;
        uint64_t now;

        if (vsession->coalescing_delay_time_base == 0) {
                /* Coalescing disabled: signal immediately, unless the queue
                 * is not set up or the driver suppressed notifications. */
                if (virtqueue->vring.desc == NULL) {
                        return;
                }

                if (vhost_vq_event_is_suppressed(virtqueue)) {
                        return;
                }

                vhost_vq_used_signal(vsession, virtqueue);
        } else {
                now = spdk_get_ticks();
                check_session_vq_io_stats(vsession, virtqueue, now);

                /* No need for event right now */
                if (now < virtqueue->next_event_time) {
                        return;
                }

                if (vhost_vq_event_is_suppressed(virtqueue)) {
                        return;
                }

                if (!vhost_vq_used_signal(vsession, virtqueue)) {
                        return;
                }

                /* Syscall is quite long so update time */
                now = spdk_get_ticks();
                virtqueue->next_event_time = now + virtqueue->irq_delay_time;
        }
}
     438             : 
/*
 * Enqueue id and len to the split ring's used ring and publish the new
 * used->idx to the guest.
 *
 * Also maintains the rte_vhost inflight-IO bookkeeping for this request
 * and, in interrupt mode, signals the guest immediately unless
 * notifications are suppressed.
 */
void
vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
                           struct spdk_vhost_virtqueue *virtqueue,
                           uint16_t id, uint32_t len)
{
        struct rte_vhost_vring *vring = &virtqueue->vring;
        struct vring_used *used = vring->used;
        /* Ring size is a power of two, so masking implements the wrap. */
        uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
        uint16_t vq_idx = virtqueue->vring_idx;

        SPDK_DEBUGLOG(vhost_ring,
                      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
                      virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);

        /* Dirty-log the request's writable buffers for live migration. */
        vhost_log_req_desc(vsession, virtqueue, id);

        virtqueue->last_used_idx++;
        used->ring[last_idx].id = id;
        used->ring[last_idx].len = len;

        /* Ensure the used ring is updated before we log it or increment used->idx. */
        spdk_smp_wmb();

        rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);

        vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
        /* Publish the new used index to the guest (volatile store). */
        * (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
        vhost_log_used_vring_idx(vsession, virtqueue);

        rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);

        virtqueue->used_req_cnt++;

        if (spdk_unlikely(spdk_interrupt_mode_is_enabled())) {
                if (virtqueue->vring.desc == NULL || vhost_vq_event_is_suppressed(virtqueue)) {
                        return;
                }

                vhost_vq_used_signal(vsession, virtqueue);
        }
}
     483             : 
/*
 * Mark a packed-ring descriptor chain as used: write len and buffer_id back
 * into the descriptor at last_used_idx, flip its F_AVAIL/F_USED bits to
 * match the device's used wrap counter, and advance last_used_idx by
 * num_descs (toggling the wrap counter when the ring wraps).
 */
void
vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
                             struct spdk_vhost_virtqueue *virtqueue,
                             uint16_t num_descs, uint16_t buffer_id,
                             uint32_t length, uint16_t inflight_head)
{
        struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
        bool used, avail;

        SPDK_DEBUGLOG(vhost_ring,
                      "Queue %td - RING: buffer_id=%"PRIu16"\n",
                      virtqueue - vsession->virtqueue, buffer_id);

        /* When the descriptor is used, two flags in descriptor
         * avail flag and used flag are set to equal
         * and used flag value == used_wrap_counter.
         */
        used = !!(desc->flags & VRING_DESC_F_USED);
        avail = !!(desc->flags & VRING_DESC_F_AVAIL);
        if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
                SPDK_ERRLOG("descriptor has been used before\n");
                return;
        }

        /* In used desc addr is unused and len specifies the buffer length
         * that has been written to by the device.
         */
        desc->addr = 0;
        desc->len = length;

        /* This bit specifies whether any data has been written by the device */
        if (length != 0) {
                desc->flags |= VRING_DESC_F_WRITE;
        }

        /* Buffer ID is included in the last descriptor in the list.
         * The driver needs to keep track of the size of the list corresponding
         * to each buffer ID.
         */
        desc->id = buffer_id;

        /* A device MUST NOT make the descriptor used before buffer_id is
         * written to the descriptor.
         */
        spdk_smp_wmb();

        rte_vhost_set_last_inflight_io_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
        /* To mark a desc as used, the device sets the F_USED bit in flags to match
         * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
         * match the same value.
         */
        if (virtqueue->packed.used_phase) {
                desc->flags |= VRING_DESC_F_AVAIL_USED;
        } else {
                desc->flags &= ~VRING_DESC_F_AVAIL_USED;
        }
        rte_vhost_clr_inflight_desc_packed(vsession->vid, virtqueue->vring_idx, inflight_head);

        /* Dirty-log the used descriptor for live migration. */
        vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
        virtqueue->last_used_idx += num_descs;
        if (virtqueue->last_used_idx >= virtqueue->vring.size) {
                /* Ring wrapped: toggle the device's used wrap counter. */
                virtqueue->last_used_idx -= virtqueue->vring.size;
                virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
        }

        virtqueue->used_req_cnt++;
}
     551             : 
     552             : bool
     553          12 : vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
     554             : {
     555          12 :         uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;
     556             : 
     557             :         /* To mark a desc as available, the driver sets the F_AVAIL bit in flags
     558             :          * to match the internal avail wrap counter. It also sets the F_USED bit to
     559             :          * match the inverse value but it's not mandatory.
     560             :          */
     561          12 :         return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
     562          12 : }
     563             : 
     564             : bool
     565           0 : vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
     566             : {
     567           0 :         return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
     568             : }
     569             : 
     570             : bool
     571           0 : vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
     572             : {
     573           0 :         return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
     574             : }
     575             : 
     576             : int
     577           0 : vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
     578             :                                  struct spdk_vhost_virtqueue *vq,
     579             :                                  struct vring_packed_desc *desc_table,
     580             :                                  uint32_t desc_table_size)
     581             : {
     582           0 :         if (desc_table != NULL) {
     583             :                 /* When the desc_table isn't NULL means it's indirect and we get the next
     584             :                  * desc by req_idx and desc_table_size. The return value is NULL means
     585             :                  * we reach the last desc of this request.
     586             :                  */
     587           0 :                 (*req_idx)++;
     588           0 :                 if (*req_idx < desc_table_size) {
     589           0 :                         *desc = &desc_table[*req_idx];
     590           0 :                 } else {
     591           0 :                         *desc = NULL;
     592             :                 }
     593           0 :         } else {
     594             :                 /* When the desc_table is NULL means it's non-indirect and we get the next
     595             :                  * desc by req_idx and F_NEXT in flags. The return value is NULL means
     596             :                  * we reach the last desc of this request. When return new desc
     597             :                  * we update the req_idx too.
     598             :                  */
     599           0 :                 if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
     600           0 :                         *desc = NULL;
     601           0 :                         return 0;
     602             :                 }
     603             : 
     604           0 :                 *req_idx = (*req_idx + 1) % vq->vring.size;
     605           0 :                 *desc = &vq->vring.desc_packed[*req_idx];
     606             :         }
     607             : 
     608           0 :         return 0;
     609           0 : }
     610             : 
     611             : static int
     612           6 : vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     613             :                                 uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
     614             : {
     615           6 :         uintptr_t vva;
     616           6 :         uint64_t len;
     617             : 
     618           6 :         do {
     619           7 :                 if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
     620           1 :                         SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
     621           1 :                         return -1;
     622             :                 }
     623           6 :                 len = remaining;
     624           6 :                 vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
     625           6 :                 if (vva == 0 || len == 0) {
     626           0 :                         SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
     627           0 :                         return -1;
     628             :                 }
     629           6 :                 iov[*iov_index].iov_base = (void *)vva;
     630           6 :                 iov[*iov_index].iov_len = len;
     631           6 :                 remaining -= len;
     632           6 :                 payload += len;
     633           6 :                 (*iov_index)++;
     634           6 :         } while (remaining);
     635             : 
     636           5 :         return 0;
     637           6 : }
     638             : 
     639             : int
     640           0 : vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     641             :                                uint16_t *iov_index, const struct vring_packed_desc *desc)
     642             : {
     643           0 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     644           0 :                                                desc->addr, desc->len);
     645             : }
     646             : 
     647             : int
     648           0 : vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     649             :                                  uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
     650             : {
     651           0 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     652           0 :                                                desc->addr, desc->len);
     653             : }
     654             : 
     655             : /* 1, Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
     656             :  * 2, Update the vq->last_avail_idx to point next available desc chain.
     657             :  * 3, Update the avail_wrap_counter if last_avail_idx overturn.
     658             :  */
     659             : uint16_t
     660           7 : vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     661             :                                       uint16_t *num_descs)
     662             : {
     663           7 :         struct vring_packed_desc *desc;
     664           7 :         uint16_t desc_head = req_idx;
     665             : 
     666           7 :         *num_descs = 1;
     667             : 
     668           7 :         desc =  &vq->vring.desc_packed[req_idx];
     669           7 :         if (!vhost_vring_packed_desc_is_indirect(desc)) {
     670           7 :                 while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
     671           0 :                         req_idx = (req_idx + 1) % vq->vring.size;
     672           0 :                         desc = &vq->vring.desc_packed[req_idx];
     673           0 :                         (*num_descs)++;
     674             :                 }
     675           7 :         }
     676             : 
     677             :         /* Queue Size doesn't have to be a power of 2
     678             :          * Device maintains last_avail_idx so we can make sure
     679             :          * the value is valid(0 ~ vring.size - 1)
     680             :          */
     681           7 :         vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
     682           7 :         if (vq->last_avail_idx < desc_head) {
     683           1 :                 vq->packed.avail_phase = !vq->packed.avail_phase;
     684           1 :         }
     685             : 
     686           7 :         return desc->id;
     687           7 : }
     688             : 
     689             : int
     690           0 : vhost_vring_desc_get_next(struct vring_desc **desc,
     691             :                           struct vring_desc *desc_table, uint32_t desc_table_size)
     692             : {
     693           0 :         struct vring_desc *old_desc = *desc;
     694           0 :         uint16_t next_idx;
     695             : 
     696           0 :         if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
     697           0 :                 *desc = NULL;
     698           0 :                 return 0;
     699             :         }
     700             : 
     701           0 :         next_idx = old_desc->next;
     702           0 :         if (spdk_unlikely(next_idx >= desc_table_size)) {
     703           0 :                 *desc = NULL;
     704           0 :                 return -1;
     705             :         }
     706             : 
     707           0 :         *desc = &desc_table[next_idx];
     708           0 :         return 0;
     709           0 : }
     710             : 
     711             : int
     712           6 : vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     713             :                         uint16_t *iov_index, const struct vring_desc *desc)
     714             : {
     715          12 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     716           6 :                                                desc->addr, desc->len);
     717             : }
     718             : 
     719             : static inline void
     720           0 : vhost_session_mem_region_calc(uint64_t *previous_start, uint64_t *start, uint64_t *end,
     721             :                               uint64_t *len, struct rte_vhost_mem_region *region)
     722             : {
     723           0 :         *start = FLOOR_2MB(region->mmap_addr);
     724           0 :         *end = CEIL_2MB(region->mmap_addr + region->mmap_size);
     725           0 :         if (*start == *previous_start) {
     726           0 :                 *start += (size_t) VALUE_2MB;
     727           0 :         }
     728           0 :         *previous_start = *start;
     729           0 :         *len = *end - *start;
     730           0 : }
     731             : 
     732             : void
     733           0 : vhost_session_mem_register(struct rte_vhost_memory *mem)
     734             : {
     735           0 :         uint64_t start, end, len;
     736           0 :         uint32_t i;
     737           0 :         uint64_t previous_start = UINT64_MAX;
     738             : 
     739             : 
     740           0 :         for (i = 0; i < mem->nregions; i++) {
     741           0 :                 vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
     742           0 :                 SPDK_INFOLOG(vhost, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
     743             :                              start, len);
     744             : 
     745           0 :                 if (spdk_mem_register((void *)start, len) != 0) {
     746           0 :                         SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
     747             :                                      i);
     748           0 :                         continue;
     749             :                 }
     750           0 :         }
     751           0 : }
     752             : 
     753             : void
     754           0 : vhost_session_mem_unregister(struct rte_vhost_memory *mem)
     755             : {
     756           0 :         uint64_t start, end, len;
     757           0 :         uint32_t i;
     758           0 :         uint64_t previous_start = UINT64_MAX;
     759             : 
     760           0 :         for (i = 0; i < mem->nregions; i++) {
     761           0 :                 vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
     762           0 :                 if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
     763           0 :                         continue; /* region has not been registered */
     764             :                 }
     765             : 
     766           0 :                 if (spdk_mem_unregister((void *)start, len) != 0) {
     767           0 :                         assert(false);
     768             :                 }
     769           0 :         }
     770           0 : }
     771             : 
     772             : static bool
     773           0 : vhost_memory_changed(struct rte_vhost_memory *new,
     774             :                      struct rte_vhost_memory *old)
     775             : {
     776           0 :         uint32_t i;
     777             : 
     778           0 :         if (new->nregions != old->nregions) {
     779           0 :                 return true;
     780             :         }
     781             : 
     782           0 :         for (i = 0; i < new->nregions; ++i) {
     783           0 :                 struct rte_vhost_mem_region *new_r = &new->regions[i];
     784           0 :                 struct rte_vhost_mem_region *old_r = &old->regions[i];
     785             : 
     786           0 :                 if (new_r->guest_phys_addr != old_r->guest_phys_addr) {
     787           0 :                         return true;
     788             :                 }
     789           0 :                 if (new_r->size != old_r->size) {
     790           0 :                         return true;
     791             :                 }
     792           0 :                 if (new_r->guest_user_addr != old_r->guest_user_addr) {
     793           0 :                         return true;
     794             :                 }
     795           0 :                 if (new_r->mmap_addr != old_r->mmap_addr) {
     796           0 :                         return true;
     797             :                 }
     798           0 :                 if (new_r->fd != old_r->fd) {
     799           0 :                         return true;
     800             :                 }
     801           0 :         }
     802             : 
     803           0 :         return false;
     804           0 : }
     805             : 
     806             : static int
     807           0 : vhost_register_memtable_if_required(struct spdk_vhost_session *vsession, int vid)
     808             : {
     809           0 :         struct rte_vhost_memory *new_mem;
     810             : 
     811           0 :         if (vhost_get_mem_table(vid, &new_mem) != 0) {
     812           0 :                 SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
     813           0 :                 return -1;
     814             :         }
     815             : 
     816           0 :         if (vsession->mem == NULL) {
     817           0 :                 SPDK_INFOLOG(vhost, "Start to set memtable\n");
     818           0 :                 vsession->mem = new_mem;
     819           0 :                 vhost_session_mem_register(vsession->mem);
     820           0 :                 return 0;
     821             :         }
     822             : 
     823           0 :         if (vhost_memory_changed(new_mem, vsession->mem)) {
     824           0 :                 SPDK_INFOLOG(vhost, "Memtable is changed\n");
     825           0 :                 vhost_session_mem_unregister(vsession->mem);
     826           0 :                 free(vsession->mem);
     827             : 
     828           0 :                 vsession->mem = new_mem;
     829           0 :                 vhost_session_mem_register(vsession->mem);
     830           0 :                 return 0;
     831             : 
     832             :         }
     833             : 
     834           0 :         SPDK_INFOLOG(vhost, "Memtable is unchanged\n");
     835           0 :         free(new_mem);
     836           0 :         return 0;
     837           0 : }
     838             : 
/* Stop a running session: wait for its poll thread to quiesce, then hand each
 * queue's ring indexes back to rte_vhost (encoding the wrap counters for
 * packed rings) so the connection can later be resumed consistently.
 * Returns 0 on success or the error from the stop wait.
 */
static int
_stop_session(struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_virtqueue *q;
	int rc;
	uint16_t i;

	rc = vhost_user_wait_for_session_stop(vsession, SPDK_VHOST_SESSION_STOP_TIMEOUT_IN_SEC,
					      "stop session");
	if (rc != 0) {
		SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
		return rc;
	}

	for (i = 0; i < vsession->max_queues; i++) {
		q = &vsession->virtqueue[i];

		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (q->vring.desc == NULL) {
			continue;
		}

		/* Packed virtqueues support up to 2^15 entries each
		 * so left one bit can be used as wrap counter.
		 */
		if (q->packed.packed_ring) {
			q->last_avail_idx = q->last_avail_idx |
					    ((uint16_t)q->packed.avail_phase << 15);
			q->last_used_idx = q->last_used_idx |
					   ((uint16_t)q->packed.used_phase << 15);
		}

		/* Save the (possibly phase-encoded) indexes into rte_vhost and
		 * mark the queue as torn down. */
		rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
		q->vring.desc = NULL;
	}
	vsession->max_queues = 0;

	return 0;
}
     880             : 
/* rte_vhost new-connection callback. Resolves the spdk_vhost_dev owning the
 * unix socket this connection arrived on, allocates and initializes a session
 * object for it, and appends the session to the device's session list.
 * Returns 0 on success, negative on failure.
 */
static int
new_connection(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_user_dev *user_dev;
	struct spdk_vhost_session *vsession;
	size_t dev_dirname_len;
	char ifname[PATH_MAX];
	char *ctrlr_name;

	if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
		SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
		return -1;
	}

	/* The socket path may be prefixed with the configured socket directory;
	 * strip that prefix so the remainder matches a controller name. */
	ctrlr_name = &ifname[0];
	dev_dirname_len = strlen(g_vhost_user_dev_dirname);
	if (strncmp(ctrlr_name, g_vhost_user_dev_dirname, dev_dirname_len) == 0) {
		ctrlr_name += dev_dirname_len;
	}

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_find(ctrlr_name);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		spdk_vhost_unlock();
		return -1;
	}
	spdk_vhost_unlock();

	user_dev = to_user_dev(vdev);
	pthread_mutex_lock(&user_dev->lock);
	if (user_dev->registered == false) {
		SPDK_ERRLOG("Device %s is unregistered\n", ctrlr_name);
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}

	/* We expect sessions inside user_dev->vsessions to be sorted in ascending
	 * order in regard of vsession->id. For now we always set id = vsessions_num++
	 * and append each session to the very end of the vsessions list.
	 * This is required for vhost_user_dev_foreach_session() to work.
	 */
	if (user_dev->vsessions_num == UINT_MAX) {
		pthread_mutex_unlock(&user_dev->lock);
		assert(false);
		return -EINVAL;
	}

	/* Cache-line align the session; the backend's per-session context is
	 * carved out of the same allocation, right after the session struct. */
	if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
			   user_dev->user_backend->session_ctx_size)) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}
	memset(vsession, 0, sizeof(*vsession) + user_dev->user_backend->session_ctx_size);

	vsession->vdev = vdev;
	vsession->vid = vid;
	vsession->id = user_dev->vsessions_num++;
	vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
	if (vsession->name == NULL) {
		SPDK_ERRLOG("vsession alloc failed\n");
		free(vsession);
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}

	/* dpdk_sem synchronizes this session with the rte_vhost (DPDK) thread. */
	if (sem_init(&vsession->dpdk_sem, 0, 0) != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
		free(vsession->name);
		free(vsession);
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}

	vsession->started = false;
	vsession->starting = false;
	vsession->next_stats_check_time = 0;
	vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
					 spdk_get_ticks_hz() / 1000UL;
	TAILQ_INSERT_TAIL(&user_dev->vsessions, vsession, tailq);
	vhost_session_install_rte_compat_hooks(vsession);
	pthread_mutex_unlock(&user_dev->lock);

	return 0;
}
     968             : 
     969             : static void
     970           0 : vhost_user_session_start(void *arg1)
     971             : {
     972           0 :         struct spdk_vhost_session *vsession = arg1;
     973           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     974           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
     975           0 :         const struct spdk_vhost_user_dev_backend *backend;
     976           0 :         int rc;
     977             : 
     978           0 :         SPDK_INFOLOG(vhost, "Starting new session for device %s with vid %d\n", vdev->name, vsession->vid);
     979           0 :         pthread_mutex_lock(&user_dev->lock);
     980           0 :         vsession->starting = false;
     981           0 :         backend = user_dev->user_backend;
     982           0 :         rc = backend->start_session(vdev, vsession, NULL);
     983           0 :         if (rc == 0) {
     984           0 :                 vsession->started = true;
     985           0 :         }
     986           0 :         pthread_mutex_unlock(&user_dev->lock);
     987           0 : }
     988             : 
     989             : static int
     990           0 : set_device_vq_callfd(struct spdk_vhost_session *vsession, uint16_t qid)
     991             : {
     992           0 :         struct spdk_vhost_virtqueue *q;
     993             : 
     994           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
     995           0 :                 return -EINVAL;
     996             :         }
     997             : 
     998           0 :         q = &vsession->virtqueue[qid];
     999             :         /* vq isn't enabled yet */
    1000           0 :         if (q->vring_idx != qid) {
    1001           0 :                 return 0;
    1002             :         }
    1003             : 
    1004             :         /* vring.desc and vring.desc_packed are in a union struct
    1005             :          * so q->vring.desc can replace q->vring.desc_packed.
    1006             :          */
    1007           0 :         if (q->vring.desc == NULL || q->vring.size == 0) {
    1008           0 :                 return 0;
    1009             :         }
    1010             : 
    1011             :         /*
    1012             :          * Not sure right now but this look like some kind of QEMU bug and guest IO
    1013             :          * might be frozed without kicking all queues after live-migration. This look like
    1014             :          * the previous vhost instance failed to effectively deliver all interrupts before
    1015             :          * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts
    1016             :          * should be ignored by guest virtio driver.
    1017             :          *
    1018             :          * Tested on QEMU 2.10.91 and 2.11.50.
    1019             :          *
    1020             :          * Make sure a successful call of
    1021             :          * `rte_vhost_vring_call` will happen
    1022             :          * after starting the device.
    1023             :          */
    1024           0 :         q->used_req_cnt += 1;
    1025             : 
    1026           0 :         return 0;
    1027           0 : }
    1028             : 
/* (Re)initialize virtqueue `qid` for a session: fetch the vring from
 * rte_vhost, restore its ring indexes (from inflight memory for packed
 * rings), allocate per-queue task buffers via the backend and set the
 * notification policy according to interrupt mode.
 * Returns 0 on success — including "queue not ready yet", which is not an
 * error — or a negative errno / backend error on real failure.
 */
static int
enable_device_vq(struct spdk_vhost_session *vsession, uint16_t qid)
{
	struct spdk_vhost_virtqueue *q;
	bool packed_ring;
	const struct spdk_vhost_user_dev_backend *backend;
	int rc;

	if (qid >= SPDK_VHOST_MAX_VQUEUES) {
		return -EINVAL;
	}

	q = &vsession->virtqueue[qid];
	memset(q, 0, sizeof(*q));
	packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);

	q->vsession = vsession;
	/* vring_idx == -1 marks the queue as not enabled until the vring is
	 * successfully fetched below. */
	q->vring_idx = -1;
	if (rte_vhost_get_vhost_vring(vsession->vid, qid, &q->vring)) {
		return 0;
	}
	q->vring_idx = qid;
	rte_vhost_get_vhost_ring_inflight(vsession->vid, qid, &q->vring_inflight);

	/* vring.desc and vring.desc_packed are in a union struct
	 * so q->vring.desc can replace q->vring.desc_packed.
	 */
	if (q->vring.desc == NULL || q->vring.size == 0) {
		return 0;
	}

	if (rte_vhost_get_vring_base(vsession->vid, qid, &q->last_avail_idx, &q->last_used_idx)) {
		/* Treat an unreadable base as "queue not ready" rather than an error. */
		q->vring.desc = NULL;
		return 0;
	}

	backend = to_user_dev(vsession->vdev)->user_backend;
	rc = backend->alloc_vq_tasks(vsession, qid);
	if (rc) {
		return rc;
	}

	/*
	 * This shouldn't harm guest since spurious interrupts should be ignored by
	 * guest virtio driver.
	 *
	 * Make sure a successful call of `rte_vhost_vring_call` will happen after
	 * restarting the device.
	 */
	if (vsession->needs_restart) {
		q->used_req_cnt += 1;
	}

	if (packed_ring) {
		/* Since packed ring flag is already negotiated between SPDK and VM, VM doesn't
		 * restore `last_avail_idx` and `last_used_idx` for packed ring, so use the
		 * inflight mem to restore the `last_avail_idx` and `last_used_idx`.
		 */
		rte_vhost_get_vring_base_from_inflight(vsession->vid, qid, &q->last_avail_idx,
						       &q->last_used_idx);

		/* Packed virtqueues support up to 2^15 entries each
		 * so left one bit can be used as wrap counter.
		 */
		q->packed.avail_phase = q->last_avail_idx >> 15;
		q->last_avail_idx = q->last_avail_idx & 0x7FFF;
		q->packed.used_phase = q->last_used_idx >> 15;
		q->last_used_idx = q->last_used_idx & 0x7FFF;

		if (!spdk_interrupt_mode_is_enabled()) {
			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
		} else {
			/* Enable I/O submission notifications, we'll be interrupting. */
			q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
		}
	} else {
		if (!spdk_interrupt_mode_is_enabled()) {
			/* Disable I/O submission notifications, we'll be polling. */
			q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
		} else {
			/* Enable I/O submission notifications, we'll be interrupting. */
			q->vring.used->flags = 0;
		}
	}

	if (backend->enable_vq) {
		rc = backend->enable_vq(vsession, q);
		if (rc) {
			return rc;
		}
	}

	q->packed.packed_ring = packed_ring;
	vsession->max_queues = spdk_max(vsession->max_queues, qid + 1);

	return 0;
}
    1127             : 
    1128             : static int
    1129           0 : start_device(int vid)
    1130             : {
    1131           0 :         struct spdk_vhost_dev *vdev;
    1132           0 :         struct spdk_vhost_session *vsession;
    1133           0 :         struct spdk_vhost_user_dev *user_dev;
    1134           0 :         int rc = 0;
    1135             : 
    1136           0 :         vsession = vhost_session_find_by_vid(vid);
    1137           0 :         if (vsession == NULL) {
    1138           0 :                 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
    1139           0 :                 return -1;
    1140             :         }
    1141           0 :         vdev = vsession->vdev;
    1142           0 :         user_dev = to_user_dev(vdev);
    1143             : 
    1144           0 :         pthread_mutex_lock(&user_dev->lock);
    1145           0 :         if (vsession->started) {
    1146             :                 /* already started, nothing to do */
    1147           0 :                 goto out;
    1148             :         }
    1149             : 
    1150           0 :         if (!vsession->mem) {
    1151           0 :                 rc = -1;
    1152           0 :                 SPDK_ERRLOG("Session %s doesn't set memory table yet\n", vsession->name);
    1153           0 :                 goto out;
    1154             :         }
    1155             : 
    1156           0 :         vsession->starting = true;
    1157           0 :         SPDK_INFOLOG(vhost, "Session %s is scheduled to start\n", vsession->name);
    1158           0 :         vhost_user_session_set_coalescing(vdev, vsession, NULL);
    1159           0 :         spdk_thread_send_msg(vdev->thread, vhost_user_session_start, vsession);
    1160             : 
    1161             : out:
    1162           0 :         pthread_mutex_unlock(&user_dev->lock);
    1163           0 :         return rc;
    1164           0 : }
    1165             : 
    1166             : static void
    1167           0 : stop_device(int vid)
    1168             : {
    1169           0 :         struct spdk_vhost_session *vsession;
    1170           0 :         struct spdk_vhost_user_dev *user_dev;
    1171             : 
    1172           0 :         vsession = vhost_session_find_by_vid(vid);
    1173           0 :         if (vsession == NULL) {
    1174           0 :                 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
    1175           0 :                 return;
    1176             :         }
    1177           0 :         user_dev = to_user_dev(vsession->vdev);
    1178             : 
    1179           0 :         pthread_mutex_lock(&user_dev->lock);
    1180           0 :         if (!vsession->started && !vsession->starting) {
    1181           0 :                 pthread_mutex_unlock(&user_dev->lock);
    1182             :                 /* already stopped, nothing to do */
    1183           0 :                 return;
    1184             :         }
    1185             : 
    1186           0 :         _stop_session(vsession);
    1187           0 :         pthread_mutex_unlock(&user_dev->lock);
    1188           0 : }
    1189             : 
/*
 * rte_vhost "destroy_connection" callback.
 *
 * Tears down the session for @vid: stops it first if it is running (or
 * scheduled to start), unregisters and frees its guest memory table, then
 * unlinks the session and releases all of its per-session resources.
 */
static void
destroy_connection(int vid)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
		return;
	}
	user_dev = to_user_dev(vsession->vdev);

	pthread_mutex_lock(&user_dev->lock);
	if (vsession->started || vsession->starting) {
		/* If the stop failed, the session is still live — bail out
		 * without freeing anything.
		 */
		if (_stop_session(vsession) != 0) {
			pthread_mutex_unlock(&user_dev->lock);
			return;
		}
	}

	if (vsession->mem) {
		vhost_session_mem_unregister(vsession->mem);
		free(vsession->mem);
	}

	/* Unlink before freeing so no other lock holder can find the session. */
	TAILQ_REMOVE(&to_user_dev(vsession->vdev)->vsessions, vsession, tailq);
	sem_destroy(&vsession->dpdk_sem);
	free(vsession->name);
	free(vsession);
	pthread_mutex_unlock(&user_dev->lock);
}
    1222             : 
/* Device-lifecycle callbacks registered with the DPDK rte_vhost library;
 * DPDK invokes these from its vhost-user message handling context.
 */
static const struct rte_vhost_device_ops g_spdk_vhost_ops = {
	.new_device =  start_device,
	.destroy_device = stop_device,
	.new_connection = new_connection,
	.destroy_connection = destroy_connection,
};
    1229             : 
    1230             : static struct spdk_vhost_session *
    1231           0 : vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
    1232             : {
    1233           0 :         struct spdk_vhost_session *vsession;
    1234             : 
    1235           0 :         TAILQ_FOREACH(vsession, &to_user_dev(vdev)->vsessions, tailq) {
    1236           0 :                 if (vsession->id == id) {
    1237           0 :                         return vsession;
    1238             :                 }
    1239           0 :         }
    1240             : 
    1241           0 :         return NULL;
    1242           0 : }
    1243             : 
    1244             : struct spdk_vhost_session *
    1245           2 : vhost_session_find_by_vid(int vid)
    1246             : {
    1247           2 :         struct spdk_vhost_dev *vdev;
    1248           2 :         struct spdk_vhost_session *vsession;
    1249           2 :         struct spdk_vhost_user_dev *user_dev;
    1250             : 
    1251           2 :         spdk_vhost_lock();
    1252           3 :         for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
    1253           1 :              vdev = spdk_vhost_dev_next(vdev)) {
    1254           2 :                 user_dev = to_user_dev(vdev);
    1255             : 
    1256           2 :                 pthread_mutex_lock(&user_dev->lock);
    1257           3 :                 TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
    1258           2 :                         if (vsession->vid == vid) {
    1259           1 :                                 pthread_mutex_unlock(&user_dev->lock);
    1260           1 :                                 spdk_vhost_unlock();
    1261           1 :                                 return vsession;
    1262             :                         }
    1263           1 :                 }
    1264           1 :                 pthread_mutex_unlock(&user_dev->lock);
    1265           1 :         }
    1266           1 :         spdk_vhost_unlock();
    1267             : 
    1268           1 :         return NULL;
    1269           2 : }
    1270             : 
    1271             : static void
    1272           0 : vhost_session_wait_for_semaphore(struct spdk_vhost_session *vsession, int timeout_sec,
    1273             :                                  const char *errmsg)
    1274             : {
    1275           0 :         struct timespec timeout;
    1276           0 :         int rc;
    1277             : 
    1278           0 :         clock_gettime(CLOCK_REALTIME, &timeout);
    1279           0 :         timeout.tv_sec += timeout_sec;
    1280           0 :         rc = sem_timedwait(&vsession->dpdk_sem, &timeout);
    1281           0 :         if (rc != 0) {
    1282           0 :                 SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
    1283           0 :                 sem_wait(&vsession->dpdk_sem);
    1284           0 :         }
    1285           0 : }
    1286             : 
/*
 * Completion callback for a session-stop request: record the backend's
 * result in dpdk_response and wake the waiter blocked on dpdk_sem (see
 * vhost_user_wait_for_session_stop()).  The session is marked stopped
 * only when @response is 0; a failed stop leaves `started` untouched.
 */
void
vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response)
{
	if (response == 0) {
		vsession->started = false;
	}

	vsession->dpdk_response = response;
	sem_post(&vsession->dpdk_sem);
}
    1297             : 
/*
 * Runs on the device thread: invoke the user backend's stop_session() for
 * the session identified by ctx->vsession_id.
 *
 * The device lock is taken with trylock; if it is contended, this event
 * re-sends itself to the current thread instead of blocking an SPDK
 * reactor thread on a mutex.
 */
static void
vhost_user_session_stop_event(void *arg1)
{
	struct vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_dev *vdev = ctx->vdev;
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
	struct spdk_vhost_session *vsession;

	if (pthread_mutex_trylock(&user_dev->lock) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), vhost_user_session_stop_event, arg1);
		return;
	}

	/* NOTE(review): vhost_session_find_by_id() may return NULL if the
	 * session vanished while the lock was dropped; stop_session() is
	 * presumably tolerant of that — confirm against the backends.
	 */
	vsession = vhost_session_find_by_id(vdev, ctx->vsession_id);
	user_dev->user_backend->stop_session(vdev, vsession, NULL);
	pthread_mutex_unlock(&user_dev->lock);
}
    1315             : 
/*
 * Ask the device thread to stop @vsession and wait up to @timeout_sec for
 * vhost_user_session_stop_done() to post the result on dpdk_sem.
 *
 * Expects user_dev->lock to be held on entry: the lock is dropped while
 * waiting (so the device thread can acquire it in the stop event) and
 * re-taken before returning.  Returns the backend's stop status as left
 * in vsession->dpdk_response.
 */
static int
vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
				 unsigned timeout_sec, const char *errmsg)
{
	struct vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);

	ev_ctx.vdev = vdev;
	ev_ctx.vsession_id = vsession->id;

	/* ev_ctx lives on this stack; safe because we block below until the
	 * event has completed (the semaphore is only posted by stop_done).
	 */
	spdk_thread_send_msg(vdev->thread, vhost_user_session_stop_event, &ev_ctx);

	pthread_mutex_unlock(&user_dev->lock);
	vhost_session_wait_for_semaphore(vsession, timeout_sec, errmsg);
	pthread_mutex_lock(&user_dev->lock);

	return vsession->dpdk_response;
}
    1335             : 
/*
 * Final step of vhost_user_dev_foreach_session(), executed on the init
 * thread: decrement the device's pending-async-op counter, run the
 * caller's completion callback (if any), and free the iteration context.
 */
static void
foreach_session_finish_cb(void *arg1)
{
	struct vhost_session_fn_ctx *ev_ctx = arg1;
	struct spdk_vhost_dev *vdev = ev_ctx->vdev;
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);

	/* Never block an SPDK thread on a contended mutex — re-queue instead. */
	if (pthread_mutex_trylock(&user_dev->lock) != 0) {
		spdk_thread_send_msg(spdk_get_thread(),
				     foreach_session_finish_cb, arg1);
		return;
	}

	assert(user_dev->pending_async_op_num > 0);
	user_dev->pending_async_op_num--;
	if (ev_ctx->cpl_fn != NULL) {
		ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
	}

	pthread_mutex_unlock(&user_dev->lock);
	free(ev_ctx);
}
    1358             : 
    1359             : static void
    1360           0 : foreach_session(void *arg1)
    1361             : {
    1362           0 :         struct vhost_session_fn_ctx *ev_ctx = arg1;
    1363           0 :         struct spdk_vhost_dev *vdev = ev_ctx->vdev;
    1364           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1365           0 :         struct spdk_vhost_session *vsession;
    1366           0 :         int rc;
    1367             : 
    1368           0 :         if (pthread_mutex_trylock(&user_dev->lock) != 0) {
    1369           0 :                 spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
    1370           0 :                 return;
    1371             :         }
    1372             : 
    1373           0 :         TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
    1374           0 :                 rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
    1375           0 :                 if (rc < 0) {
    1376           0 :                         goto out;
    1377             :                 }
    1378           0 :         }
    1379             : 
    1380             : out:
    1381           0 :         pthread_mutex_unlock(&user_dev->lock);
    1382           0 :         spdk_thread_send_msg(g_vhost_user_init_thread, foreach_session_finish_cb, arg1);
    1383           0 : }
    1384             : 
    1385             : void
    1386           0 : vhost_user_dev_foreach_session(struct spdk_vhost_dev *vdev,
    1387             :                                spdk_vhost_session_fn fn,
    1388             :                                spdk_vhost_dev_fn cpl_fn,
    1389             :                                void *arg)
    1390             : {
    1391           0 :         struct vhost_session_fn_ctx *ev_ctx;
    1392           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1393             : 
    1394           0 :         ev_ctx = calloc(1, sizeof(*ev_ctx));
    1395           0 :         if (ev_ctx == NULL) {
    1396           0 :                 SPDK_ERRLOG("Failed to alloc vhost event.\n");
    1397           0 :                 assert(false);
    1398             :                 return;
    1399             :         }
    1400             : 
    1401           0 :         ev_ctx->vdev = vdev;
    1402           0 :         ev_ctx->cb_fn = fn;
    1403           0 :         ev_ctx->cpl_fn = cpl_fn;
    1404           0 :         ev_ctx->user_ctx = arg;
    1405             : 
    1406           0 :         pthread_mutex_lock(&user_dev->lock);
    1407           0 :         assert(user_dev->pending_async_op_num < UINT32_MAX);
    1408           0 :         user_dev->pending_async_op_num++;
    1409           0 :         pthread_mutex_unlock(&user_dev->lock);
    1410             : 
    1411           0 :         spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
    1412           0 : }
    1413             : 
    1414             : void
    1415           0 : vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession, bool interrupt_mode)
    1416             : {
    1417           0 :         uint16_t i;
    1418           0 :         int rc = 0;
    1419             : 
    1420           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1421           0 :                 struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];
    1422           0 :                 uint64_t num_events = 1;
    1423             : 
    1424             :                 /* vring.desc and vring.desc_packed are in a union struct
    1425             :                  * so q->vring.desc can replace q->vring.desc_packed.
    1426             :                  */
    1427           0 :                 if (q->vring.desc == NULL || q->vring.size == 0) {
    1428           0 :                         continue;
    1429             :                 }
    1430             : 
    1431           0 :                 if (interrupt_mode) {
    1432             : 
    1433             :                         /* In case of race condition, always kick vring when switch to intr */
    1434           0 :                         rc = write(q->vring.kickfd, &num_events, sizeof(num_events));
    1435           0 :                         if (rc < 0) {
    1436           0 :                                 SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
    1437           0 :                         }
    1438           0 :                 }
    1439           0 :         }
    1440           0 : }
    1441             : 
/*
 * rte_vhost "pre" hook: intercept selected vhost-user messages BEFORE DPDK
 * processes them.
 *
 * Returns RTE_VHOST_MSG_RESULT_NOT_HANDLED to let DPDK continue its own
 * handling, RESULT_REPLY/RESULT_OK/RESULT_ERR to terminate handling here
 * (GET_CONFIG / SET_CONFIG are served entirely by the SPDK backend).
 */
static int
extern_vhost_pre_msg_handler(int vid, void *_msg)
{
	struct vhost_user_msg *msg = _msg;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
		assert(false);
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	user_dev = to_user_dev(vsession->vdev);

	switch (msg->request) {
	case VHOST_USER_GET_VRING_BASE:
		/* Stop the device before DPDK reads the vring base so the
		 * indices it reports are final.
		 */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->started || vsession->starting) {
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.destroy_device(vid);
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_SET_MEM_TABLE:
		/* A new memory table invalidates current mappings: stop the
		 * device now and remember to restart it (and re-enable its
		 * original queues) in the post handler.
		 */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->started || vsession->starting) {
			vsession->original_max_queues = vsession->max_queues;
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.destroy_device(vid);
			vsession->needs_restart = true;
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_GET_CONFIG: {
		int rc = 0;

		/* Fill the reply payload from the backend; zero the size on
		 * failure so the master sees an empty config.
		 */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->vdev->backend->vhost_get_config) {
			rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
					msg->payload.cfg.region, msg->payload.cfg.size);
			if (rc != 0) {
				msg->size = 0;
			}
		}
		pthread_mutex_unlock(&user_dev->lock);

		return RTE_VHOST_MSG_RESULT_REPLY;
	}
	case VHOST_USER_SET_CONFIG: {
		int rc = 0;

		pthread_mutex_lock(&user_dev->lock);
		if (vsession->vdev->backend->vhost_set_config) {
			rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
					msg->payload.cfg.region, msg->payload.cfg.offset,
					msg->payload.cfg.size, msg->payload.cfg.flags);
		}
		pthread_mutex_unlock(&user_dev->lock);

		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
	}
	default:
		break;
	}

	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
    1512             : 
/*
 * rte_vhost "post" hook: react to selected vhost-user messages AFTER DPDK
 * has processed them (so DPDK's own state — negotiated features, callfds,
 * kickfds, memory table — is already updated).
 *
 * Always returns RESULT_NOT_HANDLED on success so DPDK finishes the
 * message normally; RESULT_ERR aborts handling.
 */
static int
extern_vhost_post_msg_handler(int vid, void *_msg)
{
	struct vhost_user_msg *msg = _msg;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;
	uint16_t qid;
	int rc;

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
		assert(false);
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	user_dev = to_user_dev(vsession->vdev);

	switch (msg->request) {
	case VHOST_USER_SET_FEATURES:
		/* Cache the negotiated feature bits on the session. */
		rc = vhost_get_negotiated_features(vid, &vsession->negotiated_features);
		if (rc) {
			SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
			return RTE_VHOST_MSG_RESULT_ERR;
		}
		break;
	case VHOST_USER_SET_VRING_CALL:
		/* Low bits of payload.u64 carry the queue index. */
		qid = ((uint16_t)msg->payload.u64) & VHOST_USER_VRING_IDX_MASK;
		rc = set_device_vq_callfd(vsession, qid);
		if (rc) {
			return RTE_VHOST_MSG_RESULT_ERR;
		}
		break;
	case VHOST_USER_SET_VRING_KICK:
		qid = ((uint16_t)msg->payload.u64) & VHOST_USER_VRING_IDX_MASK;
		rc = enable_device_vq(vsession, qid);
		if (rc) {
			return RTE_VHOST_MSG_RESULT_ERR;
		}

		/* vhost-user spec tells us to start polling a queue after receiving
		 * its SET_VRING_KICK message. Let's do it!
		 */
		pthread_mutex_lock(&user_dev->lock);
		if (!vsession->started && !vsession->starting) {
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.new_device(vid);
			return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_SET_MEM_TABLE:
		vhost_register_memtable_if_required(vsession, vid);
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->needs_restart) {
			/* The pre handler stopped the device for this memory
			 * update; re-enable the queues it had and restart it.
			 */
			pthread_mutex_unlock(&user_dev->lock);
			for (qid = 0; qid < vsession->original_max_queues; qid++) {
				enable_device_vq(vsession, qid);
			}
			vsession->original_max_queues = 0;
			vsession->needs_restart = false;
			g_spdk_vhost_ops.new_device(vid);
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	default:
		break;
	}

	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
    1584             : 
/* Pre/post message hooks registered with rte_vhost (see
 * vhost_session_install_rte_compat_hooks()) so SPDK can intercept
 * vhost-user messages around DPDK's own processing.
 */
struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
	.pre_msg_handle = extern_vhost_pre_msg_handler,
	.post_msg_handle = extern_vhost_post_msg_handler,
};
    1589             : 
    1590             : void
    1591           0 : vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
    1592             : {
    1593           0 :         int rc;
    1594             : 
    1595           0 :         rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
    1596           0 :         if (rc != 0) {
    1597           0 :                 SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
    1598             :                             vsession->vid);
    1599           0 :                 return;
    1600             :         }
    1601           0 : }
    1602             : 
/*
 * Create and start a vhost-user unix-domain socket at @path for controller
 * @ctrl_name, configuring the DPDK vhost driver with the given virtio,
 * disabled and protocol feature masks.
 *
 * Any stale socket file at @path is unlinked first; a non-socket file at
 * that path is an error.  Returns 0 on success, -EIO on any failure (the
 * driver registration is rolled back on the error paths after it).
 */
int
vhost_register_unix_socket(const char *path, const char *ctrl_name,
			   uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
{
	struct stat file_stat;
	uint64_t features = 0;
	uint64_t flags = 0;

	/* Register vhost driver to handle vhost messages. */
	if (stat(path, &file_stat) != -1) {
		if (!S_ISSOCK(file_stat.st_mode)) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The file already exists and is not a socket.\n",
				    path);
			return -EIO;
		} else if (unlink(path) != 0) {
			SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
				    "The socket already exists and failed to unlink.\n",
				    path);
			return -EIO;
		}
	}

	/* Async-copy mode is only used without an IOMMU. */
	flags = spdk_iommu_is_enabled() ? 0 : RTE_VHOST_USER_ASYNC_COPY;
	if (rte_vhost_driver_register(path, flags) != 0) {
		SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
		SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
		return -EIO;
	}
	if (rte_vhost_driver_set_features(path, virtio_features) ||
	    rte_vhost_driver_disable_features(path, disabled_features)) {
		SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);

		rte_vhost_driver_unregister(path);
		return -EIO;
	}

	if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
		rte_vhost_driver_unregister(path);
		SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
		return -EIO;
	}

	/* Extend (never replace) the driver's default protocol feature set. */
	rte_vhost_driver_get_protocol_features(path, &features);
	features |= protocol_features;
	rte_vhost_driver_set_protocol_features(path, features);

	if (rte_vhost_driver_start(path) != 0) {
		SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
			    ctrl_name, errno, spdk_strerror(errno));
		rte_vhost_driver_unregister(path);
		return -EIO;
	}

	return 0;
}
    1659             : 
    1660             : int
    1661           0 : vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
    1662             : {
    1663           0 :         return rte_vhost_get_mem_table(vid, mem);
    1664             : }
    1665             : 
    1666             : int
    1667           9 : vhost_driver_unregister(const char *path)
    1668             : {
    1669           9 :         return rte_vhost_driver_unregister(path);
    1670             : }
    1671             : 
    1672             : int
    1673           0 : vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
    1674             : {
    1675           0 :         return rte_vhost_get_negotiated_features(vid, negotiated_features);
    1676             : }
    1677             : 
    1678             : int
    1679           9 : vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us,
    1680             :                               uint32_t iops_threshold)
    1681             : {
    1682           9 :         uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
    1683           9 :         uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
    1684             : 
    1685           9 :         if (delay_time_base >= UINT32_MAX) {
    1686           0 :                 SPDK_ERRLOG("Delay time of %"PRIu32" is to big\n", delay_base_us);
    1687           0 :                 return -EINVAL;
    1688           9 :         } else if (io_rate == 0) {
    1689           0 :                 SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
    1690             :                             1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
    1691           0 :                 return -EINVAL;
    1692             :         }
    1693             : 
    1694           9 :         user_dev->coalescing_delay_us = delay_base_us;
    1695           9 :         user_dev->coalescing_iops_threshold = iops_threshold;
    1696           9 :         return 0;
    1697           9 : }
    1698             : 
    1699             : int
    1700           0 : vhost_user_session_set_coalescing(struct spdk_vhost_dev *vdev,
    1701             :                                   struct spdk_vhost_session *vsession, void *ctx)
    1702             : {
    1703           0 :         vsession->coalescing_delay_time_base =
    1704           0 :                 to_user_dev(vdev)->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
    1705           0 :         vsession->coalescing_io_rate_threshold =
    1706           0 :                 to_user_dev(vdev)->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
    1707           0 :         return 0;
    1708             : }
    1709             : 
    1710             : int
    1711           0 : vhost_user_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
    1712             :                           uint32_t iops_threshold)
    1713             : {
    1714           0 :         int rc;
    1715             : 
    1716           0 :         rc = vhost_user_dev_set_coalescing(to_user_dev(vdev), delay_base_us, iops_threshold);
    1717           0 :         if (rc != 0) {
    1718           0 :                 return rc;
    1719             :         }
    1720             : 
    1721           0 :         vhost_user_dev_foreach_session(vdev, vhost_user_session_set_coalescing, NULL, NULL);
    1722             : 
    1723           0 :         return 0;
    1724           0 : }
    1725             : 
    1726             : void
    1727           0 : vhost_user_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
    1728             :                           uint32_t *iops_threshold)
    1729             : {
    1730           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1731             : 
    1732           0 :         if (delay_base_us) {
    1733           0 :                 *delay_base_us = user_dev->coalescing_delay_us;
    1734           0 :         }
    1735             : 
    1736           0 :         if (iops_threshold) {
    1737           0 :                 *iops_threshold = user_dev->coalescing_iops_threshold;
    1738           0 :         }
    1739           0 : }
    1740             : 
    1741             : int
    1742           0 : spdk_vhost_set_socket_path(const char *basename)
    1743             : {
    1744           0 :         int ret;
    1745             : 
    1746           0 :         if (basename && strlen(basename) > 0) {
    1747           0 :                 ret = snprintf(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 2, "%s", basename);
    1748           0 :                 if (ret <= 0) {
    1749           0 :                         return -EINVAL;
    1750             :                 }
    1751           0 :                 if ((size_t)ret >= sizeof(g_vhost_user_dev_dirname) - 2) {
    1752           0 :                         SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
    1753           0 :                         return -EINVAL;
    1754             :                 }
    1755             : 
    1756           0 :                 if (g_vhost_user_dev_dirname[ret - 1] != '/') {
    1757           0 :                         g_vhost_user_dev_dirname[ret] = '/';
    1758           0 :                         g_vhost_user_dev_dirname[ret + 1]  = '\0';
    1759           0 :                 }
    1760           0 :         }
    1761             : 
    1762           0 :         return 0;
    1763           0 : }
    1764             : 
    1765             : static void
    1766           0 : vhost_dev_thread_exit(void *arg1)
    1767             : {
    1768           0 :         spdk_thread_exit(spdk_get_thread());
    1769           0 : }
    1770             : 
    1771             : static bool g_vhost_user_started = false;
    1772             : 
    1773             : int
    1774          10 : vhost_user_dev_init(struct spdk_vhost_dev *vdev, const char *name,
    1775             :                     struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend)
    1776             : {
    1777          10 :         char path[PATH_MAX];
    1778          10 :         struct spdk_vhost_user_dev *user_dev;
    1779             : 
    1780          10 :         if (snprintf(path, sizeof(path), "%s%s", g_vhost_user_dev_dirname, name) >= (int)sizeof(path)) {
    1781           1 :                 SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n",
    1782             :                             name, g_vhost_user_dev_dirname, name);
    1783           1 :                 return -EINVAL;
    1784             :         }
    1785             : 
    1786           9 :         vdev->path = strdup(path);
    1787           9 :         if (vdev->path == NULL) {
    1788           0 :                 return -EIO;
    1789             :         }
    1790             : 
    1791           9 :         user_dev = calloc(1, sizeof(*user_dev));
    1792           9 :         if (user_dev == NULL) {
    1793           0 :                 free(vdev->path);
    1794           0 :                 return -ENOMEM;
    1795             :         }
    1796           9 :         vdev->ctxt = user_dev;
    1797             : 
    1798           9 :         vdev->thread = spdk_thread_create(vdev->name, cpumask);
    1799           9 :         if (vdev->thread == NULL) {
    1800           0 :                 free(user_dev);
    1801           0 :                 free(vdev->path);
    1802           0 :                 SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
    1803           0 :                 return -EIO;
    1804             :         }
    1805             : 
    1806           9 :         user_dev->user_backend = user_backend;
    1807           9 :         user_dev->vdev = vdev;
    1808           9 :         user_dev->registered = true;
    1809           9 :         TAILQ_INIT(&user_dev->vsessions);
    1810           9 :         pthread_mutex_init(&user_dev->lock, NULL);
    1811             : 
    1812           9 :         vhost_user_dev_set_coalescing(user_dev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
    1813             :                                       SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
    1814             : 
    1815           9 :         return 0;
    1816          10 : }
    1817             : 
    1818             : int
    1819           9 : vhost_user_dev_start(struct spdk_vhost_dev *vdev)
    1820             : {
    1821          18 :         return vhost_register_unix_socket(vdev->path, vdev->name, vdev->virtio_features,
    1822           9 :                                           vdev->disabled_features,
    1823           9 :                                           vdev->protocol_features);
    1824             : }
    1825             : 
    1826             : int
    1827          10 : vhost_user_dev_create(struct spdk_vhost_dev *vdev, const char *name, struct spdk_cpuset *cpumask,
    1828             :                       const struct spdk_vhost_user_dev_backend *user_backend, bool delay)
    1829             : {
    1830          10 :         int rc;
    1831          10 :         struct spdk_vhost_user_dev *user_dev;
    1832             : 
    1833          10 :         rc = vhost_user_dev_init(vdev, name, cpumask, user_backend);
    1834          10 :         if (rc != 0) {
    1835           1 :                 return rc;
    1836             :         }
    1837             : 
    1838           9 :         if (delay == false) {
    1839           9 :                 rc = vhost_user_dev_start(vdev);
    1840           9 :                 if (rc != 0) {
    1841           0 :                         user_dev = to_user_dev(vdev);
    1842           0 :                         spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
    1843           0 :                         pthread_mutex_destroy(&user_dev->lock);
    1844           0 :                         free(user_dev);
    1845           0 :                         free(vdev->path);
    1846           0 :                 }
    1847           9 :         }
    1848             : 
    1849           9 :         return rc;
    1850          10 : }
    1851             : 
/* Unregister and free a vhost-user device.
 *
 * Returns -EBUSY if the device lock cannot be taken immediately, if async
 * operations are still pending, or if the subsystem is running and the
 * device still has live sessions. On success, releases all device state,
 * removes the domain socket and asks the device thread to exit; returns 0.
 */
int
vhost_user_dev_unregister(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
	struct spdk_vhost_session *vsession, *tmp_vsession;

	/* Non-blocking lock: if someone else holds the device lock, report
	 * busy instead of waiting (the caller can retry). */
	if (pthread_mutex_trylock(&user_dev->lock) != 0) {
		return -EBUSY;
	}

	if (user_dev->pending_async_op_num) {
		pthread_mutex_unlock(&user_dev->lock);
		return -EBUSY;
	}

	/* This is the case that uses RPC call `vhost_delete_controller` while VM is connected */
	if (!TAILQ_EMPTY(&user_dev->vsessions) && g_vhost_user_started) {
		SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name);
		pthread_mutex_unlock(&user_dev->lock);
		return -EBUSY;
	}

	/* This is the case that quits the subsystem while VM is connected, the VM
	 * should be stopped by the shutdown thread.
	 */
	if (!g_vhost_user_started) {
		/* Subsystem shutdown: sessions were already stopped, so their
		 * remaining resources can be reclaimed directly here. */
		TAILQ_FOREACH_SAFE(vsession, &user_dev->vsessions, tailq, tmp_vsession) {
			assert(vsession->started == false);
			TAILQ_REMOVE(&user_dev->vsessions, vsession, tailq);
			if (vsession->mem) {
				vhost_session_mem_unregister(vsession->mem);
				free(vsession->mem);
			}
			sem_destroy(&vsession->dpdk_sem);
			free(vsession->name);
			free(vsession);
		}
	}

	user_dev->registered = false;
	pthread_mutex_unlock(&user_dev->lock);

	/* There are no valid connections now, and it's not an error if the domain
	 * socket was already removed by shutdown thread.
	 */
	vhost_driver_unregister(vdev->path);

	spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
	pthread_mutex_destroy(&user_dev->lock);

	free(user_dev);
	free(vdev->path);

	return 0;
}
    1907             : 
    1908             : int
    1909           2 : vhost_user_init(void)
    1910             : {
    1911           2 :         size_t len;
    1912             : 
    1913           2 :         if (g_vhost_user_started) {
    1914           1 :                 return 0;
    1915             :         }
    1916             : 
    1917           1 :         if (g_vhost_user_dev_dirname[0] == '\0') {
    1918           1 :                 if (getcwd(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 1) == NULL) {
    1919           0 :                         SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
    1920           0 :                         return -1;
    1921             :                 }
    1922             : 
    1923           1 :                 len = strlen(g_vhost_user_dev_dirname);
    1924           1 :                 if (g_vhost_user_dev_dirname[len - 1] != '/') {
    1925           1 :                         g_vhost_user_dev_dirname[len] = '/';
    1926           1 :                         g_vhost_user_dev_dirname[len + 1] = '\0';
    1927           1 :                 }
    1928           1 :         }
    1929             : 
    1930           1 :         g_vhost_user_started = true;
    1931             : 
    1932           1 :         g_vhost_user_init_thread = spdk_get_thread();
    1933           1 :         assert(g_vhost_user_init_thread != NULL);
    1934             : 
    1935           1 :         return 0;
    1936           2 : }
    1937             : 
    1938             : static void
    1939           1 : vhost_user_session_shutdown_on_init(void *vhost_cb)
    1940             : {
    1941           1 :         spdk_vhost_fini_cb fn = vhost_cb;
    1942             : 
    1943           1 :         fn();
    1944           1 : }
    1945             : 
/* Shutdown-thread entry point: stop every started session on every device
 * and, when all of a device's sessions stopped cleanly, remove its domain
 * socket. Finally, bounce the fini callback back to the init thread.
 * \param vhost_cb spdk_vhost_fini_cb passed through from vhost_user_fini().
 */
static void *
vhost_user_session_shutdown(void *vhost_cb)
{
	struct spdk_vhost_dev *vdev = NULL;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;
	int ret;

	for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
	     vdev = spdk_vhost_dev_next(vdev)) {
		user_dev = to_user_dev(vdev);
		ret = 0;
		pthread_mutex_lock(&user_dev->lock);
		TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
			if (vsession->started || vsession->starting) {
				/* Accumulate failures; any non-zero result keeps
				 * the socket registered below. */
				ret += _stop_session(vsession);
			}
		}
		pthread_mutex_unlock(&user_dev->lock);
		if (ret == 0) {
			vhost_driver_unregister(vdev->path);
		}
	}

	SPDK_INFOLOG(vhost, "Exiting\n");
	/* Hand control back to the thread that ran vhost_user_init(). */
	spdk_thread_send_msg(g_vhost_user_init_thread, vhost_user_session_shutdown_on_init, vhost_cb);
	return NULL;
}
    1974             : 
    1975             : void
    1976           2 : vhost_user_fini(spdk_vhost_fini_cb vhost_cb)
    1977             : {
    1978           2 :         pthread_t tid;
    1979           2 :         int rc;
    1980             : 
    1981           2 :         if (!g_vhost_user_started) {
    1982           1 :                 vhost_cb();
    1983           1 :                 return;
    1984             :         }
    1985             : 
    1986           1 :         g_vhost_user_started = false;
    1987             : 
    1988             :         /* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
    1989             :          * ops for stopping a device or removing a connection, we need to call it from
    1990             :          * a separate thread to avoid deadlock.
    1991             :          */
    1992           1 :         rc = pthread_create(&tid, NULL, &vhost_user_session_shutdown, vhost_cb);
    1993           1 :         if (rc != 0) {
    1994           0 :                 SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
    1995           0 :                 abort();
    1996             :         }
    1997           1 :         pthread_detach(tid);
    1998           2 : }
    1999             : 
    2000             : void
    2001           0 : vhost_session_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
    2002             : {
    2003           0 :         struct spdk_vhost_session *vsession;
    2004           0 :         struct spdk_vhost_user_dev *user_dev;
    2005             : 
    2006           0 :         user_dev = to_user_dev(vdev);
    2007           0 :         pthread_mutex_lock(&user_dev->lock);
    2008           0 :         TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
    2009           0 :                 spdk_json_write_object_begin(w);
    2010           0 :                 spdk_json_write_named_uint32(w, "vid", vsession->vid);
    2011           0 :                 spdk_json_write_named_uint32(w, "id", vsession->id);
    2012           0 :                 spdk_json_write_named_string(w, "name", vsession->name);
    2013           0 :                 spdk_json_write_named_bool(w, "started", vsession->started);
    2014           0 :                 spdk_json_write_named_uint32(w, "max_queues", vsession->max_queues);
    2015           0 :                 spdk_json_write_named_uint32(w, "inflight_task_cnt", vsession->task_cnt);
    2016           0 :                 spdk_json_write_object_end(w);
    2017           0 :         }
    2018           0 :         pthread_mutex_unlock(&user_dev->lock);
    2019           0 : }

Generated by: LCOV version 1.15