LCOV - code coverage report
Current view: top level - lib/vhost - rte_vhost_user.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 201 956 21.0 %
Date: 2024-07-13 12:24:37 Functions: 22 76 28.9 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2019 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/env.h"
      10             : #include "spdk/likely.h"
      11             : #include "spdk/string.h"
      12             : #include "spdk/util.h"
      13             : #include "spdk/memory.h"
      14             : #include "spdk/barrier.h"
      15             : #include "spdk/vhost.h"
      16             : #include "vhost_internal.h"
      17             : #include <rte_version.h>
      18             : 
      19             : #include "spdk_internal/vhost_user.h"
      20             : 
      21             : /* Path to folder where character device will be created. Can be set by user. */
      22             : static char g_vhost_user_dev_dirname[PATH_MAX] = "";
      23             : 
      24             : static struct spdk_thread *g_vhost_user_init_thread;
      25             : 
      26             : /**
      27             :  * DPDK calls our callbacks synchronously but the work those callbacks
      28             :  * perform needs to be async. Luckily, all DPDK callbacks are called on
      29             :  * a DPDK-internal pthread, so we'll just wait on a semaphore in there.
      30             :  */
      31             : static sem_t g_dpdk_sem;
      32             : 
      33             : /** Return code for the current DPDK callback */
      34             : static int g_dpdk_response;
      35             : 
/* Context carried with a cross-thread event while dispatching callbacks
 * to a device's sessions and, optionally, a completion on the init thread. */
struct vhost_session_fn_ctx {
	/** Device pointer obtained before enqueueing the event */
	struct spdk_vhost_dev *vdev;

	/** ID of the session to send event to. */
	uint32_t vsession_id;

	/** User provided function to be executed on session's thread. */
	spdk_vhost_session_fn cb_fn;

	/**
	 * User provided function to be called on the init thread
	 * after iterating through all sessions.
	 */
	spdk_vhost_dev_fn cpl_fn;

	/** Custom user context */
	void *user_ctx;
};
      55             : 
      56             : static int vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
      57             :                 unsigned timeout_sec, const char *errmsg);
      58             : 
/* Library constructor: create the semaphore the DPDK callback pthread
 * blocks on while async work runs on an SPDK thread (see g_dpdk_sem).
 * There is no way to report failure from a constructor, so abort. */
static void
__attribute__((constructor))
_vhost_user_sem_init(void)
{
	if (sem_init(&g_dpdk_sem, 0, 0) != 0) {
		SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
		abort();
	}
}
      68             : 
/* Library destructor: tear down the semaphore created by
 * _vhost_user_sem_init(). */
static void
__attribute__((destructor))
_vhost_user_sem_destroy(void)
{
	sem_destroy(&g_dpdk_sem);
}
      75             : 
      76             : void *
      77           0 : vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
      78             : {
      79             :         void *vva;
      80           0 :         uint64_t newlen;
      81             : 
      82           0 :         newlen = len;
      83           0 :         vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
      84           0 :         if (newlen != len) {
      85           0 :                 return NULL;
      86             :         }
      87             : 
      88           0 :         return vva;
      89             : 
      90             : }
      91             : 
/* If dirty-page logging is enabled (VHOST_F_LOG_ALL negotiated), mark every
 * writable buffer of request @req_id as dirty in the vhost log. No-op when
 * logging is off, which is the common case. */
static void
vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
		   uint16_t req_id)
{
	struct vring_desc *desc, *desc_table;
	uint32_t desc_table_size;
	int rc;

	if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
		return;
	}

	rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("Can't log used ring descriptors!\n");
		return;
	}

	do {
		if (vhost_vring_desc_is_wr(desc)) {
			/* Strictly, only the pages actually touched need logging, but
			 * tracking that would require support in each backend. The
			 * backend will most likely touch all/most of those pages
			 * anyway, so conservatively log every writable buffer. */
			rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
		}
		vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
	} while (desc);
}
     121             : 
     122             : static void
     123           7 : vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
     124             :                           struct spdk_vhost_virtqueue *virtqueue,
     125             :                           uint16_t idx)
     126             : {
     127             :         uint64_t offset, len;
     128             : 
     129           7 :         if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
     130           7 :                 return;
     131             :         }
     132             : 
     133           0 :         if (spdk_unlikely(virtqueue->packed.packed_ring)) {
     134           0 :                 offset = idx * sizeof(struct vring_packed_desc);
     135           0 :                 len = sizeof(struct vring_packed_desc);
     136             :         } else {
     137           0 :                 offset = offsetof(struct vring_used, ring[idx]);
     138           0 :                 len = sizeof(virtqueue->vring.used->ring[idx]);
     139             :         }
     140             : 
     141           0 :         rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
     142             : }
     143             : 
     144             : static void
     145           0 : vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
     146             :                          struct spdk_vhost_virtqueue *virtqueue)
     147             : {
     148             :         uint64_t offset, len;
     149             :         uint16_t vq_idx;
     150             : 
     151           0 :         if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
     152           0 :                 return;
     153             :         }
     154             : 
     155           0 :         offset = offsetof(struct vring_used, idx);
     156           0 :         len = sizeof(virtqueue->vring.used->idx);
     157           0 :         vq_idx = virtqueue - vsession->virtqueue;
     158             : 
     159           0 :         rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
     160             : }
     161             : 
     162             : /*
     163             :  * Get available requests from avail ring.
     164             :  */
     165             : uint16_t
     166           4 : vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
     167             :                         uint16_t reqs_len)
     168             : {
     169           4 :         struct rte_vhost_vring *vring = &virtqueue->vring;
     170           4 :         struct vring_avail *avail = vring->avail;
     171           4 :         uint16_t size_mask = vring->size - 1;
     172           4 :         uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
     173             :         uint16_t count, i;
     174             :         int rc;
     175           4 :         uint64_t u64_value;
     176             : 
     177           4 :         spdk_smp_rmb();
     178             : 
     179           4 :         if (virtqueue->vsession && spdk_unlikely(virtqueue->vsession->interrupt_mode)) {
     180             :                 /* Read to clear vring's kickfd */
     181           0 :                 rc = read(vring->kickfd, &u64_value, sizeof(u64_value));
     182           0 :                 if (rc < 0) {
     183           0 :                         SPDK_ERRLOG("failed to acknowledge kickfd: %s.\n", spdk_strerror(errno));
     184           0 :                         return -errno;
     185             :                 }
     186             :         }
     187             : 
     188           4 :         count = avail_idx - last_idx;
     189           4 :         if (spdk_likely(count == 0)) {
     190           0 :                 return 0;
     191             :         }
     192             : 
     193           4 :         if (spdk_unlikely(count > vring->size)) {
     194             :                 /* TODO: the queue is unrecoverably broken and should be marked so.
     195             :                  * For now we will fail silently and report there are no new avail entries.
     196             :                  */
     197           1 :                 return 0;
     198             :         }
     199             : 
     200           3 :         count = spdk_min(count, reqs_len);
     201             : 
     202           3 :         virtqueue->last_avail_idx += count;
     203             :         /* Check whether there are unprocessed reqs in vq, then kick vq manually */
     204           3 :         if (virtqueue->vsession && spdk_unlikely(virtqueue->vsession->interrupt_mode)) {
     205             :                 /* If avail_idx is larger than virtqueue's last_avail_idx, then there is unprocessed reqs.
     206             :                  * avail_idx should get updated here from memory, in case of race condition with guest.
     207             :                  */
     208           0 :                 avail_idx = * (volatile uint16_t *) &avail->idx;
     209           0 :                 if (avail_idx > virtqueue->last_avail_idx) {
     210             :                         /* Write to notify vring's kickfd */
     211           0 :                         rc = write(vring->kickfd, &u64_value, sizeof(u64_value));
     212           0 :                         if (rc < 0) {
     213           0 :                                 SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
     214           0 :                                 return -errno;
     215             :                         }
     216             :                 }
     217             :         }
     218             : 
     219          19 :         for (i = 0; i < count; i++) {
     220          16 :                 reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
     221             :         }
     222             : 
     223           3 :         SPDK_DEBUGLOG(vhost_ring,
     224             :                       "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
     225             :                       last_idx, avail_idx, count);
     226             : 
     227           3 :         return count;
     228             : }
     229             : 
     230             : static bool
     231           0 : vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
     232             : {
     233           0 :         return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
     234             : }
     235             : 
     236             : static bool
     237           7 : vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
     238             : {
     239           7 :         return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
     240             : }
     241             : 
     242             : static bool
     243           0 : vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
     244             : {
     245           0 :         return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
     246             : }
     247             : 
     248             : int
     249           0 : vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
     250             :                   uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
     251             :                   uint32_t *desc_table_size)
     252             : {
     253           0 :         if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
     254           0 :                 return -1;
     255             :         }
     256             : 
     257           0 :         *desc = &virtqueue->vring.desc[req_idx];
     258             : 
     259           0 :         if (vhost_vring_desc_is_indirect(*desc)) {
     260           0 :                 *desc_table_size = (*desc)->len / sizeof(**desc);
     261           0 :                 *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
     262           0 :                                                sizeof(**desc) * *desc_table_size);
     263           0 :                 *desc = *desc_table;
     264           0 :                 if (*desc == NULL) {
     265           0 :                         return -1;
     266             :                 }
     267             : 
     268           0 :                 return 0;
     269             :         }
     270             : 
     271           0 :         *desc_table = virtqueue->vring.desc;
     272           0 :         *desc_table_size = virtqueue->vring.size;
     273             : 
     274           0 :         return 0;
     275             : }
     276             : 
     277             : static bool
     278           0 : vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
     279             :                 uint64_t addr, uint32_t len,
     280             :                 struct vring_packed_desc **desc_table,
     281             :                 uint32_t *desc_table_size)
     282             : {
     283           0 :         *desc_table_size = len / sizeof(struct vring_packed_desc);
     284             : 
     285           0 :         *desc_table = vhost_gpa_to_vva(vsession, addr, len);
     286           0 :         if (spdk_unlikely(*desc_table == NULL)) {
     287           0 :                 return false;
     288             :         }
     289             : 
     290           0 :         return true;
     291             : }
     292             : 
     293             : int
     294           0 : vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
     295             :                          struct spdk_vhost_virtqueue *virtqueue,
     296             :                          uint16_t req_idx, struct vring_packed_desc **desc,
     297             :                          struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
     298             : {
     299           0 :         *desc =  &virtqueue->vring.desc_packed[req_idx];
     300             : 
     301             :         /* In packed ring when the desc is non-indirect we get next desc
     302             :          * by judging (desc->flag & VRING_DESC_F_NEXT) != 0. When the desc
     303             :          * is indirect we get next desc by idx and desc_table_size. It's
     304             :          * different from split ring.
     305             :          */
     306           0 :         if (vhost_vring_packed_desc_is_indirect(*desc)) {
     307           0 :                 if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
     308             :                                 desc_table, desc_table_size)) {
     309           0 :                         return -1;
     310             :                 }
     311             : 
     312           0 :                 *desc = *desc_table;
     313             :         } else {
     314           0 :                 *desc_table = NULL;
     315           0 :                 *desc_table_size  = 0;
     316             :         }
     317             : 
     318           0 :         return 0;
     319             : }
     320             : 
     321             : int
     322           0 : vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
     323             :                               spdk_vhost_inflight_desc *desc_array,
     324             :                               uint16_t req_idx, spdk_vhost_inflight_desc **desc,
     325             :                               struct vring_packed_desc  **desc_table, uint32_t *desc_table_size)
     326             : {
     327           0 :         *desc = &desc_array[req_idx];
     328             : 
     329           0 :         if (vhost_inflight_packed_desc_is_indirect(*desc)) {
     330           0 :                 if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
     331             :                                 desc_table, desc_table_size)) {
     332           0 :                         return -1;
     333             :                 }
     334             : 
     335             :                 /* This desc is the inflight desc not the packed desc.
     336             :                  * When set the F_INDIRECT the table entry should be the packed desc
     337             :                  * so set the inflight desc NULL.
     338             :                  */
     339           0 :                 *desc = NULL;
     340             :         } else {
     341             :                 /* When not set the F_INDIRECT means there is no packed desc table */
     342           0 :                 *desc_table = NULL;
     343           0 :                 *desc_table_size = 0;
     344             :         }
     345             : 
     346           0 :         return 0;
     347             : }
     348             : 
     349             : int
     350           0 : vhost_vq_used_signal(struct spdk_vhost_session *vsession,
     351             :                      struct spdk_vhost_virtqueue *virtqueue)
     352             : {
     353           0 :         if (virtqueue->used_req_cnt == 0) {
     354           0 :                 return 0;
     355             :         }
     356             : 
     357           0 :         SPDK_DEBUGLOG(vhost_ring,
     358             :                       "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
     359             :                       virtqueue - vsession->virtqueue, virtqueue->last_used_idx);
     360             : 
     361             : #if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
     362             :         if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
     363             : #else
     364           0 :         if (rte_vhost_vring_call_nonblock(vsession->vid, virtqueue->vring_idx) == 0) {
     365             : #endif
     366             :                 /* interrupt signalled */
     367           0 :                 virtqueue->req_cnt += virtqueue->used_req_cnt;
     368           0 :                 virtqueue->used_req_cnt = 0;
     369           0 :                 return 1;
     370             :         } else {
     371             :                 /* interrupt not signalled */
     372           0 :                 return 0;
     373             :         }
     374             : }
     375             : 
     376             : static void
     377           0 : session_vq_io_stats_update(struct spdk_vhost_session *vsession,
     378             :                            struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
     379             : {
     380           0 :         uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
     381           0 :         uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
     382             :         int32_t irq_delay;
     383             :         uint32_t req_cnt;
     384             : 
     385           0 :         req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
     386           0 :         if (req_cnt <= io_threshold) {
     387           0 :                 return;
     388             :         }
     389             : 
     390           0 :         irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
     391           0 :         virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);
     392             : 
     393           0 :         virtqueue->req_cnt = 0;
     394           0 :         virtqueue->next_event_time = now;
     395             : }
     396             : 
     397             : static void
     398           0 : check_session_vq_io_stats(struct spdk_vhost_session *vsession,
     399             :                           struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
     400             : {
     401           0 :         if (now < vsession->next_stats_check_time) {
     402           0 :                 return;
     403             :         }
     404             : 
     405           0 :         vsession->next_stats_check_time = now + vsession->stats_check_interval;
     406           0 :         session_vq_io_stats_update(vsession, virtqueue, now);
     407             : }
     408             : 
     409             : static inline bool
     410           0 : vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
     411             : {
     412           0 :         spdk_smp_mb();
     413             : 
     414           0 :         if (spdk_unlikely(vq->packed.packed_ring)) {
     415           0 :                 if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
     416           0 :                         return true;
     417             :                 }
     418             :         } else {
     419           0 :                 if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
     420           0 :                         return true;
     421             :                 }
     422             :         }
     423             : 
     424           0 :         return false;
     425             : }
     426             : 
     427             : void
     428           0 : vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue)
     429             : {
     430           0 :         struct spdk_vhost_session *vsession = virtqueue->vsession;
     431             :         uint64_t now;
     432             : 
     433           0 :         if (vsession->coalescing_delay_time_base == 0) {
     434           0 :                 if (virtqueue->vring.desc == NULL) {
     435           0 :                         return;
     436             :                 }
     437             : 
     438           0 :                 if (vhost_vq_event_is_suppressed(virtqueue)) {
     439           0 :                         return;
     440             :                 }
     441             : 
     442           0 :                 vhost_vq_used_signal(vsession, virtqueue);
     443             :         } else {
     444           0 :                 now = spdk_get_ticks();
     445           0 :                 check_session_vq_io_stats(vsession, virtqueue, now);
     446             : 
     447             :                 /* No need for event right now */
     448           0 :                 if (now < virtqueue->next_event_time) {
     449           0 :                         return;
     450             :                 }
     451             : 
     452           0 :                 if (vhost_vq_event_is_suppressed(virtqueue)) {
     453           0 :                         return;
     454             :                 }
     455             : 
     456           0 :                 if (!vhost_vq_used_signal(vsession, virtqueue)) {
     457           0 :                         return;
     458             :                 }
     459             : 
     460             :                 /* Syscall is quite long so update time */
     461           0 :                 now = spdk_get_ticks();
     462           0 :                 virtqueue->next_event_time = now + virtqueue->irq_delay_time;
     463             :         }
     464             : }
     465             : 
/*
 * Enqueue id and len to used ring.
 *
 * Completes a split-ring request: writes the used element, publishes the
 * new used index to the guest, maintains inflight bookkeeping and dirty
 * logging, and (in interrupt mode) signals the guest. Statement order
 * around the write barrier is significant.
 */
void
vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
                           struct spdk_vhost_virtqueue *virtqueue,
                           uint16_t id, uint32_t len)
{
	struct rte_vhost_vring *vring = &virtqueue->vring;
	struct vring_used *used = vring->used;
	uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
	uint16_t vq_idx = virtqueue->vring_idx;

	SPDK_DEBUGLOG(vhost_ring,
		      "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
		      virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);

	/* Dirty-log the request's writable buffers (live migration only). */
	vhost_log_req_desc(vsession, virtqueue, id);

	virtqueue->last_used_idx++;
	used->ring[last_idx].id = id;
	used->ring[last_idx].len = len;

	/* Ensure the used ring is updated before we log it or increment used->idx. */
	spdk_smp_wmb();

	rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);

	vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
	/* Publish the new used index; the volatile cast forces a single real
	 * store visible to the guest. */
	* (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
	vhost_log_used_vring_idx(vsession, virtqueue);

	rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);

	virtqueue->used_req_cnt++;

	if (vsession->interrupt_mode) {
		/* In interrupt mode signal right away, unless the vring is not
		 * set up or the driver suppressed notifications. */
		if (virtqueue->vring.desc == NULL || vhost_vq_event_is_suppressed(virtqueue)) {
			return;
		}

		vhost_vq_used_signal(vsession, virtqueue);
	}
}
     510             : 
     511             : void
     512           7 : vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
     513             :                              struct spdk_vhost_virtqueue *virtqueue,
     514             :                              uint16_t num_descs, uint16_t buffer_id,
     515             :                              uint32_t length, uint16_t inflight_head)
     516             : {
     517           7 :         struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
     518             :         bool used, avail;
     519             : 
     520           7 :         SPDK_DEBUGLOG(vhost_ring,
     521             :                       "Queue %td - RING: buffer_id=%"PRIu16"\n",
     522             :                       virtqueue - vsession->virtqueue, buffer_id);
     523             : 
     524             :         /* When the descriptor is used, two flags in descriptor
     525             :          * avail flag and used flag are set to equal
     526             :          * and used flag value == used_wrap_counter.
     527             :          */
     528           7 :         used = !!(desc->flags & VRING_DESC_F_USED);
     529           7 :         avail = !!(desc->flags & VRING_DESC_F_AVAIL);
     530           7 :         if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
     531           0 :                 SPDK_ERRLOG("descriptor has been used before\n");
     532           0 :                 return;
     533             :         }
     534             : 
     535             :         /* In used desc addr is unused and len specifies the buffer length
     536             :          * that has been written to by the device.
     537             :          */
     538           7 :         desc->addr = 0;
     539           7 :         desc->len = length;
     540             : 
     541             :         /* This bit specifies whether any data has been written by the device */
     542           7 :         if (length != 0) {
     543           7 :                 desc->flags |= VRING_DESC_F_WRITE;
     544             :         }
     545             : 
     546             :         /* Buffer ID is included in the last descriptor in the list.
     547             :          * The driver needs to keep track of the size of the list corresponding
     548             :          * to each buffer ID.
     549             :          */
     550           7 :         desc->id = buffer_id;
     551             : 
     552             :         /* A device MUST NOT make the descriptor used before buffer_id is
     553             :          * written to the descriptor.
     554             :          */
     555           7 :         spdk_smp_wmb();
     556             : 
     557           7 :         rte_vhost_set_last_inflight_io_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
     558             :         /* To mark a desc as used, the device sets the F_USED bit in flags to match
     559             :          * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
     560             :          * match the same value.
     561             :          */
     562           7 :         if (virtqueue->packed.used_phase) {
     563           4 :                 desc->flags |= VRING_DESC_F_AVAIL_USED;
     564             :         } else {
     565           3 :                 desc->flags &= ~VRING_DESC_F_AVAIL_USED;
     566             :         }
     567           7 :         rte_vhost_clr_inflight_desc_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
     568             : 
     569           7 :         vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
     570           7 :         virtqueue->last_used_idx += num_descs;
     571           7 :         if (virtqueue->last_used_idx >= virtqueue->vring.size) {
     572           1 :                 virtqueue->last_used_idx -= virtqueue->vring.size;
     573           1 :                 virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
     574             :         }
     575             : 
     576           7 :         virtqueue->used_req_cnt++;
     577             : }
     578             : 
     579             : bool
     580          12 : vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
     581             : {
     582          12 :         uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;
     583             : 
     584             :         /* To mark a desc as available, the driver sets the F_AVAIL bit in flags
     585             :          * to match the internal avail wrap counter. It also sets the F_USED bit to
     586             :          * match the inverse value but it's not mandatory.
     587             :          */
     588          12 :         return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
     589             : }
     590             : 
     591             : bool
     592           0 : vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
     593             : {
     594           0 :         return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
     595             : }
     596             : 
     597             : bool
     598           0 : vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
     599             : {
     600           0 :         return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
     601             : }
     602             : 
     603             : int
     604           0 : vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
     605             :                                  struct spdk_vhost_virtqueue *vq,
     606             :                                  struct vring_packed_desc *desc_table,
     607             :                                  uint32_t desc_table_size)
     608             : {
     609           0 :         if (desc_table != NULL) {
     610             :                 /* When the desc_table isn't NULL means it's indirect and we get the next
     611             :                  * desc by req_idx and desc_table_size. The return value is NULL means
     612             :                  * we reach the last desc of this request.
     613             :                  */
     614           0 :                 (*req_idx)++;
     615           0 :                 if (*req_idx < desc_table_size) {
     616           0 :                         *desc = &desc_table[*req_idx];
     617             :                 } else {
     618           0 :                         *desc = NULL;
     619             :                 }
     620             :         } else {
     621             :                 /* When the desc_table is NULL means it's non-indirect and we get the next
     622             :                  * desc by req_idx and F_NEXT in flags. The return value is NULL means
     623             :                  * we reach the last desc of this request. When return new desc
     624             :                  * we update the req_idx too.
     625             :                  */
     626           0 :                 if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
     627           0 :                         *desc = NULL;
     628           0 :                         return 0;
     629             :                 }
     630             : 
     631           0 :                 *req_idx = (*req_idx + 1) % vq->vring.size;
     632           0 :                 *desc = &vq->vring.desc_packed[*req_idx];
     633             :         }
     634             : 
     635           0 :         return 0;
     636             : }
     637             : 
     638             : static int
     639           6 : vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     640             :                                 uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
     641             : {
     642             :         uintptr_t vva;
     643           6 :         uint64_t len;
     644             : 
     645             :         do {
     646           7 :                 if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
     647           1 :                         SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
     648           1 :                         return -1;
     649             :                 }
     650           6 :                 len = remaining;
     651           6 :                 vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
     652           6 :                 if (vva == 0 || len == 0) {
     653           0 :                         SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
     654           0 :                         return -1;
     655             :                 }
     656           6 :                 iov[*iov_index].iov_base = (void *)vva;
     657           6 :                 iov[*iov_index].iov_len = len;
     658           6 :                 remaining -= len;
     659           6 :                 payload += len;
     660           6 :                 (*iov_index)++;
     661           6 :         } while (remaining);
     662             : 
     663           5 :         return 0;
     664             : }
     665             : 
     666             : int
     667           0 : vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     668             :                                uint16_t *iov_index, const struct vring_packed_desc *desc)
     669             : {
     670           0 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     671           0 :                                                desc->addr, desc->len);
     672             : }
     673             : 
     674             : int
     675           0 : vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     676             :                                  uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
     677             : {
     678           0 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     679           0 :                                                desc->addr, desc->len);
     680             : }
     681             : 
     682             : /* 1, Traverse the desc chain to get the buffer_id and return buffer_id as task_idx.
     683             :  * 2, Update the vq->last_avail_idx to point next available desc chain.
     684             :  * 3, Update the avail_wrap_counter if last_avail_idx overturn.
     685             :  */
     686             : uint16_t
     687           7 : vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
     688             :                                       uint16_t *num_descs)
     689             : {
     690             :         struct vring_packed_desc *desc;
     691           7 :         uint16_t desc_head = req_idx;
     692             : 
     693           7 :         *num_descs = 1;
     694             : 
     695           7 :         desc =  &vq->vring.desc_packed[req_idx];
     696           7 :         if (!vhost_vring_packed_desc_is_indirect(desc)) {
     697           7 :                 while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
     698           0 :                         req_idx = (req_idx + 1) % vq->vring.size;
     699           0 :                         desc = &vq->vring.desc_packed[req_idx];
     700           0 :                         (*num_descs)++;
     701             :                 }
     702             :         }
     703             : 
     704             :         /* Queue Size doesn't have to be a power of 2
     705             :          * Device maintains last_avail_idx so we can make sure
     706             :          * the value is valid(0 ~ vring.size - 1)
     707             :          */
     708           7 :         vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
     709           7 :         if (vq->last_avail_idx < desc_head) {
     710           1 :                 vq->packed.avail_phase = !vq->packed.avail_phase;
     711             :         }
     712             : 
     713           7 :         return desc->id;
     714             : }
     715             : 
     716             : int
     717           0 : vhost_vring_desc_get_next(struct vring_desc **desc,
     718             :                           struct vring_desc *desc_table, uint32_t desc_table_size)
     719             : {
     720           0 :         struct vring_desc *old_desc = *desc;
     721             :         uint16_t next_idx;
     722             : 
     723           0 :         if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
     724           0 :                 *desc = NULL;
     725           0 :                 return 0;
     726             :         }
     727             : 
     728           0 :         next_idx = old_desc->next;
     729           0 :         if (spdk_unlikely(next_idx >= desc_table_size)) {
     730           0 :                 *desc = NULL;
     731           0 :                 return -1;
     732             :         }
     733             : 
     734           0 :         *desc = &desc_table[next_idx];
     735           0 :         return 0;
     736             : }
     737             : 
     738             : int
     739           6 : vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
     740             :                         uint16_t *iov_index, const struct vring_desc *desc)
     741             : {
     742          12 :         return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
     743           6 :                                                desc->addr, desc->len);
     744             : }
     745             : 
     746             : static inline void
     747           0 : vhost_session_mem_region_calc(uint64_t *previous_start, uint64_t *start, uint64_t *end,
     748             :                               uint64_t *len, struct rte_vhost_mem_region *region)
     749             : {
     750           0 :         *start = FLOOR_2MB(region->mmap_addr);
     751           0 :         *end = CEIL_2MB(region->mmap_addr + region->mmap_size);
     752           0 :         if (*start == *previous_start) {
     753           0 :                 *start += (size_t) VALUE_2MB;
     754             :         }
     755           0 :         *previous_start = *start;
     756           0 :         *len = *end - *start;
     757           0 : }
     758             : 
     759             : void
     760           0 : vhost_session_mem_register(struct rte_vhost_memory *mem)
     761             : {
     762           0 :         uint64_t start, end, len;
     763             :         uint32_t i;
     764           0 :         uint64_t previous_start = UINT64_MAX;
     765             : 
     766             : 
     767           0 :         for (i = 0; i < mem->nregions; i++) {
     768           0 :                 vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
     769           0 :                 SPDK_INFOLOG(vhost, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
     770             :                              start, len);
     771             : 
     772           0 :                 if (spdk_mem_register((void *)start, len) != 0) {
     773           0 :                         SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
     774             :                                      i);
     775           0 :                         continue;
     776             :                 }
     777             :         }
     778           0 : }
     779             : 
     780             : void
     781           0 : vhost_session_mem_unregister(struct rte_vhost_memory *mem)
     782             : {
     783           0 :         uint64_t start, end, len;
     784             :         uint32_t i;
     785           0 :         uint64_t previous_start = UINT64_MAX;
     786             : 
     787           0 :         for (i = 0; i < mem->nregions; i++) {
     788           0 :                 vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
     789           0 :                 if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
     790           0 :                         continue; /* region has not been registered */
     791             :                 }
     792             : 
     793           0 :                 if (spdk_mem_unregister((void *)start, len) != 0) {
     794           0 :                         assert(false);
     795             :                 }
     796             :         }
     797           0 : }
     798             : 
     799             : static bool
     800           0 : vhost_memory_changed(struct rte_vhost_memory *new,
     801             :                      struct rte_vhost_memory *old)
     802             : {
     803             :         uint32_t i;
     804             : 
     805           0 :         if (new->nregions != old->nregions) {
     806           0 :                 return true;
     807             :         }
     808             : 
     809           0 :         for (i = 0; i < new->nregions; ++i) {
     810           0 :                 struct rte_vhost_mem_region *new_r = &new->regions[i];
     811           0 :                 struct rte_vhost_mem_region *old_r = &old->regions[i];
     812             : 
     813           0 :                 if (new_r->guest_phys_addr != old_r->guest_phys_addr) {
     814           0 :                         return true;
     815             :                 }
     816           0 :                 if (new_r->size != old_r->size) {
     817           0 :                         return true;
     818             :                 }
     819           0 :                 if (new_r->guest_user_addr != old_r->guest_user_addr) {
     820           0 :                         return true;
     821             :                 }
     822           0 :                 if (new_r->mmap_addr != old_r->mmap_addr) {
     823           0 :                         return true;
     824             :                 }
     825           0 :                 if (new_r->fd != old_r->fd) {
     826           0 :                         return true;
     827             :                 }
     828             :         }
     829             : 
     830           0 :         return false;
     831             : }
     832             : 
     833             : static int
     834           0 : vhost_register_memtable_if_required(struct spdk_vhost_session *vsession, int vid)
     835             : {
     836           0 :         struct rte_vhost_memory *new_mem;
     837             : 
     838           0 :         if (vhost_get_mem_table(vid, &new_mem) != 0) {
     839           0 :                 SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
     840           0 :                 return -1;
     841             :         }
     842             : 
     843           0 :         if (vsession->mem == NULL) {
     844           0 :                 SPDK_INFOLOG(vhost, "Start to set memtable\n");
     845           0 :                 vsession->mem = new_mem;
     846           0 :                 vhost_session_mem_register(vsession->mem);
     847           0 :                 return 0;
     848             :         }
     849             : 
     850           0 :         if (vhost_memory_changed(new_mem, vsession->mem)) {
     851           0 :                 SPDK_INFOLOG(vhost, "Memtable is changed\n");
     852           0 :                 vhost_session_mem_unregister(vsession->mem);
     853           0 :                 free(vsession->mem);
     854             : 
     855           0 :                 vsession->mem = new_mem;
     856           0 :                 vhost_session_mem_register(vsession->mem);
     857           0 :                 return 0;
     858             : 
     859             :         }
     860             : 
     861           0 :         SPDK_INFOLOG(vhost, "Memtable is unchanged\n");
     862           0 :         free(new_mem);
     863           0 :         return 0;
     864             : }
     865             : 
     866             : static int
     867           0 : _stop_session(struct spdk_vhost_session *vsession)
     868             : {
     869             :         struct spdk_vhost_virtqueue *q;
     870             :         int rc;
     871             :         uint16_t i;
     872             : 
     873           0 :         rc = vhost_user_wait_for_session_stop(vsession, 3, "stop session");
     874           0 :         if (rc != 0) {
     875           0 :                 SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
     876           0 :                 return rc;
     877             :         }
     878             : 
     879           0 :         for (i = 0; i < vsession->max_queues; i++) {
     880           0 :                 q = &vsession->virtqueue[i];
     881             : 
     882             :                 /* vring.desc and vring.desc_packed are in a union struct
     883             :                  * so q->vring.desc can replace q->vring.desc_packed.
     884             :                  */
     885           0 :                 if (q->vring.desc == NULL) {
     886           0 :                         continue;
     887             :                 }
     888             : 
     889             :                 /* Packed virtqueues support up to 2^15 entries each
     890             :                  * so left one bit can be used as wrap counter.
     891             :                  */
     892           0 :                 if (q->packed.packed_ring) {
     893           0 :                         q->last_avail_idx = q->last_avail_idx |
     894           0 :                                             ((uint16_t)q->packed.avail_phase << 15);
     895           0 :                         q->last_used_idx = q->last_used_idx |
     896           0 :                                            ((uint16_t)q->packed.used_phase << 15);
     897             :                 }
     898             : 
     899           0 :                 rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
     900           0 :                 q->vring.desc = NULL;
     901             :         }
     902           0 :         vsession->max_queues = 0;
     903             : 
     904           0 :         return 0;
     905             : }
     906             : 
/*
 * rte_vhost "new_connection" callback: a client has connected to one of our
 * vhost-user sockets. Look up the controller the socket belongs to and
 * allocate a session object for the connection.
 *
 * Returns 0 on success, negative on failure (unknown/unregistered device,
 * allocation failure, or session-id space exhausted).
 */
static int
new_connection(int vid)
{
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_user_dev *user_dev;
	struct spdk_vhost_session *vsession;
	size_t dev_dirname_len;
	char ifname[PATH_MAX];
	char *ctrlr_name;

	if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
		SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
		return -1;
	}

	/* The ifname is the socket path; strip the configured socket
	 * directory prefix to recover the controller name. */
	ctrlr_name = &ifname[0];
	dev_dirname_len = strlen(g_vhost_user_dev_dirname);
	if (strncmp(ctrlr_name, g_vhost_user_dev_dirname, dev_dirname_len) == 0) {
		ctrlr_name += dev_dirname_len;
	}

	spdk_vhost_lock();
	vdev = spdk_vhost_dev_find(ctrlr_name);
	if (vdev == NULL) {
		SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
		spdk_vhost_unlock();
		return -1;
	}
	spdk_vhost_unlock();

	user_dev = to_user_dev(vdev);
	pthread_mutex_lock(&user_dev->lock);
	/* Reject connections racing with controller unregistration. */
	if (user_dev->registered == false) {
		SPDK_ERRLOG("Device %s is unregistered\n", ctrlr_name);
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}

	/* We expect sessions inside user_dev->vsessions to be sorted in ascending
	 * order in regard of vsession->id. For now we always set id = vsessions_num++
	 * and append each session to the very end of the vsessions list.
	 * This is required for vhost_user_dev_foreach_session() to work.
	 */
	if (user_dev->vsessions_num == UINT_MAX) {
		pthread_mutex_unlock(&user_dev->lock);
		assert(false);
		return -EINVAL;
	}

	/* One allocation holds the session plus the backend's private
	 * per-session context, cache-line aligned. */
	if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
			   user_dev->user_backend->session_ctx_size)) {
		SPDK_ERRLOG("vsession alloc failed\n");
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}
	memset(vsession, 0, sizeof(*vsession) + user_dev->user_backend->session_ctx_size);

	vsession->vdev = vdev;
	vsession->vid = vid;
	vsession->id = user_dev->vsessions_num++;
	vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
	if (vsession->name == NULL) {
		SPDK_ERRLOG("vsession alloc failed\n");
		free(vsession);
		pthread_mutex_unlock(&user_dev->lock);
		return -1;
	}
	vsession->started = false;
	vsession->starting = false;
	vsession->next_stats_check_time = 0;
	/* Convert the stats-poll interval from milliseconds to TSC ticks. */
	vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
					 spdk_get_ticks_hz() / 1000UL;
	TAILQ_INSERT_TAIL(&user_dev->vsessions, vsession, tailq);
	vhost_session_install_rte_compat_hooks(vsession);
	pthread_mutex_unlock(&user_dev->lock);

	return 0;
}
     985             : 
     986             : static void
     987           0 : vhost_user_session_start(void *arg1)
     988             : {
     989           0 :         struct spdk_vhost_session *vsession = arg1;
     990           0 :         struct spdk_vhost_dev *vdev = vsession->vdev;
     991           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
     992             :         const struct spdk_vhost_user_dev_backend *backend;
     993             :         int rc;
     994             : 
     995           0 :         SPDK_INFOLOG(vhost, "Starting new session for device %s with vid %d\n", vdev->name, vsession->vid);
     996           0 :         pthread_mutex_lock(&user_dev->lock);
     997           0 :         vsession->starting = false;
     998           0 :         backend = user_dev->user_backend;
     999           0 :         rc = backend->start_session(vdev, vsession, NULL);
    1000           0 :         if (rc == 0) {
    1001           0 :                 vsession->started = true;
    1002             :         }
    1003           0 :         pthread_mutex_unlock(&user_dev->lock);
    1004           0 : }
    1005             : 
    1006             : static int
    1007           0 : set_device_vq_callfd(struct spdk_vhost_session *vsession, uint16_t qid)
    1008             : {
    1009             :         struct spdk_vhost_virtqueue *q;
    1010             : 
    1011           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
    1012           0 :                 return -EINVAL;
    1013             :         }
    1014             : 
    1015           0 :         q = &vsession->virtqueue[qid];
    1016             :         /* vq isn't enabled yet */
    1017           0 :         if (q->vring_idx != qid) {
    1018           0 :                 return 0;
    1019             :         }
    1020             : 
    1021             :         /* vring.desc and vring.desc_packed are in a union struct
    1022             :          * so q->vring.desc can replace q->vring.desc_packed.
    1023             :          */
    1024           0 :         if (q->vring.desc == NULL || q->vring.size == 0) {
    1025           0 :                 return 0;
    1026             :         }
    1027             : 
    1028             :         /*
    1029             :          * Not sure right now but this look like some kind of QEMU bug and guest IO
    1030             :          * might be frozed without kicking all queues after live-migration. This look like
    1031             :          * the previous vhost instance failed to effectively deliver all interrupts before
    1032             :          * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts
    1033             :          * should be ignored by guest virtio driver.
    1034             :          *
    1035             :          * Tested on QEMU 2.10.91 and 2.11.50.
    1036             :          *
    1037             :          * Make sure a successful call of
    1038             :          * `rte_vhost_vring_call` will happen
    1039             :          * after starting the device.
    1040             :          */
    1041           0 :         q->used_req_cnt += 1;
    1042             : 
    1043           0 :         return 0;
    1044             : }
    1045             : 
    1046             : static int
    1047           0 : enable_device_vq(struct spdk_vhost_session *vsession, uint16_t qid)
    1048             : {
    1049             :         struct spdk_vhost_virtqueue *q;
    1050             :         bool packed_ring;
    1051             :         const struct spdk_vhost_user_dev_backend *backend;
    1052             :         int rc;
    1053             : 
    1054           0 :         if (qid >= SPDK_VHOST_MAX_VQUEUES) {
    1055           0 :                 return -EINVAL;
    1056             :         }
    1057             : 
    1058           0 :         q = &vsession->virtqueue[qid];
    1059           0 :         memset(q, 0, sizeof(*q));
    1060           0 :         packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);
    1061             : 
    1062           0 :         q->vsession = vsession;
    1063           0 :         q->vring_idx = -1;
    1064           0 :         if (rte_vhost_get_vhost_vring(vsession->vid, qid, &q->vring)) {
    1065           0 :                 return 0;
    1066             :         }
    1067           0 :         q->vring_idx = qid;
    1068           0 :         rte_vhost_get_vhost_ring_inflight(vsession->vid, qid, &q->vring_inflight);
    1069             : 
    1070             :         /* vring.desc and vring.desc_packed are in a union struct
    1071             :          * so q->vring.desc can replace q->vring.desc_packed.
    1072             :          */
    1073           0 :         if (q->vring.desc == NULL || q->vring.size == 0) {
    1074           0 :                 return 0;
    1075             :         }
    1076             : 
    1077           0 :         if (rte_vhost_get_vring_base(vsession->vid, qid, &q->last_avail_idx, &q->last_used_idx)) {
    1078           0 :                 q->vring.desc = NULL;
    1079           0 :                 return 0;
    1080             :         }
    1081             : 
    1082           0 :         backend = to_user_dev(vsession->vdev)->user_backend;
    1083           0 :         rc = backend->alloc_vq_tasks(vsession, qid);
    1084           0 :         if (rc) {
    1085           0 :                 return rc;
    1086             :         }
    1087             : 
    1088             :         /*
    1089             :          * This shouldn't harm guest since spurious interrupts should be ignored by
    1090             :          * guest virtio driver.
    1091             :          *
    1092             :          * Make sure a successful call of `rte_vhost_vring_call` will happen after
    1093             :          * restarting the device.
    1094             :          */
    1095           0 :         if (vsession->needs_restart) {
    1096           0 :                 q->used_req_cnt += 1;
    1097             :         }
    1098             : 
    1099           0 :         if (packed_ring) {
    1100             :                 /* Since packed ring flag is already negociated between SPDK and VM, VM doesn't
    1101             :                  * restore `last_avail_idx` and `last_used_idx` for packed ring, so use the
    1102             :                  * inflight mem to restore the `last_avail_idx` and `last_used_idx`.
    1103             :                  */
    1104           0 :                 rte_vhost_get_vring_base_from_inflight(vsession->vid, qid, &q->last_avail_idx,
    1105             :                                                        &q->last_used_idx);
    1106             : 
    1107             :                 /* Packed virtqueues support up to 2^15 entries each
    1108             :                  * so left one bit can be used as wrap counter.
    1109             :                  */
    1110           0 :                 q->packed.avail_phase = q->last_avail_idx >> 15;
    1111           0 :                 q->last_avail_idx = q->last_avail_idx & 0x7FFF;
    1112           0 :                 q->packed.used_phase = q->last_used_idx >> 15;
    1113           0 :                 q->last_used_idx = q->last_used_idx & 0x7FFF;
    1114             : 
    1115           0 :                 if (!spdk_interrupt_mode_is_enabled()) {
    1116             :                         /* Disable I/O submission notifications, we'll be polling. */
    1117           0 :                         q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
    1118             :                 } else {
    1119             :                         /* Enable I/O submission notifications, we'll be interrupting. */
    1120           0 :                         q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
    1121             :                 }
    1122             :         } else {
    1123           0 :                 if (!spdk_interrupt_mode_is_enabled()) {
    1124             :                         /* Disable I/O submission notifications, we'll be polling. */
    1125           0 :                         q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
    1126             :                 } else {
    1127             :                         /* Enable I/O submission notifications, we'll be interrupting. */
    1128           0 :                         q->vring.used->flags = 0;
    1129             :                 }
    1130             :         }
    1131             : 
    1132           0 :         if (spdk_interrupt_mode_is_enabled() && backend->register_vq_interrupt) {
    1133           0 :                 backend->register_vq_interrupt(vsession, q);
    1134             :         }
    1135             : 
    1136           0 :         q->packed.packed_ring = packed_ring;
    1137           0 :         vsession->max_queues = spdk_max(vsession->max_queues, qid + 1);
    1138             : 
    1139           0 :         return 0;
    1140             : }
    1141             : 
    1142             : static int
    1143           0 : start_device(int vid)
    1144             : {
    1145             :         struct spdk_vhost_dev *vdev;
    1146             :         struct spdk_vhost_session *vsession;
    1147             :         struct spdk_vhost_user_dev *user_dev;
    1148           0 :         int rc = 0;
    1149             : 
    1150           0 :         vsession = vhost_session_find_by_vid(vid);
    1151           0 :         if (vsession == NULL) {
    1152           0 :                 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
    1153           0 :                 return -1;
    1154             :         }
    1155           0 :         vdev = vsession->vdev;
    1156           0 :         user_dev = to_user_dev(vdev);
    1157             : 
    1158           0 :         pthread_mutex_lock(&user_dev->lock);
    1159           0 :         if (vsession->started) {
    1160             :                 /* already started, nothing to do */
    1161           0 :                 goto out;
    1162             :         }
    1163             : 
    1164           0 :         if (!vsession->mem) {
    1165           0 :                 rc = -1;
    1166           0 :                 SPDK_ERRLOG("Session %s doesn't set memory table yet\n", vsession->name);
    1167           0 :                 goto out;
    1168             :         }
    1169             : 
    1170           0 :         vsession->starting = true;
    1171           0 :         SPDK_INFOLOG(vhost, "Session %s is scheduled to start\n", vsession->name);
    1172           0 :         vhost_user_session_set_coalescing(vdev, vsession, NULL);
    1173           0 :         spdk_thread_send_msg(vdev->thread, vhost_user_session_start, vsession);
    1174             : 
    1175           0 : out:
    1176           0 :         pthread_mutex_unlock(&user_dev->lock);
    1177           0 :         return rc;
    1178             : }
    1179             : 
    1180             : static void
    1181           0 : stop_device(int vid)
    1182             : {
    1183             :         struct spdk_vhost_session *vsession;
    1184             :         struct spdk_vhost_user_dev *user_dev;
    1185             : 
    1186           0 :         vsession = vhost_session_find_by_vid(vid);
    1187           0 :         if (vsession == NULL) {
    1188           0 :                 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
    1189           0 :                 return;
    1190             :         }
    1191           0 :         user_dev = to_user_dev(vsession->vdev);
    1192             : 
    1193           0 :         pthread_mutex_lock(&user_dev->lock);
    1194           0 :         if (!vsession->started && !vsession->starting) {
    1195           0 :                 pthread_mutex_unlock(&user_dev->lock);
    1196             :                 /* already stopped, nothing to do */
    1197           0 :                 return;
    1198             :         }
    1199             : 
    1200           0 :         _stop_session(vsession);
    1201           0 :         pthread_mutex_unlock(&user_dev->lock);
    1202             : }
    1203             : 
    1204             : static void
    1205           0 : destroy_connection(int vid)
    1206             : {
    1207             :         struct spdk_vhost_session *vsession;
    1208             :         struct spdk_vhost_user_dev *user_dev;
    1209             : 
    1210           0 :         vsession = vhost_session_find_by_vid(vid);
    1211           0 :         if (vsession == NULL) {
    1212           0 :                 SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
    1213           0 :                 return;
    1214             :         }
    1215           0 :         user_dev = to_user_dev(vsession->vdev);
    1216             : 
    1217           0 :         pthread_mutex_lock(&user_dev->lock);
    1218           0 :         if (vsession->started || vsession->starting) {
    1219           0 :                 if (_stop_session(vsession) != 0) {
    1220           0 :                         pthread_mutex_unlock(&user_dev->lock);
    1221           0 :                         return;
    1222             :                 }
    1223             :         }
    1224             : 
    1225           0 :         if (vsession->mem) {
    1226           0 :                 vhost_session_mem_unregister(vsession->mem);
    1227           0 :                 free(vsession->mem);
    1228             :         }
    1229             : 
    1230           0 :         TAILQ_REMOVE(&to_user_dev(vsession->vdev)->vsessions, vsession, tailq);
    1231           0 :         free(vsession->name);
    1232           0 :         free(vsession);
    1233           0 :         pthread_mutex_unlock(&user_dev->lock);
    1234             : }
    1235             : 
/* Lifecycle callbacks handed to the DPDK rte_vhost library; DPDK invokes
 * them on its internal pthread when a vhost-user connection is created,
 * started, stopped, or torn down. */
static const struct rte_vhost_device_ops g_spdk_vhost_ops = {
	.new_device =  start_device,
	.destroy_device = stop_device,
	.new_connection = new_connection,
	.destroy_connection = destroy_connection,
};
    1242             : 
    1243             : static struct spdk_vhost_session *
    1244           0 : vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
    1245             : {
    1246             :         struct spdk_vhost_session *vsession;
    1247             : 
    1248           0 :         TAILQ_FOREACH(vsession, &to_user_dev(vdev)->vsessions, tailq) {
    1249           0 :                 if (vsession->id == id) {
    1250           0 :                         return vsession;
    1251             :                 }
    1252             :         }
    1253             : 
    1254           0 :         return NULL;
    1255             : }
    1256             : 
    1257             : struct spdk_vhost_session *
    1258           2 : vhost_session_find_by_vid(int vid)
    1259             : {
    1260             :         struct spdk_vhost_dev *vdev;
    1261             :         struct spdk_vhost_session *vsession;
    1262             :         struct spdk_vhost_user_dev *user_dev;
    1263             : 
    1264           2 :         spdk_vhost_lock();
    1265           3 :         for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
    1266           1 :              vdev = spdk_vhost_dev_next(vdev)) {
    1267           2 :                 user_dev = to_user_dev(vdev);
    1268             : 
    1269           2 :                 pthread_mutex_lock(&user_dev->lock);
    1270           3 :                 TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
    1271           2 :                         if (vsession->vid == vid) {
    1272           1 :                                 pthread_mutex_unlock(&user_dev->lock);
    1273           1 :                                 spdk_vhost_unlock();
    1274           1 :                                 return vsession;
    1275             :                         }
    1276             :                 }
    1277           1 :                 pthread_mutex_unlock(&user_dev->lock);
    1278             :         }
    1279           1 :         spdk_vhost_unlock();
    1280             : 
    1281           1 :         return NULL;
    1282             : }
    1283             : 
    1284             : static void
    1285           0 : wait_for_semaphore(int timeout_sec, const char *errmsg)
    1286             : {
    1287           0 :         struct timespec timeout;
    1288             :         int rc;
    1289             : 
    1290           0 :         clock_gettime(CLOCK_REALTIME, &timeout);
    1291           0 :         timeout.tv_sec += timeout_sec;
    1292           0 :         rc = sem_timedwait(&g_dpdk_sem, &timeout);
    1293           0 :         if (rc != 0) {
    1294           0 :                 SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
    1295           0 :                 sem_wait(&g_dpdk_sem);
    1296             :         }
    1297           0 : }
    1298             : 
    1299             : void
    1300           0 : vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response)
    1301             : {
    1302           0 :         if (response == 0) {
    1303           0 :                 vsession->started = false;
    1304             :         }
    1305             : 
    1306           0 :         g_dpdk_response = response;
    1307           0 :         sem_post(&g_dpdk_sem);
    1308           0 : }
    1309             : 
/* Runs on the device's SPDK thread: stop the session identified by
 * ctx->vsession_id via the user backend's stop_session() callback. */
static void
vhost_user_session_stop_event(void *arg1)
{
	struct vhost_session_fn_ctx *ctx = arg1;
	struct spdk_vhost_dev *vdev = ctx->vdev;
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
	struct spdk_vhost_session *vsession;

	/* Trylock + re-post to self instead of blocking: the lock may be
	 * held by another thread that is itself waiting on this SPDK
	 * thread, so blocking here could deadlock. */
	if (pthread_mutex_trylock(&user_dev->lock) != 0) {
		spdk_thread_send_msg(spdk_get_thread(), vhost_user_session_stop_event, arg1);
		return;
	}

	/* NOTE(review): vsession is NULL if the id is no longer on the
	 * device's list; stop_session() is presumably expected to cope
	 * with that - confirm against the backend implementations. */
	vsession = vhost_session_find_by_id(vdev, ctx->vsession_id);
	user_dev->user_backend->stop_session(vdev, vsession, NULL);
	pthread_mutex_unlock(&user_dev->lock);
}
    1327             : 
/* Ask the device's SPDK thread to stop @vsession and wait (on the DPDK
 * pthread) for vhost_user_session_stop_done() to post the result.
 *
 * Caller must hold user_dev->lock. The lock is dropped while waiting so
 * vhost_user_session_stop_event() can acquire it, then re-acquired
 * before returning.
 *
 * ev_ctx may live on this stack frame because the function does not
 * return until the stop event has completed and posted g_dpdk_sem.
 *
 * Returns the stop response recorded in g_dpdk_response.
 */
static int
vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
				 unsigned timeout_sec, const char *errmsg)
{
	struct vhost_session_fn_ctx ev_ctx = {0};
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);

	ev_ctx.vdev = vdev;
	ev_ctx.vsession_id = vsession->id;

	spdk_thread_send_msg(vdev->thread, vhost_user_session_stop_event, &ev_ctx);

	/* Release the lock for the duration of the wait; the stop event
	 * needs it to make progress. */
	pthread_mutex_unlock(&user_dev->lock);
	wait_for_semaphore(timeout_sec, errmsg);
	pthread_mutex_lock(&user_dev->lock);

	return g_dpdk_response;
}
    1347             : 
    1348             : static void
    1349           0 : foreach_session_finish_cb(void *arg1)
    1350             : {
    1351           0 :         struct vhost_session_fn_ctx *ev_ctx = arg1;
    1352           0 :         struct spdk_vhost_dev *vdev = ev_ctx->vdev;
    1353           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1354             : 
    1355           0 :         if (pthread_mutex_trylock(&user_dev->lock) != 0) {
    1356           0 :                 spdk_thread_send_msg(spdk_get_thread(),
    1357             :                                      foreach_session_finish_cb, arg1);
    1358           0 :                 return;
    1359             :         }
    1360             : 
    1361           0 :         assert(user_dev->pending_async_op_num > 0);
    1362           0 :         user_dev->pending_async_op_num--;
    1363           0 :         if (ev_ctx->cpl_fn != NULL) {
    1364           0 :                 ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
    1365             :         }
    1366             : 
    1367           0 :         pthread_mutex_unlock(&user_dev->lock);
    1368           0 :         free(ev_ctx);
    1369             : }
    1370             : 
    1371             : static void
    1372           0 : foreach_session(void *arg1)
    1373             : {
    1374           0 :         struct vhost_session_fn_ctx *ev_ctx = arg1;
    1375           0 :         struct spdk_vhost_dev *vdev = ev_ctx->vdev;
    1376           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1377             :         struct spdk_vhost_session *vsession;
    1378             :         int rc;
    1379             : 
    1380           0 :         if (pthread_mutex_trylock(&user_dev->lock) != 0) {
    1381           0 :                 spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
    1382           0 :                 return;
    1383             :         }
    1384             : 
    1385           0 :         TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
    1386           0 :                 rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
    1387           0 :                 if (rc < 0) {
    1388           0 :                         goto out;
    1389             :                 }
    1390             :         }
    1391             : 
    1392           0 : out:
    1393           0 :         pthread_mutex_unlock(&user_dev->lock);
    1394           0 :         spdk_thread_send_msg(g_vhost_user_init_thread, foreach_session_finish_cb, arg1);
    1395             : }
    1396             : 
    1397             : void
    1398           0 : vhost_user_dev_foreach_session(struct spdk_vhost_dev *vdev,
    1399             :                                spdk_vhost_session_fn fn,
    1400             :                                spdk_vhost_dev_fn cpl_fn,
    1401             :                                void *arg)
    1402             : {
    1403             :         struct vhost_session_fn_ctx *ev_ctx;
    1404           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1405             : 
    1406           0 :         ev_ctx = calloc(1, sizeof(*ev_ctx));
    1407           0 :         if (ev_ctx == NULL) {
    1408           0 :                 SPDK_ERRLOG("Failed to alloc vhost event.\n");
    1409           0 :                 assert(false);
    1410             :                 return;
    1411             :         }
    1412             : 
    1413           0 :         ev_ctx->vdev = vdev;
    1414           0 :         ev_ctx->cb_fn = fn;
    1415           0 :         ev_ctx->cpl_fn = cpl_fn;
    1416           0 :         ev_ctx->user_ctx = arg;
    1417             : 
    1418           0 :         pthread_mutex_lock(&user_dev->lock);
    1419           0 :         assert(user_dev->pending_async_op_num < UINT32_MAX);
    1420           0 :         user_dev->pending_async_op_num++;
    1421           0 :         pthread_mutex_unlock(&user_dev->lock);
    1422             : 
    1423           0 :         spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
    1424             : }
    1425             : 
    1426             : void
    1427           0 : vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession, bool interrupt_mode)
    1428             : {
    1429             :         uint16_t i;
    1430           0 :         int rc = 0;
    1431             : 
    1432           0 :         for (i = 0; i < vsession->max_queues; i++) {
    1433           0 :                 struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];
    1434           0 :                 uint64_t num_events = 1;
    1435             : 
    1436             :                 /* vring.desc and vring.desc_packed are in a union struct
    1437             :                  * so q->vring.desc can replace q->vring.desc_packed.
    1438             :                  */
    1439           0 :                 if (q->vring.desc == NULL || q->vring.size == 0) {
    1440           0 :                         continue;
    1441             :                 }
    1442             : 
    1443           0 :                 if (interrupt_mode) {
    1444             : 
    1445             :                         /* In case of race condition, always kick vring when switch to intr */
    1446           0 :                         rc = write(q->vring.kickfd, &num_events, sizeof(num_events));
    1447           0 :                         if (rc < 0) {
    1448           0 :                                 SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
    1449             :                         }
    1450             : 
    1451           0 :                         vsession->interrupt_mode = true;
    1452             :                 } else {
    1453             : 
    1454           0 :                         vsession->interrupt_mode = false;
    1455             :                 }
    1456             :         }
    1457           0 : }
    1458             : 
/* Intercept vhost-user messages BEFORE DPDK's default processing.
 *
 * Returns one of the RTE_VHOST_MSG_RESULT_* codes: NOT_HANDLED lets
 * DPDK continue its normal handling, REPLY/OK/ERR terminate processing
 * here.
 */
static int
extern_vhost_pre_msg_handler(int vid, void *_msg)
{
	struct vhost_user_msg *msg = _msg;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
		assert(false);
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	user_dev = to_user_dev(vsession->vdev);

	switch (msg->request) {
	case VHOST_USER_GET_VRING_BASE:
		/* The guest is shutting the device down; stop the session
		 * first (destroy_device), outside the lock. */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->started || vsession->starting) {
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.destroy_device(vid);
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_SET_MEM_TABLE:
		/* A new memory table invalidates the current mappings: stop
		 * the session, remember how many queues it had, and flag it
		 * for restart in the post-message handler. */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->started || vsession->starting) {
			vsession->original_max_queues = vsession->max_queues;
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.destroy_device(vid);
			vsession->needs_restart = true;
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_GET_CONFIG: {
		/* Fill the config region from the backend (if it supports
		 * config space) and reply directly; on failure the reply
		 * carries zero payload size. */
		int rc = 0;

		pthread_mutex_lock(&user_dev->lock);
		if (vsession->vdev->backend->vhost_get_config) {
			rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
					msg->payload.cfg.region, msg->payload.cfg.size);
			if (rc != 0) {
				msg->size = 0;
			}
		}
		pthread_mutex_unlock(&user_dev->lock);

		return RTE_VHOST_MSG_RESULT_REPLY;
	}
	case VHOST_USER_SET_CONFIG: {
		/* Forward the config write to the backend; silently succeed
		 * when the backend has no config-space support. */
		int rc = 0;

		pthread_mutex_lock(&user_dev->lock);
		if (vsession->vdev->backend->vhost_set_config) {
			rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
					msg->payload.cfg.region, msg->payload.cfg.offset,
					msg->payload.cfg.size, msg->payload.cfg.flags);
		}
		pthread_mutex_unlock(&user_dev->lock);

		return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
	}
	default:
		break;
	}

	/* Everything else falls through to DPDK's default handling. */
	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
    1529             : 
/* Intercept vhost-user messages AFTER DPDK's default processing, to
 * pick up state DPDK has just updated (features, callfds, kickfds,
 * memory table) and to (re)start the session when appropriate.
 */
static int
extern_vhost_post_msg_handler(int vid, void *_msg)
{
	struct vhost_user_msg *msg = _msg;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;
	uint16_t qid;
	int rc;

	vsession = vhost_session_find_by_vid(vid);
	if (vsession == NULL) {
		SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
		assert(false);
		return RTE_VHOST_MSG_RESULT_ERR;
	}
	user_dev = to_user_dev(vsession->vdev);

	switch (msg->request) {
	case VHOST_USER_SET_FEATURES:
		/* Cache the freshly negotiated feature bits on the session. */
		rc = vhost_get_negotiated_features(vid, &vsession->negotiated_features);
		if (rc) {
			SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
			return RTE_VHOST_MSG_RESULT_ERR;
		}
		break;
	case VHOST_USER_SET_VRING_CALL:
		/* Lower 16 bits of payload.u64 carry the queue index. */
		qid = (uint16_t)msg->payload.u64;
		rc = set_device_vq_callfd(vsession, qid);
		if (rc) {
			return RTE_VHOST_MSG_RESULT_ERR;
		}
		break;
	case VHOST_USER_SET_VRING_KICK:
		qid = (uint16_t)msg->payload.u64;
		rc = enable_device_vq(vsession, qid);
		if (rc) {
			return RTE_VHOST_MSG_RESULT_ERR;
		}

		/* vhost-user spec tells us to start polling a queue after receiving
		 * its SET_VRING_KICK message. Let's do it!
		 */
		pthread_mutex_lock(&user_dev->lock);
		if (!vsession->started && !vsession->starting) {
			pthread_mutex_unlock(&user_dev->lock);
			g_spdk_vhost_ops.new_device(vid);
			return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	case VHOST_USER_SET_MEM_TABLE:
		vhost_register_memtable_if_required(vsession, vid);
		/* If the pre-message handler stopped the session for this
		 * memory-table change, re-enable the queues it had before
		 * and start it again. */
		pthread_mutex_lock(&user_dev->lock);
		if (vsession->needs_restart) {
			pthread_mutex_unlock(&user_dev->lock);
			for (qid = 0; qid < vsession->original_max_queues; qid++) {
				enable_device_vq(vsession, qid);
			}
			vsession->original_max_queues = 0;
			vsession->needs_restart = false;
			g_spdk_vhost_ops.new_device(vid);
			break;
		}
		pthread_mutex_unlock(&user_dev->lock);
		break;
	default:
		break;
	}

	/* DPDK's own handling already ran; nothing more to override. */
	return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
}
    1601             : 
/* Hooks registered with rte_vhost so SPDK sees every vhost-user message
 * both before and after DPDK's default processing. */
struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
	.pre_msg_handle = extern_vhost_pre_msg_handler,
	.post_msg_handle = extern_vhost_post_msg_handler,
};
    1606             : 
    1607             : void
    1608           0 : vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
    1609             : {
    1610             :         int rc;
    1611             : 
    1612           0 :         rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
    1613           0 :         if (rc != 0) {
    1614           0 :                 SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
    1615             :                             vsession->vid);
    1616           0 :                 return;
    1617             :         }
    1618             : }
    1619             : 
    1620             : int
    1621           9 : vhost_register_unix_socket(const char *path, const char *ctrl_name,
    1622             :                            uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
    1623             : {
    1624           9 :         struct stat file_stat;
    1625           9 :         uint64_t features = 0;
    1626           9 :         uint64_t flags = 0;
    1627             : 
    1628             :         /* Register vhost driver to handle vhost messages. */
    1629           9 :         if (stat(path, &file_stat) != -1) {
    1630           0 :                 if (!S_ISSOCK(file_stat.st_mode)) {
    1631           0 :                         SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
    1632             :                                     "The file already exists and is not a socket.\n",
    1633             :                                     path);
    1634           0 :                         return -EIO;
    1635           0 :                 } else if (unlink(path) != 0) {
    1636           0 :                         SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
    1637             :                                     "The socket already exists and failed to unlink.\n",
    1638             :                                     path);
    1639           0 :                         return -EIO;
    1640             :                 }
    1641             :         }
    1642             : 
    1643           9 :         flags = spdk_iommu_is_enabled() ? 0 : RTE_VHOST_USER_ASYNC_COPY;
    1644           9 :         if (rte_vhost_driver_register(path, flags) != 0) {
    1645           0 :                 SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
    1646           0 :                 SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
    1647           0 :                 return -EIO;
    1648             :         }
    1649          18 :         if (rte_vhost_driver_set_features(path, virtio_features) ||
    1650           9 :             rte_vhost_driver_disable_features(path, disabled_features)) {
    1651           0 :                 SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);
    1652             : 
    1653           0 :                 rte_vhost_driver_unregister(path);
    1654           0 :                 return -EIO;
    1655             :         }
    1656             : 
    1657           9 :         if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
    1658           0 :                 rte_vhost_driver_unregister(path);
    1659           0 :                 SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
    1660           0 :                 return -EIO;
    1661             :         }
    1662             : 
    1663           9 :         rte_vhost_driver_get_protocol_features(path, &features);
    1664           9 :         features |= protocol_features;
    1665           9 :         rte_vhost_driver_set_protocol_features(path, features);
    1666             : 
    1667           9 :         if (rte_vhost_driver_start(path) != 0) {
    1668           0 :                 SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
    1669             :                             ctrl_name, errno, spdk_strerror(errno));
    1670           0 :                 rte_vhost_driver_unregister(path);
    1671           0 :                 return -EIO;
    1672             :         }
    1673             : 
    1674           9 :         return 0;
    1675             : }
    1676             : 
int
vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
{
	/* Fetch the guest memory table for connection "vid"; thin rte_vhost wrapper
	 * that keeps DPDK API usage confined to this file.
	 */
	return rte_vhost_get_mem_table(vid, mem);
}
    1682             : 
int
vhost_driver_unregister(const char *path)
{
	/* Remove the vhost-user domain socket registered at "path" from rte_vhost. */
	return rte_vhost_driver_unregister(path);
}
    1688             : 
int
vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
{
	/* Query the virtio feature bits negotiated with connection "vid";
	 * thin rte_vhost wrapper.
	 */
	return rte_vhost_get_negotiated_features(vid, negotiated_features);
}
    1694             : 
    1695             : int
    1696           9 : vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us,
    1697             :                               uint32_t iops_threshold)
    1698             : {
    1699           9 :         uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
    1700           9 :         uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
    1701             : 
    1702           9 :         if (delay_time_base >= UINT32_MAX) {
    1703           0 :                 SPDK_ERRLOG("Delay time of %"PRIu32" is to big\n", delay_base_us);
    1704           0 :                 return -EINVAL;
    1705           9 :         } else if (io_rate == 0) {
    1706           0 :                 SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
    1707             :                             1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
    1708           0 :                 return -EINVAL;
    1709             :         }
    1710             : 
    1711           9 :         user_dev->coalescing_delay_us = delay_base_us;
    1712           9 :         user_dev->coalescing_iops_threshold = iops_threshold;
    1713           9 :         return 0;
    1714             : }
    1715             : 
    1716             : int
    1717           0 : vhost_user_session_set_coalescing(struct spdk_vhost_dev *vdev,
    1718             :                                   struct spdk_vhost_session *vsession, void *ctx)
    1719             : {
    1720           0 :         vsession->coalescing_delay_time_base =
    1721           0 :                 to_user_dev(vdev)->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
    1722           0 :         vsession->coalescing_io_rate_threshold =
    1723           0 :                 to_user_dev(vdev)->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
    1724           0 :         return 0;
    1725             : }
    1726             : 
    1727             : int
    1728           0 : vhost_user_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
    1729             :                           uint32_t iops_threshold)
    1730             : {
    1731             :         int rc;
    1732             : 
    1733           0 :         rc = vhost_user_dev_set_coalescing(to_user_dev(vdev), delay_base_us, iops_threshold);
    1734           0 :         if (rc != 0) {
    1735           0 :                 return rc;
    1736             :         }
    1737             : 
    1738           0 :         vhost_user_dev_foreach_session(vdev, vhost_user_session_set_coalescing, NULL, NULL);
    1739             : 
    1740           0 :         return 0;
    1741             : }
    1742             : 
    1743             : void
    1744           0 : vhost_user_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
    1745             :                           uint32_t *iops_threshold)
    1746             : {
    1747           0 :         struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
    1748             : 
    1749           0 :         if (delay_base_us) {
    1750           0 :                 *delay_base_us = user_dev->coalescing_delay_us;
    1751             :         }
    1752             : 
    1753           0 :         if (iops_threshold) {
    1754           0 :                 *iops_threshold = user_dev->coalescing_iops_threshold;
    1755             :         }
    1756           0 : }
    1757             : 
    1758             : int
    1759           0 : spdk_vhost_set_socket_path(const char *basename)
    1760             : {
    1761             :         int ret;
    1762             : 
    1763           0 :         if (basename && strlen(basename) > 0) {
    1764           0 :                 ret = snprintf(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 2, "%s", basename);
    1765           0 :                 if (ret <= 0) {
    1766           0 :                         return -EINVAL;
    1767             :                 }
    1768           0 :                 if ((size_t)ret >= sizeof(g_vhost_user_dev_dirname) - 2) {
    1769           0 :                         SPDK_ERRLOG("Char dev dir path length %d is too long\n", ret);
    1770           0 :                         return -EINVAL;
    1771             :                 }
    1772             : 
    1773           0 :                 if (g_vhost_user_dev_dirname[ret - 1] != '/') {
    1774           0 :                         g_vhost_user_dev_dirname[ret] = '/';
    1775           0 :                         g_vhost_user_dev_dirname[ret + 1]  = '\0';
    1776             :                 }
    1777             :         }
    1778             : 
    1779           0 :         return 0;
    1780             : }
    1781             : 
static void
vhost_dev_thread_exit(void *arg1)
{
	/* Message handler sent to a controller's thread to make it exit itself. */
	spdk_thread_exit(spdk_get_thread());
}
    1787             : 
/* Set by vhost_user_init() and cleared in vhost_user_fini(); the teardown
 * paths check it to decide whether sessions may still be active.
 */
static bool g_vhost_user_started = false;
    1789             : 
    1790             : int
    1791          10 : vhost_user_dev_init(struct spdk_vhost_dev *vdev, const char *name,
    1792             :                     struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend)
    1793             : {
    1794          10 :         char path[PATH_MAX];
    1795             :         struct spdk_vhost_user_dev *user_dev;
    1796             : 
    1797          10 :         if (snprintf(path, sizeof(path), "%s%s", g_vhost_user_dev_dirname, name) >= (int)sizeof(path)) {
    1798           1 :                 SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n",
    1799             :                             name, g_vhost_user_dev_dirname, name);
    1800           1 :                 return -EINVAL;
    1801             :         }
    1802             : 
    1803           9 :         vdev->path = strdup(path);
    1804           9 :         if (vdev->path == NULL) {
    1805           0 :                 return -EIO;
    1806             :         }
    1807             : 
    1808           9 :         user_dev = calloc(1, sizeof(*user_dev));
    1809           9 :         if (user_dev == NULL) {
    1810           0 :                 free(vdev->path);
    1811           0 :                 return -ENOMEM;
    1812             :         }
    1813           9 :         vdev->ctxt = user_dev;
    1814             : 
    1815           9 :         vdev->thread = spdk_thread_create(vdev->name, cpumask);
    1816           9 :         if (vdev->thread == NULL) {
    1817           0 :                 free(user_dev);
    1818           0 :                 free(vdev->path);
    1819           0 :                 SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
    1820           0 :                 return -EIO;
    1821             :         }
    1822             : 
    1823           9 :         user_dev->user_backend = user_backend;
    1824           9 :         user_dev->vdev = vdev;
    1825           9 :         user_dev->registered = true;
    1826           9 :         TAILQ_INIT(&user_dev->vsessions);
    1827           9 :         pthread_mutex_init(&user_dev->lock, NULL);
    1828             : 
    1829           9 :         vhost_user_dev_set_coalescing(user_dev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
    1830             :                                       SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
    1831             : 
    1832           9 :         return 0;
    1833             : }
    1834             : 
int
vhost_user_dev_start(struct spdk_vhost_dev *vdev)
{
	/* Expose the controller by registering and starting its vhost-user
	 * domain socket with the feature sets stored on the device.
	 */
	return vhost_register_unix_socket(vdev->path, vdev->name, vdev->virtio_features,
					  vdev->disabled_features,
					  vdev->protocol_features);
}
    1842             : 
    1843             : int
    1844          10 : vhost_user_dev_create(struct spdk_vhost_dev *vdev, const char *name, struct spdk_cpuset *cpumask,
    1845             :                       const struct spdk_vhost_user_dev_backend *user_backend, bool delay)
    1846             : {
    1847             :         int rc;
    1848             :         struct spdk_vhost_user_dev *user_dev;
    1849             : 
    1850          10 :         rc = vhost_user_dev_init(vdev, name, cpumask, user_backend);
    1851          10 :         if (rc != 0) {
    1852           1 :                 return rc;
    1853             :         }
    1854             : 
    1855           9 :         if (delay == false) {
    1856           9 :                 rc = vhost_user_dev_start(vdev);
    1857           9 :                 if (rc != 0) {
    1858           0 :                         user_dev = to_user_dev(vdev);
    1859           0 :                         spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
    1860           0 :                         pthread_mutex_destroy(&user_dev->lock);
    1861           0 :                         free(user_dev);
    1862           0 :                         free(vdev->path);
    1863             :                 }
    1864             :         }
    1865             : 
    1866           9 :         return rc;
    1867             : }
    1868             : 
/*
 * Tear down a vhost-user controller. Fails with -EBUSY while async
 * operations are pending or while live sessions exist and the subsystem is
 * still running; during subsystem shutdown any leftover (already stopped)
 * sessions are freed here. On success the per-device context, socket
 * registration, thread and path string are all released.
 */
int
vhost_user_dev_unregister(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
	struct spdk_vhost_session *vsession, *tmp_vsession;

	pthread_mutex_lock(&user_dev->lock);
	/* Refuse to tear down while an async start/stop is still in flight. */
	if (user_dev->pending_async_op_num) {
		pthread_mutex_unlock(&user_dev->lock);
		return -EBUSY;
	}

	/* This is the case that uses RPC call `vhost_delete_controller` while VM is connected */
	if (!TAILQ_EMPTY(&user_dev->vsessions) && g_vhost_user_started) {
		SPDK_ERRLOG("Controller %s has still valid connection.\n", vdev->name);
		pthread_mutex_unlock(&user_dev->lock);
		return -EBUSY;
	}

	/* This is the case that quits the subsystem while VM is connected, the VM
	 * should be stopped by the shutdown thread.
	 */
	if (!g_vhost_user_started) {
		/* Sessions are already stopped; just release their memory. */
		TAILQ_FOREACH_SAFE(vsession, &user_dev->vsessions, tailq, tmp_vsession) {
			assert(vsession->started == false);
			TAILQ_REMOVE(&user_dev->vsessions, vsession, tailq);
			if (vsession->mem) {
				vhost_session_mem_unregister(vsession->mem);
				free(vsession->mem);
			}
			free(vsession->name);
			free(vsession);
		}
	}

	user_dev->registered = false;
	pthread_mutex_unlock(&user_dev->lock);

	/* There are no valid connections now, and it's not an error if the domain
	 * socket was already removed by shutdown thread.
	 */
	vhost_driver_unregister(vdev->path);

	/* Ask the controller's thread to exit, then release the device context. */
	spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
	pthread_mutex_destroy(&user_dev->lock);

	free(user_dev);
	free(vdev->path);

	return 0;
}
    1920             : 
/*
 * One-time initialization of the vhost-user transport. Defaults the socket
 * directory to the current working directory when no path was configured,
 * and records the calling thread so shutdown can complete back on it.
 * Returns 0 on success (including repeated calls), -1 if getcwd() fails.
 */
int
vhost_user_init(void)
{
	size_t len;

	/* Idempotent: a second call after successful init is a no-op. */
	if (g_vhost_user_started) {
		return 0;
	}

	/* If spdk_vhost_set_socket_path() was never called, fall back to cwd. */
	if (g_vhost_user_dev_dirname[0] == '\0') {
		if (getcwd(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 1) == NULL) {
			SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
			return -1;
		}

		/* Ensure a trailing '/' so socket names can be appended directly. */
		len = strlen(g_vhost_user_dev_dirname);
		if (g_vhost_user_dev_dirname[len - 1] != '/') {
			g_vhost_user_dev_dirname[len] = '/';
			g_vhost_user_dev_dirname[len + 1] = '\0';
		}
	}

	g_vhost_user_started = true;

	/* The shutdown path sends its completion message back to this thread. */
	g_vhost_user_init_thread = spdk_get_thread();
	assert(g_vhost_user_init_thread != NULL);

	return 0;
}
    1950             : 
static void
vhost_user_session_shutdown_on_init(void *vhost_cb)
{
	spdk_vhost_fini_cb fn = vhost_cb;

	/* Executed on the init thread: invoke the user's fini completion callback. */
	fn();
}
    1958             : 
/*
 * Shutdown-thread entry point (runs on a plain pthread, not an SPDK thread,
 * to avoid deadlocking with rte_vhost callbacks). Stops every active or
 * starting session of every controller; a controller's domain socket is only
 * unregistered when all of its sessions stopped cleanly. Finally posts the
 * user's fini callback back to the init thread.
 */
static void *
vhost_user_session_shutdown(void *vhost_cb)
{
	struct spdk_vhost_dev *vdev = NULL;
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;
	int ret;

	for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
	     vdev = spdk_vhost_dev_next(vdev)) {
		user_dev = to_user_dev(vdev);
		ret = 0;
		pthread_mutex_lock(&user_dev->lock);
		TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
			if (vsession->started || vsession->starting) {
				/* Accumulate failures; nonzero keeps the socket registered. */
				ret += _stop_session(vsession);
			}
		}
		pthread_mutex_unlock(&user_dev->lock);
		if (ret == 0) {
			vhost_driver_unregister(vdev->path);
		}
	}

	SPDK_INFOLOG(vhost, "Exiting\n");
	/* Hand control back to the init thread to run the fini callback. */
	spdk_thread_send_msg(g_vhost_user_init_thread, vhost_user_session_shutdown_on_init, vhost_cb);
	return NULL;
}
    1987             : 
    1988             : void
    1989           2 : vhost_user_fini(spdk_vhost_fini_cb vhost_cb)
    1990             : {
    1991           2 :         pthread_t tid;
    1992             :         int rc;
    1993             : 
    1994           2 :         if (!g_vhost_user_started) {
    1995           1 :                 vhost_cb();
    1996           1 :                 return;
    1997             :         }
    1998             : 
    1999           1 :         g_vhost_user_started = false;
    2000             : 
    2001             :         /* rte_vhost API for removing sockets is not asynchronous. Since it may call SPDK
    2002             :          * ops for stopping a device or removing a connection, we need to call it from
    2003             :          * a separate thread to avoid deadlock.
    2004             :          */
    2005           1 :         rc = pthread_create(&tid, NULL, &vhost_user_session_shutdown, vhost_cb);
    2006           1 :         if (rc < 0) {
    2007           0 :                 SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
    2008           0 :                 abort();
    2009             :         }
    2010           1 :         pthread_detach(tid);
    2011             : }
    2012             : 
/*
 * Write one JSON object per session of "vdev" into "w". Field names are part
 * of the RPC output format, so they must not change. The device lock is held
 * across the whole dump so the session list stays consistent.
 */
void
vhost_session_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
{
	struct spdk_vhost_session *vsession;
	struct spdk_vhost_user_dev *user_dev;

	user_dev = to_user_dev(vdev);
	pthread_mutex_lock(&user_dev->lock);
	TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
		spdk_json_write_object_begin(w);
		spdk_json_write_named_uint32(w, "vid", vsession->vid);
		spdk_json_write_named_uint32(w, "id", vsession->id);
		spdk_json_write_named_string(w, "name", vsession->name);
		spdk_json_write_named_bool(w, "started", vsession->started);
		spdk_json_write_named_uint32(w, "max_queues", vsession->max_queues);
		spdk_json_write_named_uint32(w, "inflight_task_cnt", vsession->task_cnt);
		spdk_json_write_object_end(w);
	}
	pthread_mutex_unlock(&user_dev->lock);
}

Generated by: LCOV version 1.15