Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2019 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2021 Mellanox Technologies LTD. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/env.h"
10 : #include "spdk/likely.h"
11 : #include "spdk/string.h"
12 : #include "spdk/util.h"
13 : #include "spdk/memory.h"
14 : #include "spdk/barrier.h"
15 : #include "spdk/vhost.h"
16 : #include "vhost_internal.h"
17 : #include <rte_version.h>
18 :
19 : #include "spdk_internal/vhost_user.h"
20 :
21 : /* Path to folder where character device will be created. Can be set by user. */
22 : static char g_vhost_user_dev_dirname[PATH_MAX] = "";
23 :
24 : static struct spdk_thread *g_vhost_user_init_thread;
25 :
26 : /**
27 : * DPDK calls our callbacks synchronously but the work those callbacks
28 : * perform needs to be async. Luckily, all DPDK callbacks are called on
29 : * a DPDK-internal pthread, so we'll just wait on a semaphore in there.
30 : */
31 : static sem_t g_dpdk_sem;
32 :
33 : /** Return code for the current DPDK callback */
34 : static int g_dpdk_response;
35 :
36 : struct vhost_session_fn_ctx {
37 : /** Device pointer obtained before enqueueing the event */
38 : struct spdk_vhost_dev *vdev;
39 :
40 : /** ID of the session to send event to. */
41 : uint32_t vsession_id;
42 :
43 : /** User provided function to be executed on session's thread. */
44 : spdk_vhost_session_fn cb_fn;
45 :
46 : /**
47 : * User provided function to be called on the init thread
48 : * after iterating through all sessions.
49 : */
50 : spdk_vhost_dev_fn cpl_fn;
51 :
52 : /** Custom user context */
53 : void *user_ctx;
54 : };
55 :
56 : static int vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
57 : unsigned timeout_sec, const char *errmsg);
58 :
59 : static void
60 : __attribute__((constructor))
61 1 : _vhost_user_sem_init(void)
62 : {
63 1 : if (sem_init(&g_dpdk_sem, 0, 0) != 0) {
64 0 : SPDK_ERRLOG("Failed to initialize semaphore for rte_vhost pthread.\n");
65 0 : abort();
66 : }
67 1 : }
68 :
69 : static void
70 : __attribute__((destructor))
71 1 : _vhost_user_sem_destroy(void)
72 : {
73 1 : sem_destroy(&g_dpdk_sem);
74 1 : }
75 :
76 : void *
77 0 : vhost_gpa_to_vva(struct spdk_vhost_session *vsession, uint64_t addr, uint64_t len)
78 : {
79 : void *vva;
80 0 : uint64_t newlen;
81 :
82 0 : newlen = len;
83 0 : vva = (void *)rte_vhost_va_from_guest_pa(vsession->mem, addr, &newlen);
84 0 : if (newlen != len) {
85 0 : return NULL;
86 : }
87 :
88 0 : return vva;
89 :
90 : }
91 :
92 : static void
93 0 : vhost_log_req_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
94 : uint16_t req_id)
95 : {
96 0 : struct vring_desc *desc, *desc_table;
97 0 : uint32_t desc_table_size;
98 : int rc;
99 :
100 0 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
101 0 : return;
102 : }
103 :
104 0 : rc = vhost_vq_get_desc(vsession, virtqueue, req_id, &desc, &desc_table, &desc_table_size);
105 0 : if (spdk_unlikely(rc != 0)) {
106 0 : SPDK_ERRLOG("Can't log used ring descriptors!\n");
107 0 : return;
108 : }
109 :
110 : do {
111 0 : if (vhost_vring_desc_is_wr(desc)) {
112 :			/* Ideally only the pages actually touched would be logged, but
113 :			 * doing so would require tracking those changes in each backend.
114 :			 * The backend will most likely touch all or most of those pages anyway,
115 :			 * so assume every page passed to us as a writeable buffer was touched. */
116 0 : rte_vhost_log_write(vsession->vid, desc->addr, desc->len);
117 : }
118 0 : vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
119 0 : } while (desc);
120 : }
121 :
122 : static void
123 7 : vhost_log_used_vring_elem(struct spdk_vhost_session *vsession,
124 : struct spdk_vhost_virtqueue *virtqueue,
125 : uint16_t idx)
126 : {
127 : uint64_t offset, len;
128 :
129 7 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
130 7 : return;
131 : }
132 :
133 0 : if (spdk_unlikely(virtqueue->packed.packed_ring)) {
134 0 : offset = idx * sizeof(struct vring_packed_desc);
135 0 : len = sizeof(struct vring_packed_desc);
136 : } else {
137 0 : offset = offsetof(struct vring_used, ring[idx]);
138 0 : len = sizeof(virtqueue->vring.used->ring[idx]);
139 : }
140 :
141 0 : rte_vhost_log_used_vring(vsession->vid, virtqueue->vring_idx, offset, len);
142 : }
143 :
144 : static void
145 0 : vhost_log_used_vring_idx(struct spdk_vhost_session *vsession,
146 : struct spdk_vhost_virtqueue *virtqueue)
147 : {
148 : uint64_t offset, len;
149 : uint16_t vq_idx;
150 :
151 0 : if (spdk_likely(!vhost_dev_has_feature(vsession, VHOST_F_LOG_ALL))) {
152 0 : return;
153 : }
154 :
155 0 : offset = offsetof(struct vring_used, idx);
156 0 : len = sizeof(virtqueue->vring.used->idx);
157 0 : vq_idx = virtqueue - vsession->virtqueue;
158 :
159 0 : rte_vhost_log_used_vring(vsession->vid, vq_idx, offset, len);
160 : }
161 :
162 : /*
163 : * Get available requests from avail ring.
164 : */
165 : uint16_t
166 4 : vhost_vq_avail_ring_get(struct spdk_vhost_virtqueue *virtqueue, uint16_t *reqs,
167 : uint16_t reqs_len)
168 : {
169 4 : struct rte_vhost_vring *vring = &virtqueue->vring;
170 4 : struct vring_avail *avail = vring->avail;
171 4 : uint16_t size_mask = vring->size - 1;
172 4 : uint16_t last_idx = virtqueue->last_avail_idx, avail_idx = avail->idx;
173 : uint16_t count, i;
174 : int rc;
175 4 : uint64_t u64_value;
176 :
177 4 : spdk_smp_rmb();
178 :
179 4 : if (virtqueue->vsession && spdk_unlikely(virtqueue->vsession->interrupt_mode)) {
180 : /* Read to clear vring's kickfd */
181 0 : rc = read(vring->kickfd, &u64_value, sizeof(u64_value));
182 0 : if (rc < 0) {
183 0 : SPDK_ERRLOG("failed to acknowledge kickfd: %s.\n", spdk_strerror(errno));
184 0 : return -errno;
185 : }
186 : }
187 :
188 4 : count = avail_idx - last_idx;
189 4 : if (spdk_likely(count == 0)) {
190 0 : return 0;
191 : }
192 :
193 4 : if (spdk_unlikely(count > vring->size)) {
194 : /* TODO: the queue is unrecoverably broken and should be marked so.
195 : * For now we will fail silently and report there are no new avail entries.
196 : */
197 1 : return 0;
198 : }
199 :
200 3 : count = spdk_min(count, reqs_len);
201 :
202 3 : virtqueue->last_avail_idx += count;
203 : /* Check whether there are unprocessed reqs in vq, then kick vq manually */
204 3 : if (virtqueue->vsession && spdk_unlikely(virtqueue->vsession->interrupt_mode)) {
205 :			/* If avail_idx is larger than the virtqueue's last_avail_idx, there are unprocessed reqs.
206 :			 * Re-read avail_idx from memory here to guard against a race condition with the guest.
207 : */
208 0 : avail_idx = * (volatile uint16_t *) &avail->idx;
209 0 : if (avail_idx > virtqueue->last_avail_idx) {
210 : /* Write to notify vring's kickfd */
211 0 : rc = write(vring->kickfd, &u64_value, sizeof(u64_value));
212 0 : if (rc < 0) {
213 0 : SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
214 0 : return -errno;
215 : }
216 : }
217 : }
218 :
219 19 : for (i = 0; i < count; i++) {
220 16 : reqs[i] = vring->avail->ring[(last_idx + i) & size_mask];
221 : }
222 :
223 3 : SPDK_DEBUGLOG(vhost_ring,
224 : "AVAIL: last_idx=%"PRIu16" avail_idx=%"PRIu16" count=%"PRIu16"\n",
225 : last_idx, avail_idx, count);
226 :
227 3 : return count;
228 : }
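
/*
 * Hypothetical usage sketch (not part of SPDK): one way a backend poller
 * could drain a split ring with vhost_vq_avail_ring_get() and complete each
 * request with vhost_vq_used_ring_enqueue(). The completion length of 0 is a
 * placeholder; a real backend would report how many bytes it wrote to the
 * guest buffers after translating the descriptor chain and doing the I/O.
 */
static void
example_poll_split_ring(struct spdk_vhost_session *vsession,
			struct spdk_vhost_virtqueue *vq)
{
	uint16_t reqs[32];
	uint16_t count, i;

	count = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
	for (i = 0; i < count; i++) {
		/* Translate descriptors to iovecs (vhost_vq_get_desc() +
		 * vhost_vring_desc_to_iov()), submit the I/O, then return
		 * the descriptor to the used ring on completion.
		 */
		vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
	}
}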
229 :
230 : static bool
231 0 : vhost_vring_desc_is_indirect(struct vring_desc *cur_desc)
232 : {
233 0 : return !!(cur_desc->flags & VRING_DESC_F_INDIRECT);
234 : }
235 :
236 : static bool
237 7 : vhost_vring_packed_desc_is_indirect(struct vring_packed_desc *cur_desc)
238 : {
239 7 : return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
240 : }
241 :
242 : static bool
243 0 : vhost_inflight_packed_desc_is_indirect(spdk_vhost_inflight_desc *cur_desc)
244 : {
245 0 : return (cur_desc->flags & VRING_DESC_F_INDIRECT) != 0;
246 : }
247 :
248 : int
249 0 : vhost_vq_get_desc(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *virtqueue,
250 : uint16_t req_idx, struct vring_desc **desc, struct vring_desc **desc_table,
251 : uint32_t *desc_table_size)
252 : {
253 0 : if (spdk_unlikely(req_idx >= virtqueue->vring.size)) {
254 0 : return -1;
255 : }
256 :
257 0 : *desc = &virtqueue->vring.desc[req_idx];
258 :
259 0 : if (vhost_vring_desc_is_indirect(*desc)) {
260 0 : *desc_table_size = (*desc)->len / sizeof(**desc);
261 0 : *desc_table = vhost_gpa_to_vva(vsession, (*desc)->addr,
262 0 : sizeof(**desc) * *desc_table_size);
263 0 : *desc = *desc_table;
264 0 : if (*desc == NULL) {
265 0 : return -1;
266 : }
267 :
268 0 : return 0;
269 : }
270 :
271 0 : *desc_table = virtqueue->vring.desc;
272 0 : *desc_table_size = virtqueue->vring.size;
273 :
274 0 : return 0;
275 : }
276 :
277 : static bool
278 0 : vhost_packed_desc_indirect_to_desc_table(struct spdk_vhost_session *vsession,
279 : uint64_t addr, uint32_t len,
280 : struct vring_packed_desc **desc_table,
281 : uint32_t *desc_table_size)
282 : {
283 0 : *desc_table_size = len / sizeof(struct vring_packed_desc);
284 :
285 0 : *desc_table = vhost_gpa_to_vva(vsession, addr, len);
286 0 : if (spdk_unlikely(*desc_table == NULL)) {
287 0 : return false;
288 : }
289 :
290 0 : return true;
291 : }
292 :
293 : int
294 0 : vhost_vq_get_desc_packed(struct spdk_vhost_session *vsession,
295 : struct spdk_vhost_virtqueue *virtqueue,
296 : uint16_t req_idx, struct vring_packed_desc **desc,
297 : struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
298 : {
299 0 : *desc = &virtqueue->vring.desc_packed[req_idx];
300 :
301 :	/* In a packed ring, when the desc is non-indirect we get the next desc
302 :	 * by checking (desc->flags & VRING_DESC_F_NEXT) != 0. When the desc
303 :	 * is indirect we get the next desc by index and desc_table_size. This
304 :	 * differs from the split ring.
305 : */
306 0 : if (vhost_vring_packed_desc_is_indirect(*desc)) {
307 0 : if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
308 : desc_table, desc_table_size)) {
309 0 : return -1;
310 : }
311 :
312 0 : *desc = *desc_table;
313 : } else {
314 0 : *desc_table = NULL;
315 0 : *desc_table_size = 0;
316 : }
317 :
318 0 : return 0;
319 : }
320 :
321 : int
322 0 : vhost_inflight_queue_get_desc(struct spdk_vhost_session *vsession,
323 : spdk_vhost_inflight_desc *desc_array,
324 : uint16_t req_idx, spdk_vhost_inflight_desc **desc,
325 : struct vring_packed_desc **desc_table, uint32_t *desc_table_size)
326 : {
327 0 : *desc = &desc_array[req_idx];
328 :
329 0 : if (vhost_inflight_packed_desc_is_indirect(*desc)) {
330 0 : if (!vhost_packed_desc_indirect_to_desc_table(vsession, (*desc)->addr, (*desc)->len,
331 : desc_table, desc_table_size)) {
332 0 : return -1;
333 : }
334 :
335 :		/* This desc is the inflight desc, not the packed desc.
336 :		 * When F_INDIRECT is set, the table entries are packed descs,
337 :		 * so set the inflight desc to NULL.
338 : */
339 0 : *desc = NULL;
340 : } else {
341 :		/* When F_INDIRECT is not set, there is no packed desc table */
342 0 : *desc_table = NULL;
343 0 : *desc_table_size = 0;
344 : }
345 :
346 0 : return 0;
347 : }
348 :
349 : int
350 0 : vhost_vq_used_signal(struct spdk_vhost_session *vsession,
351 : struct spdk_vhost_virtqueue *virtqueue)
352 : {
353 0 : if (virtqueue->used_req_cnt == 0) {
354 0 : return 0;
355 : }
356 :
357 0 : SPDK_DEBUGLOG(vhost_ring,
358 : "Queue %td - USED RING: sending IRQ: last used %"PRIu16"\n",
359 : virtqueue - vsession->virtqueue, virtqueue->last_used_idx);
360 :
361 : #if RTE_VERSION < RTE_VERSION_NUM(22, 11, 0, 0)
362 : if (rte_vhost_vring_call(vsession->vid, virtqueue->vring_idx) == 0) {
363 : #else
364 0 : if (rte_vhost_vring_call_nonblock(vsession->vid, virtqueue->vring_idx) == 0) {
365 : #endif
366 : /* interrupt signalled */
367 0 : virtqueue->req_cnt += virtqueue->used_req_cnt;
368 0 : virtqueue->used_req_cnt = 0;
369 0 : return 1;
370 : } else {
371 : /* interrupt not signalled */
372 0 : return 0;
373 : }
374 : }
375 :
376 : static void
377 0 : session_vq_io_stats_update(struct spdk_vhost_session *vsession,
378 : struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
379 : {
380 0 : uint32_t irq_delay_base = vsession->coalescing_delay_time_base;
381 0 : uint32_t io_threshold = vsession->coalescing_io_rate_threshold;
382 : int32_t irq_delay;
383 : uint32_t req_cnt;
384 :
385 0 : req_cnt = virtqueue->req_cnt + virtqueue->used_req_cnt;
386 0 : if (req_cnt <= io_threshold) {
387 0 : return;
388 : }
389 :
390 0 : irq_delay = (irq_delay_base * (req_cnt - io_threshold)) / io_threshold;
391 0 : virtqueue->irq_delay_time = (uint32_t) spdk_max(0, irq_delay);
392 :
393 0 : virtqueue->req_cnt = 0;
394 0 : virtqueue->next_event_time = now;
395 : }
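
/*
 * Worked example with hypothetical numbers: if coalescing_io_rate_threshold
 * is 600 requests per stats interval and coalescing_delay_time_base
 * corresponds to 50 us, a window that saw req_cnt = 1200 requests yields
 * irq_delay = 50 us * (1200 - 600) / 600 = 50 us (the full base delay),
 * while a window with 601 requests truncates to an irq_delay of 0.
 */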
396 :
397 : static void
398 0 : check_session_vq_io_stats(struct spdk_vhost_session *vsession,
399 : struct spdk_vhost_virtqueue *virtqueue, uint64_t now)
400 : {
401 0 : if (now < vsession->next_stats_check_time) {
402 0 : return;
403 : }
404 :
405 0 : vsession->next_stats_check_time = now + vsession->stats_check_interval;
406 0 : session_vq_io_stats_update(vsession, virtqueue, now);
407 : }
408 :
409 : static inline bool
410 0 : vhost_vq_event_is_suppressed(struct spdk_vhost_virtqueue *vq)
411 : {
412 0 : spdk_smp_mb();
413 :
414 0 : if (spdk_unlikely(vq->packed.packed_ring)) {
415 0 : if (vq->vring.driver_event->flags & VRING_PACKED_EVENT_FLAG_DISABLE) {
416 0 : return true;
417 : }
418 : } else {
419 0 : if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) {
420 0 : return true;
421 : }
422 : }
423 :
424 0 : return false;
425 : }
426 :
427 : void
428 0 : vhost_session_vq_used_signal(struct spdk_vhost_virtqueue *virtqueue)
429 : {
430 0 : struct spdk_vhost_session *vsession = virtqueue->vsession;
431 : uint64_t now;
432 :
433 0 : if (vsession->coalescing_delay_time_base == 0) {
434 0 : if (virtqueue->vring.desc == NULL) {
435 0 : return;
436 : }
437 :
438 0 : if (vhost_vq_event_is_suppressed(virtqueue)) {
439 0 : return;
440 : }
441 :
442 0 : vhost_vq_used_signal(vsession, virtqueue);
443 : } else {
444 0 : now = spdk_get_ticks();
445 0 : check_session_vq_io_stats(vsession, virtqueue, now);
446 :
447 : /* No need for event right now */
448 0 : if (now < virtqueue->next_event_time) {
449 0 : return;
450 : }
451 :
452 0 : if (vhost_vq_event_is_suppressed(virtqueue)) {
453 0 : return;
454 : }
455 :
456 0 : if (!vhost_vq_used_signal(vsession, virtqueue)) {
457 0 : return;
458 : }
459 :
460 :		/* The syscall takes quite a while, so refresh the time afterwards */
461 0 : now = spdk_get_ticks();
462 0 : virtqueue->next_event_time = now + virtqueue->irq_delay_time;
463 : }
464 : }
465 :
466 : /*
467 : * Enqueue id and len to used ring.
468 : */
469 : void
470 0 : vhost_vq_used_ring_enqueue(struct spdk_vhost_session *vsession,
471 : struct spdk_vhost_virtqueue *virtqueue,
472 : uint16_t id, uint32_t len)
473 : {
474 0 : struct rte_vhost_vring *vring = &virtqueue->vring;
475 0 : struct vring_used *used = vring->used;
476 0 : uint16_t last_idx = virtqueue->last_used_idx & (vring->size - 1);
477 0 : uint16_t vq_idx = virtqueue->vring_idx;
478 :
479 0 : SPDK_DEBUGLOG(vhost_ring,
480 : "Queue %td - USED RING: last_idx=%"PRIu16" req id=%"PRIu16" len=%"PRIu32"\n",
481 : virtqueue - vsession->virtqueue, virtqueue->last_used_idx, id, len);
482 :
483 0 : vhost_log_req_desc(vsession, virtqueue, id);
484 :
485 0 : virtqueue->last_used_idx++;
486 0 : used->ring[last_idx].id = id;
487 0 : used->ring[last_idx].len = len;
488 :
489 : /* Ensure the used ring is updated before we log it or increment used->idx. */
490 0 : spdk_smp_wmb();
491 :
492 0 : rte_vhost_set_last_inflight_io_split(vsession->vid, vq_idx, id);
493 :
494 0 : vhost_log_used_vring_elem(vsession, virtqueue, last_idx);
495 0 : * (volatile uint16_t *) &used->idx = virtqueue->last_used_idx;
496 0 : vhost_log_used_vring_idx(vsession, virtqueue);
497 :
498 0 : rte_vhost_clr_inflight_desc_split(vsession->vid, vq_idx, virtqueue->last_used_idx, id);
499 :
500 0 : virtqueue->used_req_cnt++;
501 :
502 0 : if (vsession->interrupt_mode) {
503 0 : if (virtqueue->vring.desc == NULL || vhost_vq_event_is_suppressed(virtqueue)) {
504 0 : return;
505 : }
506 :
507 0 : vhost_vq_used_signal(vsession, virtqueue);
508 : }
509 : }
510 :
511 : void
512 7 : vhost_vq_packed_ring_enqueue(struct spdk_vhost_session *vsession,
513 : struct spdk_vhost_virtqueue *virtqueue,
514 : uint16_t num_descs, uint16_t buffer_id,
515 : uint32_t length, uint16_t inflight_head)
516 : {
517 7 : struct vring_packed_desc *desc = &virtqueue->vring.desc_packed[virtqueue->last_used_idx];
518 : bool used, avail;
519 :
520 7 : SPDK_DEBUGLOG(vhost_ring,
521 : "Queue %td - RING: buffer_id=%"PRIu16"\n",
522 : virtqueue - vsession->virtqueue, buffer_id);
523 :
524 :	/* When the descriptor has been used, two flags in the descriptor,
525 :	 * the avail flag and the used flag, are set equal to each other,
526 :	 * and the used flag value == used_wrap_counter.
527 : */
528 7 : used = !!(desc->flags & VRING_DESC_F_USED);
529 7 : avail = !!(desc->flags & VRING_DESC_F_AVAIL);
530 7 : if (spdk_unlikely(used == virtqueue->packed.used_phase && used == avail)) {
531 0 : SPDK_ERRLOG("descriptor has been used before\n");
532 0 : return;
533 : }
534 :
535 :	/* In a used desc, addr is unused and len specifies the length of the
536 :	 * buffer that has been written to by the device.
537 : */
538 7 : desc->addr = 0;
539 7 : desc->len = length;
540 :
541 : /* This bit specifies whether any data has been written by the device */
542 7 : if (length != 0) {
543 7 : desc->flags |= VRING_DESC_F_WRITE;
544 : }
545 :
546 : /* Buffer ID is included in the last descriptor in the list.
547 : * The driver needs to keep track of the size of the list corresponding
548 : * to each buffer ID.
549 : */
550 7 : desc->id = buffer_id;
551 :
552 : /* A device MUST NOT make the descriptor used before buffer_id is
553 : * written to the descriptor.
554 : */
555 7 : spdk_smp_wmb();
556 :
557 7 : rte_vhost_set_last_inflight_io_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
558 : /* To mark a desc as used, the device sets the F_USED bit in flags to match
559 : * the internal Device ring wrap counter. It also sets the F_AVAIL bit to
560 : * match the same value.
561 : */
562 7 : if (virtqueue->packed.used_phase) {
563 4 : desc->flags |= VRING_DESC_F_AVAIL_USED;
564 : } else {
565 3 : desc->flags &= ~VRING_DESC_F_AVAIL_USED;
566 : }
567 7 : rte_vhost_clr_inflight_desc_packed(vsession->vid, virtqueue->vring_idx, inflight_head);
568 :
569 7 : vhost_log_used_vring_elem(vsession, virtqueue, virtqueue->last_used_idx);
570 7 : virtqueue->last_used_idx += num_descs;
571 7 : if (virtqueue->last_used_idx >= virtqueue->vring.size) {
572 1 : virtqueue->last_used_idx -= virtqueue->vring.size;
573 1 : virtqueue->packed.used_phase = !virtqueue->packed.used_phase;
574 : }
575 :
576 7 : virtqueue->used_req_cnt++;
577 : }
578 :
579 : bool
580 12 : vhost_vq_packed_ring_is_avail(struct spdk_vhost_virtqueue *virtqueue)
581 : {
582 12 : uint16_t flags = virtqueue->vring.desc_packed[virtqueue->last_avail_idx].flags;
583 :
584 : /* To mark a desc as available, the driver sets the F_AVAIL bit in flags
585 : * to match the internal avail wrap counter. It also sets the F_USED bit to
586 : * match the inverse value but it's not mandatory.
587 : */
588 12 : return (!!(flags & VRING_DESC_F_AVAIL) == virtqueue->packed.avail_phase);
589 : }
590 :
591 : bool
592 0 : vhost_vring_packed_desc_is_wr(struct vring_packed_desc *cur_desc)
593 : {
594 0 : return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
595 : }
596 :
597 : bool
598 0 : vhost_vring_inflight_desc_is_wr(spdk_vhost_inflight_desc *cur_desc)
599 : {
600 0 : return (cur_desc->flags & VRING_DESC_F_WRITE) != 0;
601 : }
602 :
603 : int
604 0 : vhost_vring_packed_desc_get_next(struct vring_packed_desc **desc, uint16_t *req_idx,
605 : struct spdk_vhost_virtqueue *vq,
606 : struct vring_packed_desc *desc_table,
607 : uint32_t desc_table_size)
608 : {
609 0 : if (desc_table != NULL) {
610 :		/* A non-NULL desc_table means the request is indirect and we get the next
611 :		 * desc via req_idx and desc_table_size. A NULL return value in *desc means
612 :		 * we reached the last desc of this request.
613 : */
614 0 : (*req_idx)++;
615 0 : if (*req_idx < desc_table_size) {
616 0 : *desc = &desc_table[*req_idx];
617 : } else {
618 0 : *desc = NULL;
619 : }
620 : } else {
621 :		/* A NULL desc_table means the request is non-indirect and we get the next
622 :		 * desc via req_idx and the F_NEXT flag. A NULL return value in *desc means
623 :		 * we reached the last desc of this request. When a new desc is
624 :		 * returned, req_idx is updated as well.
625 : */
626 0 : if (((*desc)->flags & VRING_DESC_F_NEXT) == 0) {
627 0 : *desc = NULL;
628 0 : return 0;
629 : }
630 :
631 0 : *req_idx = (*req_idx + 1) % vq->vring.size;
632 0 : *desc = &vq->vring.desc_packed[*req_idx];
633 : }
634 :
635 0 : return 0;
636 : }
637 :
638 : static int
639 6 : vhost_vring_desc_payload_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
640 : uint16_t *iov_index, uintptr_t payload, uint64_t remaining)
641 : {
642 : uintptr_t vva;
643 6 : uint64_t len;
644 :
645 : do {
646 7 : if (*iov_index >= SPDK_VHOST_IOVS_MAX) {
647 1 : SPDK_ERRLOG("SPDK_VHOST_IOVS_MAX(%d) reached\n", SPDK_VHOST_IOVS_MAX);
648 1 : return -1;
649 : }
650 6 : len = remaining;
651 6 : vva = (uintptr_t)rte_vhost_va_from_guest_pa(vsession->mem, payload, &len);
652 6 : if (vva == 0 || len == 0) {
653 0 : SPDK_ERRLOG("gpa_to_vva(%p) == NULL\n", (void *)payload);
654 0 : return -1;
655 : }
656 6 : iov[*iov_index].iov_base = (void *)vva;
657 6 : iov[*iov_index].iov_len = len;
658 6 : remaining -= len;
659 6 : payload += len;
660 6 : (*iov_index)++;
661 6 : } while (remaining);
662 :
663 5 : return 0;
664 : }
665 :
666 : int
667 0 : vhost_vring_packed_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
668 : uint16_t *iov_index, const struct vring_packed_desc *desc)
669 : {
670 0 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
671 0 : desc->addr, desc->len);
672 : }
673 :
674 : int
675 0 : vhost_vring_inflight_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
676 : uint16_t *iov_index, const spdk_vhost_inflight_desc *desc)
677 : {
678 0 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
679 0 : desc->addr, desc->len);
680 : }
681 :
682 : /* 1. Traverse the desc chain to get the buffer_id and return it as task_idx.
683 :  * 2. Update vq->last_avail_idx to point to the next available desc chain.
684 :  * 3. Toggle the avail_wrap_counter if last_avail_idx wraps around.
685 : */
686 : uint16_t
687 7 : vhost_vring_packed_desc_get_buffer_id(struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
688 : uint16_t *num_descs)
689 : {
690 : struct vring_packed_desc *desc;
691 7 : uint16_t desc_head = req_idx;
692 :
693 7 : *num_descs = 1;
694 :
695 7 : desc = &vq->vring.desc_packed[req_idx];
696 7 : if (!vhost_vring_packed_desc_is_indirect(desc)) {
697 7 : while ((desc->flags & VRING_DESC_F_NEXT) != 0) {
698 0 : req_idx = (req_idx + 1) % vq->vring.size;
699 0 : desc = &vq->vring.desc_packed[req_idx];
700 0 : (*num_descs)++;
701 : }
702 : }
703 :
704 :	/* The queue size doesn't have to be a power of 2.
705 :	 * The device maintains last_avail_idx, so we can make sure
706 :	 * the value stays valid (0 ~ vring.size - 1).
707 : */
708 7 : vq->last_avail_idx = (req_idx + 1) % vq->vring.size;
709 7 : if (vq->last_avail_idx < desc_head) {
710 1 : vq->packed.avail_phase = !vq->packed.avail_phase;
711 : }
712 :
713 7 : return desc->id;
714 : }
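
/*
 * Worked example with hypothetical numbers: with vq->vring.size = 128,
 * req_idx = 126 and a 3-descriptor chain (F_NEXT set on the first two
 * descriptors), the loop above walks 126 -> 127 -> 0, so *num_descs = 3 and
 * last_avail_idx = (0 + 1) % 128 = 1. Since 1 < desc_head (126), the ring
 * has wrapped and avail_phase is inverted.
 */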
715 :
716 : int
717 0 : vhost_vring_desc_get_next(struct vring_desc **desc,
718 : struct vring_desc *desc_table, uint32_t desc_table_size)
719 : {
720 0 : struct vring_desc *old_desc = *desc;
721 : uint16_t next_idx;
722 :
723 0 : if ((old_desc->flags & VRING_DESC_F_NEXT) == 0) {
724 0 : *desc = NULL;
725 0 : return 0;
726 : }
727 :
728 0 : next_idx = old_desc->next;
729 0 : if (spdk_unlikely(next_idx >= desc_table_size)) {
730 0 : *desc = NULL;
731 0 : return -1;
732 : }
733 :
734 0 : *desc = &desc_table[next_idx];
735 0 : return 0;
736 : }
737 :
738 : int
739 6 : vhost_vring_desc_to_iov(struct spdk_vhost_session *vsession, struct iovec *iov,
740 : uint16_t *iov_index, const struct vring_desc *desc)
741 : {
742 12 : return vhost_vring_desc_payload_to_iov(vsession, iov, iov_index,
743 6 : desc->addr, desc->len);
744 : }
745 :
746 : static inline void
747 0 : vhost_session_mem_region_calc(uint64_t *previous_start, uint64_t *start, uint64_t *end,
748 : uint64_t *len, struct rte_vhost_mem_region *region)
749 : {
750 0 : *start = FLOOR_2MB(region->mmap_addr);
751 0 : *end = CEIL_2MB(region->mmap_addr + region->mmap_size);
752 0 : if (*start == *previous_start) {
753 0 : *start += (size_t) VALUE_2MB;
754 : }
755 0 : *previous_start = *start;
756 0 : *len = *end - *start;
757 0 : }
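
/*
 * Worked example with hypothetical addresses: a region with
 * mmap_addr = 0x7f0000201000 and mmap_size = 0x100000 gives
 * start = FLOOR_2MB(0x7f0000201000) = 0x7f0000200000 and
 * end = CEIL_2MB(0x7f0000301000) = 0x7f0000400000, so len = 0x200000.
 * If the previous region produced the same start, start is advanced by
 * VALUE_2MB so the same 2 MB page is not registered twice.
 */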
758 :
759 : void
760 0 : vhost_session_mem_register(struct rte_vhost_memory *mem)
761 : {
762 0 : uint64_t start, end, len;
763 : uint32_t i;
764 0 : uint64_t previous_start = UINT64_MAX;
765 :
766 :
767 0 : for (i = 0; i < mem->nregions; i++) {
768 0 : vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
769 0 : SPDK_INFOLOG(vhost, "Registering VM memory for vtophys translation - 0x%jx len:0x%jx\n",
770 : start, len);
771 :
772 0 : if (spdk_mem_register((void *)start, len) != 0) {
773 0 : SPDK_WARNLOG("Failed to register memory region %"PRIu32". Future vtophys translation might fail.\n",
774 : i);
775 0 : continue;
776 : }
777 : }
778 0 : }
779 :
780 : void
781 0 : vhost_session_mem_unregister(struct rte_vhost_memory *mem)
782 : {
783 0 : uint64_t start, end, len;
784 : uint32_t i;
785 0 : uint64_t previous_start = UINT64_MAX;
786 :
787 0 : for (i = 0; i < mem->nregions; i++) {
788 0 : vhost_session_mem_region_calc(&previous_start, &start, &end, &len, &mem->regions[i]);
789 0 : if (spdk_vtophys((void *) start, NULL) == SPDK_VTOPHYS_ERROR) {
790 0 : continue; /* region has not been registered */
791 : }
792 :
793 0 : if (spdk_mem_unregister((void *)start, len) != 0) {
794 0 : assert(false);
795 : }
796 : }
797 0 : }
798 :
799 : static bool
800 0 : vhost_memory_changed(struct rte_vhost_memory *new,
801 : struct rte_vhost_memory *old)
802 : {
803 : uint32_t i;
804 :
805 0 : if (new->nregions != old->nregions) {
806 0 : return true;
807 : }
808 :
809 0 : for (i = 0; i < new->nregions; ++i) {
810 0 : struct rte_vhost_mem_region *new_r = &new->regions[i];
811 0 : struct rte_vhost_mem_region *old_r = &old->regions[i];
812 :
813 0 : if (new_r->guest_phys_addr != old_r->guest_phys_addr) {
814 0 : return true;
815 : }
816 0 : if (new_r->size != old_r->size) {
817 0 : return true;
818 : }
819 0 : if (new_r->guest_user_addr != old_r->guest_user_addr) {
820 0 : return true;
821 : }
822 0 : if (new_r->mmap_addr != old_r->mmap_addr) {
823 0 : return true;
824 : }
825 0 : if (new_r->fd != old_r->fd) {
826 0 : return true;
827 : }
828 : }
829 :
830 0 : return false;
831 : }
832 :
833 : static int
834 0 : vhost_register_memtable_if_required(struct spdk_vhost_session *vsession, int vid)
835 : {
836 0 : struct rte_vhost_memory *new_mem;
837 :
838 0 : if (vhost_get_mem_table(vid, &new_mem) != 0) {
839 0 : SPDK_ERRLOG("vhost device %d: Failed to get guest memory table\n", vid);
840 0 : return -1;
841 : }
842 :
843 0 : if (vsession->mem == NULL) {
844 0 : SPDK_INFOLOG(vhost, "Start to set memtable\n");
845 0 : vsession->mem = new_mem;
846 0 : vhost_session_mem_register(vsession->mem);
847 0 : return 0;
848 : }
849 :
850 0 : if (vhost_memory_changed(new_mem, vsession->mem)) {
851 0 : SPDK_INFOLOG(vhost, "Memtable is changed\n");
852 0 : vhost_session_mem_unregister(vsession->mem);
853 0 : free(vsession->mem);
854 :
855 0 : vsession->mem = new_mem;
856 0 : vhost_session_mem_register(vsession->mem);
857 0 : return 0;
858 :
859 : }
860 :
861 0 : SPDK_INFOLOG(vhost, "Memtable is unchanged\n");
862 0 : free(new_mem);
863 0 : return 0;
864 : }
865 :
866 : static int
867 0 : _stop_session(struct spdk_vhost_session *vsession)
868 : {
869 : struct spdk_vhost_virtqueue *q;
870 : int rc;
871 : uint16_t i;
872 :
873 0 : rc = vhost_user_wait_for_session_stop(vsession, 3, "stop session");
874 0 : if (rc != 0) {
875 0 : SPDK_ERRLOG("Couldn't stop device with vid %d.\n", vsession->vid);
876 0 : return rc;
877 : }
878 :
879 0 : for (i = 0; i < vsession->max_queues; i++) {
880 0 : q = &vsession->virtqueue[i];
881 :
882 : /* vring.desc and vring.desc_packed are in a union struct
883 : * so q->vring.desc can replace q->vring.desc_packed.
884 : */
885 0 : if (q->vring.desc == NULL) {
886 0 : continue;
887 : }
888 :
889 :		/* Packed virtqueues support up to 2^15 entries each,
890 :		 * so the remaining top bit can be used as the wrap counter.
891 : */
892 0 : if (q->packed.packed_ring) {
893 0 : q->last_avail_idx = q->last_avail_idx |
894 0 : ((uint16_t)q->packed.avail_phase << 15);
895 0 : q->last_used_idx = q->last_used_idx |
896 0 : ((uint16_t)q->packed.used_phase << 15);
897 : }
898 :
899 0 : rte_vhost_set_vring_base(vsession->vid, i, q->last_avail_idx, q->last_used_idx);
900 0 : q->vring.desc = NULL;
901 : }
902 0 : vsession->max_queues = 0;
903 :
904 0 : return 0;
905 : }
906 :
907 : static int
908 0 : new_connection(int vid)
909 : {
910 : struct spdk_vhost_dev *vdev;
911 : struct spdk_vhost_user_dev *user_dev;
912 0 : struct spdk_vhost_session *vsession;
913 : size_t dev_dirname_len;
914 0 : char ifname[PATH_MAX];
915 : char *ctrlr_name;
916 :
917 0 : if (rte_vhost_get_ifname(vid, ifname, PATH_MAX) < 0) {
918 0 : SPDK_ERRLOG("Couldn't get a valid ifname for device with vid %d\n", vid);
919 0 : return -1;
920 : }
921 :
922 0 : ctrlr_name = &ifname[0];
923 0 : dev_dirname_len = strlen(g_vhost_user_dev_dirname);
924 0 : if (strncmp(ctrlr_name, g_vhost_user_dev_dirname, dev_dirname_len) == 0) {
925 0 : ctrlr_name += dev_dirname_len;
926 : }
927 :
928 0 : spdk_vhost_lock();
929 0 : vdev = spdk_vhost_dev_find(ctrlr_name);
930 0 : if (vdev == NULL) {
931 0 : SPDK_ERRLOG("Couldn't find device with vid %d to create connection for.\n", vid);
932 0 : spdk_vhost_unlock();
933 0 : return -1;
934 : }
935 0 : spdk_vhost_unlock();
936 :
937 0 : user_dev = to_user_dev(vdev);
938 0 : pthread_mutex_lock(&user_dev->lock);
939 0 : if (user_dev->registered == false) {
940 0 : SPDK_ERRLOG("Device %s is unregistered\n", ctrlr_name);
941 0 : pthread_mutex_unlock(&user_dev->lock);
942 0 : return -1;
943 : }
944 :
945 : /* We expect sessions inside user_dev->vsessions to be sorted in ascending
946 :	 * order with regard to vsession->id. For now we always set id = vsessions_num++
947 : * and append each session to the very end of the vsessions list.
948 : * This is required for vhost_user_dev_foreach_session() to work.
949 : */
950 0 : if (user_dev->vsessions_num == UINT_MAX) {
951 0 : pthread_mutex_unlock(&user_dev->lock);
952 0 : assert(false);
953 : return -EINVAL;
954 : }
955 :
956 0 : if (posix_memalign((void **)&vsession, SPDK_CACHE_LINE_SIZE, sizeof(*vsession) +
957 0 : user_dev->user_backend->session_ctx_size)) {
958 0 : SPDK_ERRLOG("vsession alloc failed\n");
959 0 : pthread_mutex_unlock(&user_dev->lock);
960 0 : return -1;
961 : }
962 0 : memset(vsession, 0, sizeof(*vsession) + user_dev->user_backend->session_ctx_size);
963 :
964 0 : vsession->vdev = vdev;
965 0 : vsession->vid = vid;
966 0 : vsession->id = user_dev->vsessions_num++;
967 0 : vsession->name = spdk_sprintf_alloc("%ss%u", vdev->name, vsession->vid);
968 0 : if (vsession->name == NULL) {
969 0 : SPDK_ERRLOG("vsession alloc failed\n");
970 0 : free(vsession);
971 0 : pthread_mutex_unlock(&user_dev->lock);
972 0 : return -1;
973 : }
974 0 : vsession->started = false;
975 0 : vsession->starting = false;
976 0 : vsession->next_stats_check_time = 0;
977 0 : vsession->stats_check_interval = SPDK_VHOST_STATS_CHECK_INTERVAL_MS *
978 0 : spdk_get_ticks_hz() / 1000UL;
979 0 : TAILQ_INSERT_TAIL(&user_dev->vsessions, vsession, tailq);
980 0 : vhost_session_install_rte_compat_hooks(vsession);
981 0 : pthread_mutex_unlock(&user_dev->lock);
982 :
983 0 : return 0;
984 : }
985 :
986 : static void
987 0 : vhost_user_session_start(void *arg1)
988 : {
989 0 : struct spdk_vhost_session *vsession = arg1;
990 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
991 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
992 : const struct spdk_vhost_user_dev_backend *backend;
993 : int rc;
994 :
995 0 : SPDK_INFOLOG(vhost, "Starting new session for device %s with vid %d\n", vdev->name, vsession->vid);
996 0 : pthread_mutex_lock(&user_dev->lock);
997 0 : vsession->starting = false;
998 0 : backend = user_dev->user_backend;
999 0 : rc = backend->start_session(vdev, vsession, NULL);
1000 0 : if (rc == 0) {
1001 0 : vsession->started = true;
1002 : }
1003 0 : pthread_mutex_unlock(&user_dev->lock);
1004 0 : }
1005 :
1006 : static int
1007 0 : set_device_vq_callfd(struct spdk_vhost_session *vsession, uint16_t qid)
1008 : {
1009 : struct spdk_vhost_virtqueue *q;
1010 :
1011 0 : if (qid >= SPDK_VHOST_MAX_VQUEUES) {
1012 0 : return -EINVAL;
1013 : }
1014 :
1015 0 : q = &vsession->virtqueue[qid];
1016 : /* vq isn't enabled yet */
1017 0 : if (q->vring_idx != qid) {
1018 0 : return 0;
1019 : }
1020 :
1021 : /* vring.desc and vring.desc_packed are in a union struct
1022 : * so q->vring.desc can replace q->vring.desc_packed.
1023 : */
1024 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1025 0 : return 0;
1026 : }
1027 :
1028 : /*
1029 :	 * Not sure right now, but this looks like some kind of QEMU bug: guest IO
1030 :	 * might be frozen without kicking all queues after live-migration. It looks like
1031 : * the previous vhost instance failed to effectively deliver all interrupts before
1032 : * the GET_VRING_BASE message. This shouldn't harm guest since spurious interrupts
1033 : * should be ignored by guest virtio driver.
1034 : *
1035 : * Tested on QEMU 2.10.91 and 2.11.50.
1036 : *
1037 : * Make sure a successful call of
1038 : * `rte_vhost_vring_call` will happen
1039 : * after starting the device.
1040 : */
1041 0 : q->used_req_cnt += 1;
1042 :
1043 0 : return 0;
1044 : }
1045 :
1046 : static int
1047 0 : enable_device_vq(struct spdk_vhost_session *vsession, uint16_t qid)
1048 : {
1049 : struct spdk_vhost_virtqueue *q;
1050 : bool packed_ring;
1051 : const struct spdk_vhost_user_dev_backend *backend;
1052 : int rc;
1053 :
1054 0 : if (qid >= SPDK_VHOST_MAX_VQUEUES) {
1055 0 : return -EINVAL;
1056 : }
1057 :
1058 0 : q = &vsession->virtqueue[qid];
1059 0 : memset(q, 0, sizeof(*q));
1060 0 : packed_ring = ((vsession->negotiated_features & (1ULL << VIRTIO_F_RING_PACKED)) != 0);
1061 :
1062 0 : q->vsession = vsession;
1063 0 : q->vring_idx = -1;
1064 0 : if (rte_vhost_get_vhost_vring(vsession->vid, qid, &q->vring)) {
1065 0 : return 0;
1066 : }
1067 0 : q->vring_idx = qid;
1068 0 : rte_vhost_get_vhost_ring_inflight(vsession->vid, qid, &q->vring_inflight);
1069 :
1070 : /* vring.desc and vring.desc_packed are in a union struct
1071 : * so q->vring.desc can replace q->vring.desc_packed.
1072 : */
1073 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1074 0 : return 0;
1075 : }
1076 :
1077 0 : if (rte_vhost_get_vring_base(vsession->vid, qid, &q->last_avail_idx, &q->last_used_idx)) {
1078 0 : q->vring.desc = NULL;
1079 0 : return 0;
1080 : }
1081 :
1082 0 : backend = to_user_dev(vsession->vdev)->user_backend;
1083 0 : rc = backend->alloc_vq_tasks(vsession, qid);
1084 0 : if (rc) {
1085 0 : return rc;
1086 : }
1087 :
1088 : /*
1089 : * This shouldn't harm guest since spurious interrupts should be ignored by
1090 : * guest virtio driver.
1091 : *
1092 : * Make sure a successful call of `rte_vhost_vring_call` will happen after
1093 : * restarting the device.
1094 : */
1095 0 : if (vsession->needs_restart) {
1096 0 : q->used_req_cnt += 1;
1097 : }
1098 :
1099 0 : if (packed_ring) {
1100 :		/* Since the packed ring flag is already negotiated between SPDK and the VM, the VM doesn't
1101 : * restore `last_avail_idx` and `last_used_idx` for packed ring, so use the
1102 : * inflight mem to restore the `last_avail_idx` and `last_used_idx`.
1103 : */
1104 0 : rte_vhost_get_vring_base_from_inflight(vsession->vid, qid, &q->last_avail_idx,
1105 : &q->last_used_idx);
1106 :
1107 : /* Packed virtqueues support up to 2^15 entries each
1108 :		 * so the remaining top bit can be used as the wrap counter.
1109 : */
1110 0 : q->packed.avail_phase = q->last_avail_idx >> 15;
1111 0 : q->last_avail_idx = q->last_avail_idx & 0x7FFF;
1112 0 : q->packed.used_phase = q->last_used_idx >> 15;
1113 0 : q->last_used_idx = q->last_used_idx & 0x7FFF;
1114 :
1115 0 : if (!spdk_interrupt_mode_is_enabled()) {
1116 : /* Disable I/O submission notifications, we'll be polling. */
1117 0 : q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_DISABLE;
1118 : } else {
1119 : /* Enable I/O submission notifications, we'll be interrupting. */
1120 0 : q->vring.device_event->flags = VRING_PACKED_EVENT_FLAG_ENABLE;
1121 : }
1122 : } else {
1123 0 : if (!spdk_interrupt_mode_is_enabled()) {
1124 : /* Disable I/O submission notifications, we'll be polling. */
1125 0 : q->vring.used->flags = VRING_USED_F_NO_NOTIFY;
1126 : } else {
1127 : /* Enable I/O submission notifications, we'll be interrupting. */
1128 0 : q->vring.used->flags = 0;
1129 : }
1130 : }
1131 :
1132 0 : if (spdk_interrupt_mode_is_enabled() && backend->register_vq_interrupt) {
1133 0 : backend->register_vq_interrupt(vsession, q);
1134 : }
1135 :
1136 0 : q->packed.packed_ring = packed_ring;
1137 0 : vsession->max_queues = spdk_max(vsession->max_queues, qid + 1);
1138 :
1139 0 : return 0;
1140 : }
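
/*
 * Worked example with a hypothetical value: if the inflight area reports
 * last_avail_idx = 0x8005, the packed-ring branch above recovers
 * avail_phase = 0x8005 >> 15 = 1 and last_avail_idx = 0x8005 & 0x7FFF = 5.
 * _stop_session() applies the inverse encoding (phase stored in bit 15)
 * before handing the ring base back to rte_vhost.
 */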
1141 :
1142 : static int
1143 0 : start_device(int vid)
1144 : {
1145 : struct spdk_vhost_dev *vdev;
1146 : struct spdk_vhost_session *vsession;
1147 : struct spdk_vhost_user_dev *user_dev;
1148 0 : int rc = 0;
1149 :
1150 0 : vsession = vhost_session_find_by_vid(vid);
1151 0 : if (vsession == NULL) {
1152 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1153 0 : return -1;
1154 : }
1155 0 : vdev = vsession->vdev;
1156 0 : user_dev = to_user_dev(vdev);
1157 :
1158 0 : pthread_mutex_lock(&user_dev->lock);
1159 0 : if (vsession->started) {
1160 : /* already started, nothing to do */
1161 0 : goto out;
1162 : }
1163 :
1164 0 : if (!vsession->mem) {
1165 0 : rc = -1;
1166 0 : SPDK_ERRLOG("Session %s doesn't set memory table yet\n", vsession->name);
1167 0 : goto out;
1168 : }
1169 :
1170 0 : vsession->starting = true;
1171 0 : SPDK_INFOLOG(vhost, "Session %s is scheduled to start\n", vsession->name);
1172 0 : vhost_user_session_set_coalescing(vdev, vsession, NULL);
1173 0 : spdk_thread_send_msg(vdev->thread, vhost_user_session_start, vsession);
1174 :
1175 0 : out:
1176 0 : pthread_mutex_unlock(&user_dev->lock);
1177 0 : return rc;
1178 : }
1179 :
1180 : static void
1181 0 : stop_device(int vid)
1182 : {
1183 : struct spdk_vhost_session *vsession;
1184 : struct spdk_vhost_user_dev *user_dev;
1185 :
1186 0 : vsession = vhost_session_find_by_vid(vid);
1187 0 : if (vsession == NULL) {
1188 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1189 0 : return;
1190 : }
1191 0 : user_dev = to_user_dev(vsession->vdev);
1192 :
1193 0 : pthread_mutex_lock(&user_dev->lock);
1194 0 : if (!vsession->started && !vsession->starting) {
1195 0 : pthread_mutex_unlock(&user_dev->lock);
1196 : /* already stopped, nothing to do */
1197 0 : return;
1198 : }
1199 :
1200 0 : _stop_session(vsession);
1201 0 : pthread_mutex_unlock(&user_dev->lock);
1202 : }
1203 :
1204 : static void
1205 0 : destroy_connection(int vid)
1206 : {
1207 : struct spdk_vhost_session *vsession;
1208 : struct spdk_vhost_user_dev *user_dev;
1209 :
1210 0 : vsession = vhost_session_find_by_vid(vid);
1211 0 : if (vsession == NULL) {
1212 0 : SPDK_ERRLOG("Couldn't find session with vid %d.\n", vid);
1213 0 : return;
1214 : }
1215 0 : user_dev = to_user_dev(vsession->vdev);
1216 :
1217 0 : pthread_mutex_lock(&user_dev->lock);
1218 0 : if (vsession->started || vsession->starting) {
1219 0 : if (_stop_session(vsession) != 0) {
1220 0 : pthread_mutex_unlock(&user_dev->lock);
1221 0 : return;
1222 : }
1223 : }
1224 :
1225 0 : if (vsession->mem) {
1226 0 : vhost_session_mem_unregister(vsession->mem);
1227 0 : free(vsession->mem);
1228 : }
1229 :
1230 0 : TAILQ_REMOVE(&to_user_dev(vsession->vdev)->vsessions, vsession, tailq);
1231 0 : free(vsession->name);
1232 0 : free(vsession);
1233 0 : pthread_mutex_unlock(&user_dev->lock);
1234 : }
1235 :
1236 : static const struct rte_vhost_device_ops g_spdk_vhost_ops = {
1237 : .new_device = start_device,
1238 : .destroy_device = stop_device,
1239 : .new_connection = new_connection,
1240 : .destroy_connection = destroy_connection,
1241 : };
1242 :
1243 : static struct spdk_vhost_session *
1244 0 : vhost_session_find_by_id(struct spdk_vhost_dev *vdev, unsigned id)
1245 : {
1246 : struct spdk_vhost_session *vsession;
1247 :
1248 0 : TAILQ_FOREACH(vsession, &to_user_dev(vdev)->vsessions, tailq) {
1249 0 : if (vsession->id == id) {
1250 0 : return vsession;
1251 : }
1252 : }
1253 :
1254 0 : return NULL;
1255 : }
1256 :
1257 : struct spdk_vhost_session *
1258 2 : vhost_session_find_by_vid(int vid)
1259 : {
1260 : struct spdk_vhost_dev *vdev;
1261 : struct spdk_vhost_session *vsession;
1262 : struct spdk_vhost_user_dev *user_dev;
1263 :
1264 2 : spdk_vhost_lock();
1265 3 : for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
1266 1 : vdev = spdk_vhost_dev_next(vdev)) {
1267 2 : user_dev = to_user_dev(vdev);
1268 :
1269 2 : pthread_mutex_lock(&user_dev->lock);
1270 3 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1271 2 : if (vsession->vid == vid) {
1272 1 : pthread_mutex_unlock(&user_dev->lock);
1273 1 : spdk_vhost_unlock();
1274 1 : return vsession;
1275 : }
1276 : }
1277 1 : pthread_mutex_unlock(&user_dev->lock);
1278 : }
1279 1 : spdk_vhost_unlock();
1280 :
1281 1 : return NULL;
1282 : }
1283 :
1284 : static void
1285 0 : wait_for_semaphore(int timeout_sec, const char *errmsg)
1286 : {
1287 0 : struct timespec timeout;
1288 : int rc;
1289 :
1290 0 : clock_gettime(CLOCK_REALTIME, &timeout);
1291 0 : timeout.tv_sec += timeout_sec;
1292 0 : rc = sem_timedwait(&g_dpdk_sem, &timeout);
1293 0 : if (rc != 0) {
1294 0 : SPDK_ERRLOG("Timeout waiting for event: %s.\n", errmsg);
1295 0 : sem_wait(&g_dpdk_sem);
1296 : }
1297 0 : }
1298 :
1299 : void
1300 0 : vhost_user_session_stop_done(struct spdk_vhost_session *vsession, int response)
1301 : {
1302 0 : if (response == 0) {
1303 0 : vsession->started = false;
1304 : }
1305 :
1306 0 : g_dpdk_response = response;
1307 0 : sem_post(&g_dpdk_sem);
1308 0 : }
1309 :
1310 : static void
1311 0 : vhost_user_session_stop_event(void *arg1)
1312 : {
1313 0 : struct vhost_session_fn_ctx *ctx = arg1;
1314 0 : struct spdk_vhost_dev *vdev = ctx->vdev;
1315 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1316 : struct spdk_vhost_session *vsession;
1317 :
1318 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1319 0 : spdk_thread_send_msg(spdk_get_thread(), vhost_user_session_stop_event, arg1);
1320 0 : return;
1321 : }
1322 :
1323 0 : vsession = vhost_session_find_by_id(vdev, ctx->vsession_id);
1324 0 : user_dev->user_backend->stop_session(vdev, vsession, NULL);
1325 0 : pthread_mutex_unlock(&user_dev->lock);
1326 : }
1327 :
1328 : static int
1329 0 : vhost_user_wait_for_session_stop(struct spdk_vhost_session *vsession,
1330 : unsigned timeout_sec, const char *errmsg)
1331 : {
1332 0 : struct vhost_session_fn_ctx ev_ctx = {0};
1333 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
1334 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1335 :
1336 0 : ev_ctx.vdev = vdev;
1337 0 : ev_ctx.vsession_id = vsession->id;
1338 :
1339 0 : spdk_thread_send_msg(vdev->thread, vhost_user_session_stop_event, &ev_ctx);
1340 :
1341 0 : pthread_mutex_unlock(&user_dev->lock);
1342 0 : wait_for_semaphore(timeout_sec, errmsg);
1343 0 : pthread_mutex_lock(&user_dev->lock);
1344 :
1345 0 : return g_dpdk_response;
1346 : }
1347 :
1348 : static void
1349 0 : foreach_session_finish_cb(void *arg1)
1350 : {
1351 0 : struct vhost_session_fn_ctx *ev_ctx = arg1;
1352 0 : struct spdk_vhost_dev *vdev = ev_ctx->vdev;
1353 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1354 :
1355 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1356 0 : spdk_thread_send_msg(spdk_get_thread(),
1357 : foreach_session_finish_cb, arg1);
1358 0 : return;
1359 : }
1360 :
1361 0 : assert(user_dev->pending_async_op_num > 0);
1362 0 : user_dev->pending_async_op_num--;
1363 0 : if (ev_ctx->cpl_fn != NULL) {
1364 0 : ev_ctx->cpl_fn(vdev, ev_ctx->user_ctx);
1365 : }
1366 :
1367 0 : pthread_mutex_unlock(&user_dev->lock);
1368 0 : free(ev_ctx);
1369 : }
1370 :
1371 : static void
1372 0 : foreach_session(void *arg1)
1373 : {
1374 0 : struct vhost_session_fn_ctx *ev_ctx = arg1;
1375 0 : struct spdk_vhost_dev *vdev = ev_ctx->vdev;
1376 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1377 : struct spdk_vhost_session *vsession;
1378 : int rc;
1379 :
1380 0 : if (pthread_mutex_trylock(&user_dev->lock) != 0) {
1381 0 : spdk_thread_send_msg(spdk_get_thread(), foreach_session, arg1);
1382 0 : return;
1383 : }
1384 :
1385 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1386 0 : rc = ev_ctx->cb_fn(vdev, vsession, ev_ctx->user_ctx);
1387 0 : if (rc < 0) {
1388 0 : goto out;
1389 : }
1390 : }
1391 :
1392 0 : out:
1393 0 : pthread_mutex_unlock(&user_dev->lock);
1394 0 : spdk_thread_send_msg(g_vhost_user_init_thread, foreach_session_finish_cb, arg1);
1395 : }
1396 :
1397 : void
1398 0 : vhost_user_dev_foreach_session(struct spdk_vhost_dev *vdev,
1399 : spdk_vhost_session_fn fn,
1400 : spdk_vhost_dev_fn cpl_fn,
1401 : void *arg)
1402 : {
1403 : struct vhost_session_fn_ctx *ev_ctx;
1404 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1405 :
1406 0 : ev_ctx = calloc(1, sizeof(*ev_ctx));
1407 0 : if (ev_ctx == NULL) {
1408 0 : SPDK_ERRLOG("Failed to alloc vhost event.\n");
1409 0 : assert(false);
1410 : return;
1411 : }
1412 :
1413 0 : ev_ctx->vdev = vdev;
1414 0 : ev_ctx->cb_fn = fn;
1415 0 : ev_ctx->cpl_fn = cpl_fn;
1416 0 : ev_ctx->user_ctx = arg;
1417 :
1418 0 : pthread_mutex_lock(&user_dev->lock);
1419 0 : assert(user_dev->pending_async_op_num < UINT32_MAX);
1420 0 : user_dev->pending_async_op_num++;
1421 0 : pthread_mutex_unlock(&user_dev->lock);
1422 :
1423 0 : spdk_thread_send_msg(vdev->thread, foreach_session, ev_ctx);
1424 : }
1425 :
1426 : void
1427 0 : vhost_user_session_set_interrupt_mode(struct spdk_vhost_session *vsession, bool interrupt_mode)
1428 : {
1429 : uint16_t i;
1430 0 : int rc = 0;
1431 :
1432 0 : for (i = 0; i < vsession->max_queues; i++) {
1433 0 : struct spdk_vhost_virtqueue *q = &vsession->virtqueue[i];
1434 0 : uint64_t num_events = 1;
1435 :
1436 : /* vring.desc and vring.desc_packed are in a union struct
1437 : * so q->vring.desc can replace q->vring.desc_packed.
1438 : */
1439 0 : if (q->vring.desc == NULL || q->vring.size == 0) {
1440 0 : continue;
1441 : }
1442 :
1443 0 : if (interrupt_mode) {
1444 :
1445 :			/* In case of a race condition, always kick the vring when switching to interrupt mode */
1446 0 : rc = write(q->vring.kickfd, &num_events, sizeof(num_events));
1447 0 : if (rc < 0) {
1448 0 : SPDK_ERRLOG("failed to kick vring: %s.\n", spdk_strerror(errno));
1449 : }
1450 :
1451 0 : vsession->interrupt_mode = true;
1452 : } else {
1453 :
1454 0 : vsession->interrupt_mode = false;
1455 : }
1456 : }
1457 0 : }
1458 :
1459 : static int
1460 0 : extern_vhost_pre_msg_handler(int vid, void *_msg)
1461 : {
1462 0 : struct vhost_user_msg *msg = _msg;
1463 : struct spdk_vhost_session *vsession;
1464 : struct spdk_vhost_user_dev *user_dev;
1465 :
1466 0 : vsession = vhost_session_find_by_vid(vid);
1467 0 : if (vsession == NULL) {
1468 0 : SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
1469 0 : assert(false);
1470 : return RTE_VHOST_MSG_RESULT_ERR;
1471 : }
1472 0 : user_dev = to_user_dev(vsession->vdev);
1473 :
1474 0 : switch (msg->request) {
1475 0 : case VHOST_USER_GET_VRING_BASE:
1476 0 : pthread_mutex_lock(&user_dev->lock);
1477 0 : if (vsession->started || vsession->starting) {
1478 0 : pthread_mutex_unlock(&user_dev->lock);
1479 0 : g_spdk_vhost_ops.destroy_device(vid);
1480 0 : break;
1481 : }
1482 0 : pthread_mutex_unlock(&user_dev->lock);
1483 0 : break;
1484 0 : case VHOST_USER_SET_MEM_TABLE:
1485 0 : pthread_mutex_lock(&user_dev->lock);
1486 0 : if (vsession->started || vsession->starting) {
1487 0 : vsession->original_max_queues = vsession->max_queues;
1488 0 : pthread_mutex_unlock(&user_dev->lock);
1489 0 : g_spdk_vhost_ops.destroy_device(vid);
1490 0 : vsession->needs_restart = true;
1491 0 : break;
1492 : }
1493 0 : pthread_mutex_unlock(&user_dev->lock);
1494 0 : break;
1495 0 : case VHOST_USER_GET_CONFIG: {
1496 0 : int rc = 0;
1497 :
1498 0 : pthread_mutex_lock(&user_dev->lock);
1499 0 : if (vsession->vdev->backend->vhost_get_config) {
1500 0 : rc = vsession->vdev->backend->vhost_get_config(vsession->vdev,
1501 0 : msg->payload.cfg.region, msg->payload.cfg.size);
1502 0 : if (rc != 0) {
1503 0 : msg->size = 0;
1504 : }
1505 : }
1506 0 : pthread_mutex_unlock(&user_dev->lock);
1507 :
1508 0 : return RTE_VHOST_MSG_RESULT_REPLY;
1509 : }
1510 0 : case VHOST_USER_SET_CONFIG: {
1511 0 : int rc = 0;
1512 :
1513 0 : pthread_mutex_lock(&user_dev->lock);
1514 0 : if (vsession->vdev->backend->vhost_set_config) {
1515 0 : rc = vsession->vdev->backend->vhost_set_config(vsession->vdev,
1516 0 : msg->payload.cfg.region, msg->payload.cfg.offset,
1517 : msg->payload.cfg.size, msg->payload.cfg.flags);
1518 : }
1519 0 : pthread_mutex_unlock(&user_dev->lock);
1520 :
1521 0 : return rc == 0 ? RTE_VHOST_MSG_RESULT_OK : RTE_VHOST_MSG_RESULT_ERR;
1522 : }
1523 0 : default:
1524 0 : break;
1525 : }
1526 :
1527 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1528 : }
1529 :
1530 : static int
1531 0 : extern_vhost_post_msg_handler(int vid, void *_msg)
1532 : {
1533 0 : struct vhost_user_msg *msg = _msg;
1534 : struct spdk_vhost_session *vsession;
1535 : struct spdk_vhost_user_dev *user_dev;
1536 : uint16_t qid;
1537 : int rc;
1538 :
1539 0 : vsession = vhost_session_find_by_vid(vid);
1540 0 : if (vsession == NULL) {
1541 0 : SPDK_ERRLOG("Received a message to uninitialized session (vid %d).\n", vid);
1542 0 : assert(false);
1543 : return RTE_VHOST_MSG_RESULT_ERR;
1544 : }
1545 0 : user_dev = to_user_dev(vsession->vdev);
1546 :
1547 0 : switch (msg->request) {
1548 0 : case VHOST_USER_SET_FEATURES:
1549 0 : rc = vhost_get_negotiated_features(vid, &vsession->negotiated_features);
1550 0 : if (rc) {
1551 0 : SPDK_ERRLOG("vhost device %d: Failed to get negotiated driver features\n", vid);
1552 0 : return RTE_VHOST_MSG_RESULT_ERR;
1553 : }
1554 0 : break;
1555 0 : case VHOST_USER_SET_VRING_CALL:
1556 0 : qid = (uint16_t)msg->payload.u64;
1557 0 : rc = set_device_vq_callfd(vsession, qid);
1558 0 : if (rc) {
1559 0 : return RTE_VHOST_MSG_RESULT_ERR;
1560 : }
1561 0 : break;
1562 0 : case VHOST_USER_SET_VRING_KICK:
1563 0 : qid = (uint16_t)msg->payload.u64;
1564 0 : rc = enable_device_vq(vsession, qid);
1565 0 : if (rc) {
1566 0 : return RTE_VHOST_MSG_RESULT_ERR;
1567 : }
1568 :
1569 : /* vhost-user spec tells us to start polling a queue after receiving
1570 : * its SET_VRING_KICK message. Let's do it!
1571 : */
1572 0 : pthread_mutex_lock(&user_dev->lock);
1573 0 : if (!vsession->started && !vsession->starting) {
1574 0 : pthread_mutex_unlock(&user_dev->lock);
1575 0 : g_spdk_vhost_ops.new_device(vid);
1576 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1577 : }
1578 0 : pthread_mutex_unlock(&user_dev->lock);
1579 0 : break;
1580 0 : case VHOST_USER_SET_MEM_TABLE:
1581 0 : vhost_register_memtable_if_required(vsession, vid);
1582 0 : pthread_mutex_lock(&user_dev->lock);
1583 0 : if (vsession->needs_restart) {
1584 0 : pthread_mutex_unlock(&user_dev->lock);
1585 0 : for (qid = 0; qid < vsession->original_max_queues; qid++) {
1586 0 : enable_device_vq(vsession, qid);
1587 : }
1588 0 : vsession->original_max_queues = 0;
1589 0 : vsession->needs_restart = false;
1590 0 : g_spdk_vhost_ops.new_device(vid);
1591 0 : break;
1592 : }
1593 0 : pthread_mutex_unlock(&user_dev->lock);
1594 0 : break;
1595 0 : default:
1596 0 : break;
1597 : }
1598 :
1599 0 : return RTE_VHOST_MSG_RESULT_NOT_HANDLED;
1600 : }
1601 :
1602 : struct rte_vhost_user_extern_ops g_spdk_extern_vhost_ops = {
1603 : .pre_msg_handle = extern_vhost_pre_msg_handler,
1604 : .post_msg_handle = extern_vhost_post_msg_handler,
1605 : };
1606 :
1607 : void
1608 0 : vhost_session_install_rte_compat_hooks(struct spdk_vhost_session *vsession)
1609 : {
1610 : int rc;
1611 :
1612 0 : rc = rte_vhost_extern_callback_register(vsession->vid, &g_spdk_extern_vhost_ops, NULL);
1613 0 : if (rc != 0) {
1614 0 : SPDK_ERRLOG("rte_vhost_extern_callback_register() failed for vid = %d\n",
1615 : vsession->vid);
1616 0 : return;
1617 : }
1618 : }
1619 :
1620 : int
1621 9 : vhost_register_unix_socket(const char *path, const char *ctrl_name,
1622 : uint64_t virtio_features, uint64_t disabled_features, uint64_t protocol_features)
1623 : {
1624 9 : struct stat file_stat;
1625 9 : uint64_t features = 0;
1626 9 : uint64_t flags = 0;
1627 :
1628 : /* Register vhost driver to handle vhost messages. */
1629 9 : if (stat(path, &file_stat) != -1) {
1630 0 : if (!S_ISSOCK(file_stat.st_mode)) {
1631 0 : SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
1632 : "The file already exists and is not a socket.\n",
1633 : path);
1634 0 : return -EIO;
1635 0 : } else if (unlink(path) != 0) {
1636 0 : SPDK_ERRLOG("Cannot create a domain socket at path \"%s\": "
1637 : "The socket already exists and failed to unlink.\n",
1638 : path);
1639 0 : return -EIO;
1640 : }
1641 : }
1642 :
1643 9 : flags = spdk_iommu_is_enabled() ? 0 : RTE_VHOST_USER_ASYNC_COPY;
1644 9 : if (rte_vhost_driver_register(path, flags) != 0) {
1645 0 : SPDK_ERRLOG("Could not register controller %s with vhost library\n", ctrl_name);
1646 0 : SPDK_ERRLOG("Check if domain socket %s already exists\n", path);
1647 0 : return -EIO;
1648 : }
1649 18 : if (rte_vhost_driver_set_features(path, virtio_features) ||
1650 9 : rte_vhost_driver_disable_features(path, disabled_features)) {
1651 0 : SPDK_ERRLOG("Couldn't set vhost features for controller %s\n", ctrl_name);
1652 :
1653 0 : rte_vhost_driver_unregister(path);
1654 0 : return -EIO;
1655 : }
1656 :
1657 9 : if (rte_vhost_driver_callback_register(path, &g_spdk_vhost_ops) != 0) {
1658 0 : rte_vhost_driver_unregister(path);
1659 0 : SPDK_ERRLOG("Couldn't register callbacks for controller %s\n", ctrl_name);
1660 0 : return -EIO;
1661 : }
1662 :
1663 9 : rte_vhost_driver_get_protocol_features(path, &features);
1664 9 : features |= protocol_features;
1665 9 : rte_vhost_driver_set_protocol_features(path, features);
1666 :
1667 9 : if (rte_vhost_driver_start(path) != 0) {
1668 0 : SPDK_ERRLOG("Failed to start vhost driver for controller %s (%d): %s\n",
1669 : ctrl_name, errno, spdk_strerror(errno));
1670 0 : rte_vhost_driver_unregister(path);
1671 0 : return -EIO;
1672 : }
1673 :
1674 9 : return 0;
1675 : }
1676 :
1677 : int
1678 0 : vhost_get_mem_table(int vid, struct rte_vhost_memory **mem)
1679 : {
1680 0 : return rte_vhost_get_mem_table(vid, mem);
1681 : }
1682 :
1683 : int
1684 9 : vhost_driver_unregister(const char *path)
1685 : {
1686 9 : return rte_vhost_driver_unregister(path);
1687 : }
1688 :
1689 : int
1690 0 : vhost_get_negotiated_features(int vid, uint64_t *negotiated_features)
1691 : {
1692 0 : return rte_vhost_get_negotiated_features(vid, negotiated_features);
1693 : }
1694 :
1695 : int
1696 9 : vhost_user_dev_set_coalescing(struct spdk_vhost_user_dev *user_dev, uint32_t delay_base_us,
1697 : uint32_t iops_threshold)
1698 : {
1699 9 : uint64_t delay_time_base = delay_base_us * spdk_get_ticks_hz() / 1000000ULL;
1700 9 : uint32_t io_rate = iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
1701 :
1702 9 : if (delay_time_base >= UINT32_MAX) {
1703 0 : 		SPDK_ERRLOG("Delay time of %"PRIu32" is too big\n", delay_base_us);
1704 0 : return -EINVAL;
1705 9 : } else if (io_rate == 0) {
1706 0 : SPDK_ERRLOG("IOPS rate of %"PRIu32" is too low. Min is %u\n", io_rate,
1707 : 1000U / SPDK_VHOST_STATS_CHECK_INTERVAL_MS);
1708 0 : return -EINVAL;
1709 : }
1710 :
1711 9 : user_dev->coalescing_delay_us = delay_base_us;
1712 9 : user_dev->coalescing_iops_threshold = iops_threshold;
1713 9 : return 0;
1714 : }
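
/*
 * Worked example with hypothetical numbers: delay_base_us = 50 and
 * iops_threshold = 60000 give io_rate = 60000 *
 * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000 requests per stats window and
 * delay_time_base = 50 us converted to CPU ticks. An iops_threshold below
 * 1000 / SPDK_VHOST_STATS_CHECK_INTERVAL_MS would make io_rate zero and is
 * rejected with -EINVAL.
 */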
1715 :
1716 : int
1717 0 : vhost_user_session_set_coalescing(struct spdk_vhost_dev *vdev,
1718 : struct spdk_vhost_session *vsession, void *ctx)
1719 : {
1720 0 : vsession->coalescing_delay_time_base =
1721 0 : to_user_dev(vdev)->coalescing_delay_us * spdk_get_ticks_hz() / 1000000ULL;
1722 0 : vsession->coalescing_io_rate_threshold =
1723 0 : to_user_dev(vdev)->coalescing_iops_threshold * SPDK_VHOST_STATS_CHECK_INTERVAL_MS / 1000U;
1724 0 : return 0;
1725 : }
1726 :
1727 : int
1728 0 : vhost_user_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
1729 : uint32_t iops_threshold)
1730 : {
1731 : int rc;
1732 :
1733 0 : rc = vhost_user_dev_set_coalescing(to_user_dev(vdev), delay_base_us, iops_threshold);
1734 0 : if (rc != 0) {
1735 0 : return rc;
1736 : }
1737 :
1738 0 : vhost_user_dev_foreach_session(vdev, vhost_user_session_set_coalescing, NULL, NULL);
1739 :
1740 0 : return 0;
1741 : }
1742 :
1743 : void
1744 0 : vhost_user_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
1745 : uint32_t *iops_threshold)
1746 : {
1747 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1748 :
1749 0 : if (delay_base_us) {
1750 0 : *delay_base_us = user_dev->coalescing_delay_us;
1751 : }
1752 :
1753 0 : if (iops_threshold) {
1754 0 : *iops_threshold = user_dev->coalescing_iops_threshold;
1755 : }
1756 0 : }
1757 :
1758 : int
1759 0 : spdk_vhost_set_socket_path(const char *basename)
1760 : {
1761 : int ret;
1762 :
1763 0 : if (basename && strlen(basename) > 0) {
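 : 		/* Reserve two bytes so that a trailing '/' and the terminating NUL
 : 		 * can always be appended below.
 : 		 */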
1764 0 : ret = snprintf(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 2, "%s", basename);
1765 0 : if (ret <= 0) {
1766 0 : return -EINVAL;
1767 : }
1768 0 : if ((size_t)ret >= sizeof(g_vhost_user_dev_dirname) - 2) {
1769 0 : SPDK_ERRLOG("Char dev dir path is too long (%d characters)\n", ret);
1770 0 : return -EINVAL;
1771 : }
1772 :
1773 0 : if (g_vhost_user_dev_dirname[ret - 1] != '/') {
1774 0 : g_vhost_user_dev_dirname[ret] = '/';
1775 0 : g_vhost_user_dev_dirname[ret + 1] = '\0';
1776 : }
1777 : }
1778 :
1779 0 : return 0;
1780 : }
1781 :
1782 : static void
1783 0 : vhost_dev_thread_exit(void *arg1)
1784 : {
1785 0 : spdk_thread_exit(spdk_get_thread());
1786 0 : }
1787 :
1788 : static bool g_vhost_user_started = false;
1789 :
1790 : int
1791 10 : vhost_user_dev_init(struct spdk_vhost_dev *vdev, const char *name,
1792 : struct spdk_cpuset *cpumask, const struct spdk_vhost_user_dev_backend *user_backend)
1793 : {
1794 10 : char path[PATH_MAX];
1795 : struct spdk_vhost_user_dev *user_dev;
1796 :
1797 10 : if (snprintf(path, sizeof(path), "%s%s", g_vhost_user_dev_dirname, name) >= (int)sizeof(path)) {
1798 1 : SPDK_ERRLOG("Resulting socket path for controller %s is too long: %s%s\n",
1799 : name, g_vhost_user_dev_dirname, name);
1800 1 : return -EINVAL;
1801 : }
1802 :
1803 9 : vdev->path = strdup(path);
1804 9 : if (vdev->path == NULL) {
1805 0 : return -EIO;
1806 : }
1807 :
1808 9 : user_dev = calloc(1, sizeof(*user_dev));
1809 9 : if (user_dev == NULL) {
1810 0 : free(vdev->path);
1811 0 : return -ENOMEM;
1812 : }
1813 9 : vdev->ctxt = user_dev;
1814 :
1815 9 : vdev->thread = spdk_thread_create(vdev->name, cpumask);
1816 9 : if (vdev->thread == NULL) {
1817 0 : free(user_dev);
1818 0 : free(vdev->path);
1819 0 : SPDK_ERRLOG("Failed to create thread for vhost controller %s.\n", name);
1820 0 : return -EIO;
1821 : }
1822 :
1823 9 : user_dev->user_backend = user_backend;
1824 9 : user_dev->vdev = vdev;
1825 9 : user_dev->registered = true;
1826 9 : TAILQ_INIT(&user_dev->vsessions);
1827 9 : pthread_mutex_init(&user_dev->lock, NULL);
1828 :
1829 9 : vhost_user_dev_set_coalescing(user_dev, SPDK_VHOST_COALESCING_DELAY_BASE_US,
1830 : SPDK_VHOST_VQ_IOPS_COALESCING_THRESHOLD);
1831 :
1832 9 : return 0;
1833 : }
1834 :
1835 : int
1836 9 : vhost_user_dev_start(struct spdk_vhost_dev *vdev)
1837 : {
1838 9 : return vhost_register_unix_socket(vdev->path, vdev->name, vdev->virtio_features,
1839 : vdev->disabled_features,
1840 : vdev->protocol_features);
1841 : }
1842 :
1843 : int
1844 10 : vhost_user_dev_create(struct spdk_vhost_dev *vdev, const char *name, struct spdk_cpuset *cpumask,
1845 : const struct spdk_vhost_user_dev_backend *user_backend, bool delay)
1846 : {
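 : 	/* When 'delay' is set, registration of the domain socket is deferred until
 : 	 * vhost_user_dev_start() is called.
 : 	 */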
1847 : int rc;
1848 : struct spdk_vhost_user_dev *user_dev;
1849 :
1850 10 : rc = vhost_user_dev_init(vdev, name, cpumask, user_backend);
1851 10 : if (rc != 0) {
1852 1 : return rc;
1853 : }
1854 :
1855 9 : if (delay == false) {
1856 9 : rc = vhost_user_dev_start(vdev);
1857 9 : if (rc != 0) {
1858 0 : user_dev = to_user_dev(vdev);
1859 0 : spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
1860 0 : pthread_mutex_destroy(&user_dev->lock);
1861 0 : free(user_dev);
1862 0 : free(vdev->path);
1863 : }
1864 : }
1865 :
1866 9 : return rc;
1867 : }
1868 :
1869 : int
1870 10 : vhost_user_dev_unregister(struct spdk_vhost_dev *vdev)
1871 : {
1872 10 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vdev);
1873 : struct spdk_vhost_session *vsession, *tmp_vsession;
1874 :
1875 10 : pthread_mutex_lock(&user_dev->lock);
1876 10 : if (user_dev->pending_async_op_num) {
1877 0 : pthread_mutex_unlock(&user_dev->lock);
1878 0 : return -EBUSY;
1879 : }
1880 :
1881 : /* This is the case where the RPC call `vhost_delete_controller` is used while a VM is still connected. */
1882 10 : if (!TAILQ_EMPTY(&user_dev->vsessions) && g_vhost_user_started) {
1883 1 : SPDK_ERRLOG("Controller %s still has a valid connection.\n", vdev->name);
1884 1 : pthread_mutex_unlock(&user_dev->lock);
1885 1 : return -EBUSY;
1886 : }
1887 :
1888 : /* This is the case where the subsystem quits while a VM is still connected;
1889 : * the VM should be stopped by the shutdown thread.
1890 : */
1891 9 : if (!g_vhost_user_started) {
1892 0 : TAILQ_FOREACH_SAFE(vsession, &user_dev->vsessions, tailq, tmp_vsession) {
1893 0 : assert(vsession->started == false);
1894 0 : TAILQ_REMOVE(&user_dev->vsessions, vsession, tailq);
1895 0 : if (vsession->mem) {
1896 0 : vhost_session_mem_unregister(vsession->mem);
1897 0 : free(vsession->mem);
1898 : }
1899 0 : free(vsession->name);
1900 0 : free(vsession);
1901 : }
1902 : }
1903 :
1904 9 : user_dev->registered = false;
1905 9 : pthread_mutex_unlock(&user_dev->lock);
1906 :
1907 : /* There are no valid connections now, and it's not an error if the domain
1908 : * socket was already removed by the shutdown thread.
1909 : */
1910 9 : vhost_driver_unregister(vdev->path);
1911 :
1912 9 : spdk_thread_send_msg(vdev->thread, vhost_dev_thread_exit, NULL);
1913 9 : pthread_mutex_destroy(&user_dev->lock);
1914 :
1915 9 : free(user_dev);
1916 9 : free(vdev->path);
1917 :
1918 9 : return 0;
1919 : }
1920 :
1921 : int
1922 2 : vhost_user_init(void)
1923 : {
1924 : size_t len;
1925 :
1926 2 : if (g_vhost_user_started) {
1927 1 : return 0;
1928 : }
1929 :
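 : 	/* If no socket directory was set via spdk_vhost_set_socket_path(), default
 : 	 * to the current working directory and make sure it ends with a '/'.
 : 	 */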
1930 1 : if (g_vhost_user_dev_dirname[0] == '\0') {
1931 1 : if (getcwd(g_vhost_user_dev_dirname, sizeof(g_vhost_user_dev_dirname) - 1) == NULL) {
1932 0 : SPDK_ERRLOG("getcwd failed (%d): %s\n", errno, spdk_strerror(errno));
1933 0 : return -1;
1934 : }
1935 :
1936 1 : len = strlen(g_vhost_user_dev_dirname);
1937 1 : if (g_vhost_user_dev_dirname[len - 1] != '/') {
1938 1 : g_vhost_user_dev_dirname[len] = '/';
1939 1 : g_vhost_user_dev_dirname[len + 1] = '\0';
1940 : }
1941 : }
1942 :
1943 1 : g_vhost_user_started = true;
1944 :
1945 1 : g_vhost_user_init_thread = spdk_get_thread();
1946 1 : assert(g_vhost_user_init_thread != NULL);
1947 :
1948 1 : return 0;
1949 : }
1950 :
1951 : static void
1952 1 : vhost_user_session_shutdown_on_init(void *vhost_cb)
1953 : {
1954 1 : spdk_vhost_fini_cb fn = vhost_cb;
1955 :
1956 1 : fn();
1957 1 : }
1958 :
1959 : static void *
1960 1 : vhost_user_session_shutdown(void *vhost_cb)
1961 : {
1962 1 : struct spdk_vhost_dev *vdev = NULL;
1963 : struct spdk_vhost_session *vsession;
1964 : struct spdk_vhost_user_dev *user_dev;
1965 : int ret;
1966 :
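 : 	/* Stop every active session on every controller; unregister a controller's
 : 	 * domain socket only if all of its sessions stopped successfully.
 : 	 */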
1967 1 : for (vdev = spdk_vhost_dev_next(NULL); vdev != NULL;
1968 0 : vdev = spdk_vhost_dev_next(vdev)) {
1969 0 : user_dev = to_user_dev(vdev);
1970 0 : ret = 0;
1971 0 : pthread_mutex_lock(&user_dev->lock);
1972 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
1973 0 : if (vsession->started || vsession->starting) {
1974 0 : ret += _stop_session(vsession);
1975 : }
1976 : }
1977 0 : pthread_mutex_unlock(&user_dev->lock);
1978 0 : if (ret == 0) {
1979 0 : vhost_driver_unregister(vdev->path);
1980 : }
1981 : }
1982 :
1983 1 : SPDK_INFOLOG(vhost, "Exiting\n");
1984 1 : spdk_thread_send_msg(g_vhost_user_init_thread, vhost_user_session_shutdown_on_init, vhost_cb);
1985 1 : return NULL;
1986 : }
1987 :
1988 : void
1989 2 : vhost_user_fini(spdk_vhost_fini_cb vhost_cb)
1990 : {
1991 2 : pthread_t tid;
1992 : int rc;
1993 :
1994 2 : if (!g_vhost_user_started) {
1995 1 : vhost_cb();
1996 1 : return;
1997 : }
1998 :
1999 1 : g_vhost_user_started = false;
2000 :
2001 : /* The rte_vhost API for removing sockets is synchronous. Since it may call SPDK
2002 : * ops for stopping a device or removing a connection, we need to call it from
2003 : * a separate thread to avoid deadlock.
2004 : */
2005 1 : rc = pthread_create(&tid, NULL, &vhost_user_session_shutdown, vhost_cb);
2006 1 : if (rc != 0) {
2007 0 : SPDK_ERRLOG("Failed to start session shutdown thread (%d): %s\n", rc, spdk_strerror(rc));
2008 0 : abort();
2009 : }
2010 1 : pthread_detach(tid);
2011 : }
2012 :
2013 : void
2014 0 : vhost_session_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
2015 : {
2016 : struct spdk_vhost_session *vsession;
2017 : struct spdk_vhost_user_dev *user_dev;
2018 :
2019 0 : user_dev = to_user_dev(vdev);
2020 0 : pthread_mutex_lock(&user_dev->lock);
2021 0 : TAILQ_FOREACH(vsession, &user_dev->vsessions, tailq) {
2022 0 : spdk_json_write_object_begin(w);
2023 0 : spdk_json_write_named_uint32(w, "vid", vsession->vid);
2024 0 : spdk_json_write_named_uint32(w, "id", vsession->id);
2025 0 : spdk_json_write_named_string(w, "name", vsession->name);
2026 0 : spdk_json_write_named_bool(w, "started", vsession->started);
2027 0 : spdk_json_write_named_uint32(w, "max_queues", vsession->max_queues);
2028 0 : spdk_json_write_named_uint32(w, "inflight_task_cnt", vsession->task_cnt);
2029 0 : spdk_json_write_object_end(w);
2030 : }
2031 0 : pthread_mutex_unlock(&user_dev->lock);
2032 0 : }
|