/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright (C) 2017 Intel Corporation.
 * All rights reserved.
 */

#include <linux/virtio_blk.h>

#include "spdk/env.h"
#include "spdk/bdev.h"
#include "spdk/bdev_module.h"
#include "spdk/thread.h"
#include "spdk/likely.h"
#include "spdk/string.h"
#include "spdk/util.h"
#include "spdk/vhost.h"
#include "spdk/json.h"

#include "vhost_internal.h"
#include <rte_version.h>

/* Minimal set of features supported by every SPDK VHOST-BLK device */
#define SPDK_VHOST_BLK_FEATURES_BASE (SPDK_VHOST_FEATURES | \
		(1ULL << VIRTIO_BLK_F_SIZE_MAX) | (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
		(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
		(1ULL << VIRTIO_BLK_F_TOPOLOGY) | (1ULL << VIRTIO_BLK_F_BARRIER) | \
		(1ULL << VIRTIO_BLK_F_SCSI) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
		(1ULL << VIRTIO_BLK_F_MQ))

/* Features that are not supported */
#define SPDK_VHOST_BLK_DISABLED_FEATURES (SPDK_VHOST_DISABLED_FEATURES | \
		(1ULL << VIRTIO_BLK_F_GEOMETRY) | (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
		(1ULL << VIRTIO_BLK_F_BARRIER) | (1ULL << VIRTIO_BLK_F_SCSI))

/* Protocol features supported by vhost-blk */
#define SPDK_VHOST_BLK_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
		(1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))
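
/*
 * Illustrative sketch, not part of the upstream code: the masks above are
 * plain bit algebra over the virtio feature space. Roughly, the device offers
 * the base features minus the disabled ones, and the driver's ack is
 * intersected with that offer (the actual negotiation is performed by
 * rte_vhost, not by this file):
 *
 *	uint64_t offered = SPDK_VHOST_BLK_FEATURES_BASE &
 *			   ~SPDK_VHOST_BLK_DISABLED_FEATURES;
 *	uint64_t negotiated = offered & driver_acked_features;
 */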

#define VIRTIO_BLK_DEFAULT_TRANSPORT "vhost_user_blk"

struct spdk_vhost_user_blk_task {
	struct spdk_vhost_blk_task blk_task;
	struct spdk_vhost_blk_session *bvsession;
	struct spdk_vhost_virtqueue *vq;

	uint16_t req_idx;
	uint16_t num_descs;
	uint16_t buffer_id;
	uint16_t inflight_head;

	/* If set, the task is currently used for I/O processing. */
	bool used;
};

struct spdk_vhost_blk_dev {
	struct spdk_vhost_dev vdev;
	struct spdk_bdev *bdev;
	struct spdk_bdev_desc *bdev_desc;
	const struct spdk_virtio_blk_transport_ops *ops;

	bool readonly;
	/* Next poll group index to be assigned */
	uint32_t next_pg_index;
};

struct vhost_user_pg_vq_info {
	struct vhost_user_poll_group *pg;
	struct spdk_vhost_virtqueue *vq;
	struct spdk_vhost_session *vsession;

	TAILQ_ENTRY(vhost_user_pg_vq_info) link;
};

struct vhost_user_poll_group {
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_session *vsession;

	struct spdk_thread *thread;
	struct spdk_poller *requestq_poller;
	struct spdk_io_channel *io_channel;

	int task_cnt;

	TAILQ_HEAD(, vhost_user_pg_vq_info) vqs;

	struct spdk_poller *stop_poller;
	uint32_t stop_retry_count;
};

struct spdk_vhost_blk_session {
	/* The parent session must be the very first field in this struct */
	struct spdk_vhost_session vsession;
	struct spdk_vhost_blk_dev *bvdev;
	struct spdk_poller *stop_poller;

	struct spdk_thread *thread;
	struct vhost_user_poll_group *poll_groups;
	uint32_t num_poll_groups;

	uint32_t num_stopped_poll_groups;
};

/* forward declaration */
static const struct spdk_vhost_dev_backend vhost_blk_device_backend;

static void vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task,
		void *cb_arg);

static void session_stop_poll_groups(struct spdk_vhost_blk_session *bvsession);

static int
vhost_user_process_blk_request(struct spdk_vhost_user_blk_task *user_task)
{
	struct spdk_vhost_blk_session *bvsession = user_task->bvsession;
	struct spdk_vhost_dev *vdev = &bvsession->bvdev->vdev;
	struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)user_task->vq->poll_group;

	return virtio_blk_process_request(vdev, pg->io_channel, &user_task->blk_task,
					  vhost_user_blk_request_finish, NULL);
}

static struct spdk_vhost_blk_dev *
to_blk_dev(struct spdk_vhost_dev *vdev)
{
	if (vdev == NULL) {
		return NULL;
	}

	if (vdev->backend->type != VHOST_BACKEND_BLK) {
		SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name);
		return NULL;
	}

	return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev);
}
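
/*
 * A minimal sketch of what SPDK_CONTAINEROF() above expands to, assuming the
 * usual container_of() pattern: the outer struct is recovered by subtracting
 * the member offset from the member pointer.
 *
 *	(struct spdk_vhost_blk_dev *)((uintptr_t)vdev -
 *		offsetof(struct spdk_vhost_blk_dev, vdev))
 *
 * This is valid because every vhost-blk vdev handed out by this module is
 * embedded in a struct spdk_vhost_blk_dev.
 */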

struct spdk_bdev *
vhost_blk_get_bdev(struct spdk_vhost_dev *vdev)
{
	struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);

	assert(bvdev != NULL);

	return bvdev->bdev;
}

static struct spdk_vhost_blk_session *
to_blk_session(struct spdk_vhost_session *vsession)
{
	assert(vsession->vdev->backend->type == VHOST_BACKEND_BLK);
	return (struct spdk_vhost_blk_session *)vsession;
}

static inline void
blk_task_inc_task_cnt(struct spdk_vhost_user_blk_task *task)
{
	struct spdk_vhost_virtqueue *vq = task->vq;
	struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;

	pg->task_cnt++;
}

static inline void
blk_task_dec_task_cnt(struct spdk_vhost_user_blk_task *task)
{
	struct spdk_vhost_virtqueue *vq = task->vq;
	struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;

	assert(pg->task_cnt > 0);
	pg->task_cnt--;
}

static void
blk_task_finish(struct spdk_vhost_user_blk_task *task)
{
	blk_task_dec_task_cnt(task);
	task->used = false;
}

static void
blk_task_init(struct spdk_vhost_user_blk_task *task)
{
	struct spdk_vhost_blk_task *blk_task = &task->blk_task;

	task->used = true;
	blk_task->iovcnt = SPDK_COUNTOF(blk_task->iovs);
	blk_task->status = NULL;
	blk_task->used_len = 0;
	blk_task->payload_size = 0;
}

static void
blk_task_enqueue(struct spdk_vhost_user_blk_task *task)
{
	if (task->vq->packed.packed_ring) {
		vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq,
					     task->num_descs,
					     task->buffer_id, task->blk_task.used_len,
					     task->inflight_head);
	} else {
		vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq,
					   task->req_idx, task->blk_task.used_len);
	}
}

static void
vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task, void *cb_arg)
{
	struct spdk_vhost_user_blk_task *user_task;

	user_task = SPDK_CONTAINEROF(task, struct spdk_vhost_user_blk_task, blk_task);

	blk_task_enqueue(user_task);

	SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d status: %" PRIu8 "\n",
		      user_task, user_task->req_idx, status);
	blk_task_finish(user_task);
}

static void
blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task)
{
	if (task->status) {
		*task->status = status;
	}

	task->cb(status, task, task->cb_arg);
}

/*
 * Process the task's descriptor chain and set up the data-related fields.
 * On success, the total size of the supplied buffers is returned through
 * *length.
 *
 * FIXME: Make this function return rd_cnt and wr_cnt.
 */
static int
blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
			   struct spdk_vhost_virtqueue *vq,
			   uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct vring_desc *desc, *desc_table;
	uint16_t out_cnt = 0, cnt = 0;
	uint32_t desc_table_size, len = 0;
	uint32_t desc_handled_cnt;
	int rc;

	rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
	if (rc != 0) {
		SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
		return -1;
	}

	desc_handled_cnt = 0;
	while (1) {
		/*
		 * Maximum cnt reached?
		 * Should not happen if the request is well-formed; otherwise this is a BUG.
		 */
		if (spdk_unlikely(cnt == *iovs_cnt)) {
			SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
				      vsession->name, req_idx);
			return -1;
		}

		if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) {
			SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				      vsession->name, req_idx, cnt);
			return -1;
		}

		len += desc->len;

		out_cnt += vhost_vring_desc_is_wr(desc);

		rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
		if (rc != 0) {
			SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n",
				    vsession->name, req_idx);
			return -1;
		} else if (desc == NULL) {
			break;
		}

		desc_handled_cnt++;
		if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
			/* Break the cycle and report an error, if any. */
			SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n",
				    vsession->name, desc_table_size, desc_handled_cnt);
			return -1;
		}
	}

	/*
	 * There must be at least two descriptors.
	 * The first contains the request, so it must be readable.
	 * The last descriptor contains the buffer for the response, so it must be writable.
	 */
	if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
		return -1;
	}

	*length = len;
	*iovs_cnt = cnt;
	return 0;
}
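
/*
 * For reference, the descriptor chain layout the setup helpers above and
 * below expect (matching the checks in virtio_blk_process_request()):
 *
 *	iovs[0]                struct virtio_blk_outhdr (16 bytes, readable)
 *	iovs[1..iovcnt - 2]    payload buffers, if any
 *	iovs[iovcnt - 1]       1-byte status, writable by the device
 *
 * Hence the "out_cnt == 0 || cnt < 2" rejection: a valid request needs at
 * least one writable descriptor (the status byte) and two descriptors total.
 */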

static int
blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
			   struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			   struct vring_packed_desc *desc_table, uint16_t desc_table_size,
			   struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct vring_packed_desc *desc;
	uint16_t cnt = 0, out_cnt = 0;
	uint32_t len = 0;

	if (desc_table == NULL) {
		desc = &vq->vring.desc_packed[req_idx];
	} else {
		req_idx = 0;
		desc = desc_table;
	}

	while (1) {
		/*
		 * Maximum cnt reached?
		 * Should not happen if the request is well-formed; otherwise this is a BUG.
		 */
		if (spdk_unlikely(cnt == *iovs_cnt)) {
			SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx);
			return -EINVAL;
		}

		if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
			SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx, cnt);
			return -EINVAL;
		}

		len += desc->len;
		out_cnt += vhost_vring_packed_desc_is_wr(desc);

		/* A NULL desc means we have reached the last desc of this request */
		vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
		if (desc == NULL) {
			break;
		}
	}

	/*
	 * There must be at least two descriptors.
	 * The first contains the request, so it must be readable.
	 * The last descriptor contains the buffer for the response, so it must be writable.
	 */
	if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
		return -EINVAL;
	}

	*length = len;
	*iovs_cnt = cnt;

	return 0;
}

static int
blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
			    struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			    struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_dev *vdev = vsession->vdev;
	struct vring_packed_desc *desc = NULL, *desc_table;
	uint32_t desc_table_size;
	int rc;

	rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
				      &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
		return rc;
	}

	return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
					  iovs, iovs_cnt, length);
}

static int
blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
			      struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
			      struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_dev *vdev = vsession->vdev;
	spdk_vhost_inflight_desc *inflight_desc;
	struct vring_packed_desc *desc_table;
	uint16_t out_cnt = 0, cnt = 0;
	uint32_t desc_table_size, len = 0;
	int rc = 0;

	rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
					   req_idx, &inflight_desc, &desc_table, &desc_table_size);
	if (spdk_unlikely(rc != 0)) {
		SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
		return rc;
	}

	if (desc_table != NULL) {
		return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
						  iovs, iovs_cnt, length);
	}

	while (1) {
		/*
		 * Maximum cnt reached?
		 * Should not happen if the request is well-formed; otherwise this is a BUG.
		 */
		if (spdk_unlikely(cnt == *iovs_cnt)) {
			SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx);
			return -EINVAL;
		}

		if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
			SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
				    vsession->name, req_idx, cnt);
			return -EINVAL;
		}

		len += inflight_desc->len;
		out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);

		/* The absence of F_NEXT means it's the last desc */
		if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
			break;
		}

		inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
	}

	/*
	 * There must be at least two descriptors.
	 * The first contains the request, so it must be readable.
	 * The last descriptor contains the buffer for the response, so it must be writable.
	 */
	if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
		return -EINVAL;
	}

	*length = len;
	*iovs_cnt = cnt;

	return 0;
}

static void
blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
{
	struct spdk_vhost_blk_task *task = cb_arg;

	spdk_bdev_free_io(bdev_io);
	blk_request_finish(success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR, task);
}

static void
blk_request_resubmit(void *arg)
{
	struct spdk_vhost_blk_task *task = arg;
	int rc = 0;

	rc = virtio_blk_process_request(task->bdev_io_wait_vdev, task->bdev_io_wait_ch, task,
					task->cb, task->cb_arg);
	if (rc == 0) {
		SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task);
	} else {
		SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task);
	}
}

static inline void
blk_request_queue_io(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
		     struct spdk_vhost_blk_task *task)
{
	int rc;
	struct spdk_bdev *bdev = vhost_blk_get_bdev(vdev);

	task->bdev_io_wait.bdev = bdev;
	task->bdev_io_wait.cb_fn = blk_request_resubmit;
	task->bdev_io_wait.cb_arg = task;
	task->bdev_io_wait_ch = ch;
	task->bdev_io_wait_vdev = vdev;

	rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait);
	if (rc != 0) {
		blk_request_finish(VIRTIO_BLK_S_IOERR, task);
	}
}

int
virtio_blk_process_request(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
			   struct spdk_vhost_blk_task *task, virtio_blk_request_cb cb, void *cb_arg)
{
	struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
	struct virtio_blk_outhdr req;
	struct virtio_blk_discard_write_zeroes *desc;
	struct iovec *iov;
	uint32_t type;
	uint64_t flush_bytes;
	uint32_t payload_len;
	uint16_t iovcnt;
	int rc;

	assert(bvdev != NULL);

	task->cb = cb;
	task->cb_arg = cb_arg;

	iov = &task->iovs[0];
	if (spdk_unlikely(iov->iov_len != sizeof(req))) {
		SPDK_DEBUGLOG(vhost_blk,
			      "First descriptor size is %zu but expected %zu (task = %p).\n",
			      iov->iov_len, sizeof(req), task);
		blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
		return -1;
	}

	/* Some SeaBIOS versions don't align the virtio_blk_outhdr on an 8-byte boundary, which
	 * triggers ubsan errors. So copy this small 16-byte structure to the stack to work around
	 * this problem.
	 */
	memcpy(&req, iov->iov_base, sizeof(req));

	iov = &task->iovs[task->iovcnt - 1];
	if (spdk_unlikely(iov->iov_len != 1)) {
		SPDK_DEBUGLOG(vhost_blk,
			      "Last descriptor size is %zu but expected %d (task = %p).\n",
			      iov->iov_len, 1, task);
		blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
		return -1;
	}

	payload_len = task->payload_size;
	task->status = iov->iov_base;
	payload_len -= sizeof(req) + sizeof(*task->status);
	iovcnt = task->iovcnt - 2;

	type = req.type;
#ifdef VIRTIO_BLK_T_BARRIER
	/* Don't care about barrier for now (as QEMU's virtio-blk does). */
	type &= ~VIRTIO_BLK_T_BARRIER;
#endif

	switch (type) {
	case VIRTIO_BLK_T_IN:
	case VIRTIO_BLK_T_OUT:
		if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) {
			SPDK_ERRLOG("%s - passed IO buffer is not a multiple of 512B (task = %p).\n",
				    type ? "WRITE" : "READ", task);
			blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
			return -1;
		}

		if (type == VIRTIO_BLK_T_IN) {
			task->used_len = payload_len + sizeof(*task->status);
			rc = spdk_bdev_readv(bvdev->bdev_desc, ch,
					     &task->iovs[1], iovcnt, req.sector * 512,
					     payload_len, blk_request_complete_cb, task);
		} else if (!bvdev->readonly) {
			task->used_len = sizeof(*task->status);
			rc = spdk_bdev_writev(bvdev->bdev_desc, ch,
					      &task->iovs[1], iovcnt, req.sector * 512,
					      payload_len, blk_request_complete_cb, task);
		} else {
			SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n");
			rc = -1;
		}

		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
				blk_request_queue_io(vdev, ch, task);
			} else {
				blk_request_finish(VIRTIO_BLK_S_IOERR, task);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_DISCARD:
		desc = task->iovs[1].iov_base;
		if (payload_len != sizeof(*desc)) {
			SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
			blk_request_finish(VIRTIO_BLK_S_IOERR, task);
			return -1;
		}

		if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n");
			blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
			return -1;
		}

		rc = spdk_bdev_unmap(bvdev->bdev_desc, ch,
				     desc->sector * 512, desc->num_sectors * 512,
				     blk_request_complete_cb, task);
		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
				blk_request_queue_io(vdev, ch, task);
			} else {
				blk_request_finish(VIRTIO_BLK_S_IOERR, task);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_WRITE_ZEROES:
		desc = task->iovs[1].iov_base;
		if (payload_len != sizeof(*desc)) {
			SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
			blk_request_finish(VIRTIO_BLK_S_IOERR, task);
			return -1;
		}

		/* The guest may ask us to unmap this range. SPDK doesn't support it, and the
		 * kernel enables this flag by default without checking whether the unmap
		 * feature was negotiated. The flag isn't mandatory, so just print a warning.
		 */
		if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
			SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n",
				     (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512);
		}

		rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, ch,
					    desc->sector * 512, desc->num_sectors * 512,
					    blk_request_complete_cb, task);
		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
				blk_request_queue_io(vdev, ch, task);
			} else {
				blk_request_finish(VIRTIO_BLK_S_IOERR, task);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_FLUSH:
		flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev);
		if (req.sector != 0) {
			SPDK_NOTICELOG("sector must be zero for flush command\n");
			blk_request_finish(VIRTIO_BLK_S_IOERR, task);
			return -1;
		}
		rc = spdk_bdev_flush(bvdev->bdev_desc, ch,
				     0, flush_bytes,
				     blk_request_complete_cb, task);
		if (rc) {
			if (rc == -ENOMEM) {
				SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
				blk_request_queue_io(vdev, ch, task);
			} else {
				blk_request_finish(VIRTIO_BLK_S_IOERR, task);
				return -1;
			}
		}
		break;
	case VIRTIO_BLK_T_GET_ID:
		if (!iovcnt || !payload_len) {
			blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
			return -1;
		}
		task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len);
		spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_name(bvdev->bdev),
				task->used_len, ' ');
		blk_request_finish(VIRTIO_BLK_S_OK, task);
		break;
	default:
		SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type);
		blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
		return -1;
	}

	return 0;
}

static void
process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
{
	struct spdk_vhost_user_blk_task *task;
	struct spdk_vhost_blk_task *blk_task;
	int rc;

	assert(vq->packed.packed_ring == false);

	task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[req_idx];
	blk_task = &task->blk_task;
	if (spdk_unlikely(task->used)) {
		SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
			    task->bvsession->vsession.name, req_idx);
		blk_task->used_len = 0;
		blk_task_enqueue(task);
		return;
	}

	blk_task_inc_task_cnt(task);

	blk_task_init(task);

	rc = blk_iovs_split_queue_setup(task->bvsession, vq, task->req_idx,
					blk_task->iovs, &blk_task->iovcnt, &blk_task->payload_size);

	if (rc) {
		SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
		/* Only READ and WRITE are supported for now. */
		vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
		return;
	}

	if (vhost_user_process_blk_request(task) == 0) {
		SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
			      req_idx);
	} else {
		SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, req_idx);
	}
}

static void
process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
{
	struct spdk_vhost_user_blk_task *task;
	struct spdk_vhost_blk_task *blk_task;
	uint16_t task_idx = req_idx, num_descs;
	int rc;

	assert(vq->packed.packed_ring);

	/* The packed ring uses the buffer_id as the task_idx to look up the task struct.
	 * The kernel driver uses vq->free_head to set the buffer_id, so the value
	 * must be in the range 0 ~ vring.size, and the free_head value is unique
	 * among the outstanding requests.
	 * We can't use req_idx as the task_idx because a desc can be reused in
	 * the next phase even when it has not completed in the previous phase. For
	 * example, at phase 0, last_used_idx was 2 and desc0 was not completed. Then,
	 * after moving to phase 1, last_avail_idx is updated to 1. In this case, req_idx
	 * cannot be used as the task_idx because we would find task[0]->used still true
	 * at phase 1.
	 * The split queue is quite different: a desc is inserted into the free list when
	 * the device completes the request, and the driver gets descs from the free
	 * list, which ensures that req_idx is unique among the outstanding requests.
	 */
	task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);

	task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
	blk_task = &task->blk_task;
	if (spdk_unlikely(task->used)) {
		SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
			    task->bvsession->vsession.name, task_idx);
		blk_task->used_len = 0;
		blk_task_enqueue(task);
		return;
	}

	task->req_idx = req_idx;
	task->num_descs = num_descs;
	task->buffer_id = task_idx;

	rte_vhost_set_inflight_desc_packed(task->bvsession->vsession.vid, vq->vring_idx,
					   req_idx, (req_idx + num_descs - 1) % vq->vring.size,
					   &task->inflight_head);

	blk_task_inc_task_cnt(task);

	blk_task_init(task);

	rc = blk_iovs_packed_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
					 &blk_task->iovcnt,
					 &blk_task->payload_size);
	if (rc) {
		SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
		/* Only READ and WRITE are supported for now. */
		vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
		return;
	}

	if (vhost_user_process_blk_request(task) == 0) {
		SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
			      task_idx);
	} else {
		SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
	}
}

static void
process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
				 uint16_t req_idx)
{
	spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
	spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
	struct spdk_vhost_user_blk_task *task;
	struct spdk_vhost_blk_task *blk_task;
	uint16_t task_idx, num_descs;
	int rc;

	task_idx = desc_array[desc->last].id;
	num_descs = desc->num;
	/* In packed ring reconnection, we use the last_used_idx as the
	 * initial value, so when we process the inflight descs we still
	 * need to update the available ring index.
	 */
	vq->last_avail_idx += num_descs;
	if (vq->last_avail_idx >= vq->vring.size) {
		vq->last_avail_idx -= vq->vring.size;
		vq->packed.avail_phase = !vq->packed.avail_phase;
	}

	task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
	blk_task = &task->blk_task;
	if (spdk_unlikely(task->used)) {
		SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
			    task->bvsession->vsession.name, task_idx);
		blk_task->used_len = 0;
		blk_task_enqueue(task);
		return;
	}

	task->req_idx = req_idx;
	task->num_descs = num_descs;
	task->buffer_id = task_idx;
	/* Used for clearing inflight entries */
	task->inflight_head = req_idx;

	blk_task_inc_task_cnt(task);

	blk_task_init(task);

	rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
					   &blk_task->iovcnt,
					   &blk_task->payload_size);
	if (rc) {
		SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
		/* Only READ and WRITE are supported for now. */
		vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
		return;
	}

	if (vhost_user_process_blk_request(task) == 0) {
		SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
			      task_idx);
	} else {
		SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
	}
}

static int
submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
		     struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession;
	spdk_vhost_resubmit_info *resubmit;
	spdk_vhost_resubmit_desc *resubmit_list;
	uint16_t req_idx;
	int i, resubmit_cnt;

	resubmit = vq->vring_inflight.resubmit_inflight;
	if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL ||
			resubmit->resubmit_num == 0)) {
		return 0;
	}

	resubmit_list = resubmit->resubmit_list;
	vsession = &bvsession->vsession;

	for (i = resubmit->resubmit_num - 1; i >= 0; --i) {
		req_idx = resubmit_list[i].index;
		SPDK_DEBUGLOG(vhost_blk, "====== Start processing resubmit request idx %"PRIu16"======\n",
			      req_idx);

		if (spdk_unlikely(req_idx >= vq->vring.size)) {
			SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
				    vsession->name, req_idx, vq->vring.size);
			vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
			continue;
		}

		if (vq->packed.packed_ring) {
			process_packed_inflight_blk_task(vq, req_idx);
		} else {
			process_blk_task(vq, req_idx);
		}
	}
	resubmit_cnt = resubmit->resubmit_num;
	resubmit->resubmit_num = 0;
	return resubmit_cnt;
}

static int
process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS];
	uint16_t reqs_cnt, i;
	int resubmit_cnt = 0;

	resubmit_cnt = submit_inflight_desc(bvsession, vq);

	reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
	if (!reqs_cnt) {
		return resubmit_cnt;
	}

	for (i = 0; i < reqs_cnt; i++) {
		SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
			      reqs[i]);

		if (spdk_unlikely(reqs[i] >= vq->vring.size)) {
			SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
				    vsession->name, reqs[i], vq->vring.size);
			vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
			continue;
		}

		rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]);

		process_blk_task(vq, reqs[i]);
	}

	return reqs_cnt;
}

static int
process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
{
	uint16_t i = 0;
	uint16_t count = 0;
	int resubmit_cnt = 0;

	resubmit_cnt = submit_inflight_desc(bvsession, vq);

	while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
	       vhost_vq_packed_ring_is_avail(vq)) {
		SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
			      vq->last_avail_idx);
		count++;
		process_packed_blk_task(vq, vq->last_avail_idx);
	}

	return count > 0 ? count : resubmit_cnt;
}

static int
_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = vq->vsession;
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	bool packed_ring;
	int rc = 0;

	packed_ring = vq->packed.packed_ring;
	if (packed_ring) {
		rc = process_packed_vq(bvsession, vq);
	} else {
		rc = process_vq(bvsession, vq);
	}

	vhost_session_vq_used_signal(vq);

	return rc;
}

static int
vdev_vq_worker(void *arg)
{
	struct spdk_vhost_virtqueue *vq = arg;

	return _vdev_vq_worker(vq);
}

static int
vdev_worker(void *arg)
{
	struct vhost_user_poll_group *pg = arg;
	struct vhost_user_pg_vq_info *vq_info;
	struct spdk_vhost_virtqueue *vq;
	int rc = 0;

	TAILQ_FOREACH(vq_info, &pg->vqs, link) {
		vq = vq_info->vq;
		assert(vq->poll_group == pg);
		rc = _vdev_vq_worker(vq);
	}

	return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static void
no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct iovec iovs[SPDK_VHOST_IOVS_MAX];
	uint32_t length;
	uint16_t iovcnt, req_idx;

	if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) {
		return;
	}

	iovcnt = SPDK_COUNTOF(iovs);
	if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) {
		*(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR;
		SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
	}

	vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
}

static void
no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_user_blk_task *task;
	struct spdk_vhost_blk_task *blk_task;
	uint32_t length;
	uint16_t req_idx = vq->last_avail_idx;
	uint16_t task_idx, num_descs;

	if (!vhost_vq_packed_ring_is_avail(vq)) {
		return;
	}

	task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
	task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
	blk_task = &task->blk_task;
	if (spdk_unlikely(task->used)) {
		SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
			    vsession->name, req_idx);
		vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
					     task->buffer_id, blk_task->used_len,
					     task->inflight_head);
		return;
	}

	task->req_idx = req_idx;
	task->num_descs = num_descs;
	task->buffer_id = task_idx;
	blk_task_init(task);

	if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, blk_task->iovs, &blk_task->iovcnt,
					&length)) {
		*(volatile uint8_t *)(blk_task->iovs[blk_task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR;
		SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
	}

	task->used = false;
	vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
				     task->buffer_id, blk_task->used_len,
				     task->inflight_head);
}

static int
_no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = vq->vsession;
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct vhost_user_poll_group *pg = (struct vhost_user_poll_group *)vq->poll_group;
	bool packed_ring;

	packed_ring = vq->packed.packed_ring;
	if (packed_ring) {
		no_bdev_process_packed_vq(bvsession, vq);
	} else {
		no_bdev_process_vq(bvsession, vq);
	}

	vhost_session_vq_used_signal(vq);

	if (pg->task_cnt == 0 && pg->io_channel) {
		vhost_blk_put_io_channel(pg->io_channel);
		pg->io_channel = NULL;
	}

	return SPDK_POLLER_BUSY;
}

static int
no_bdev_vdev_vq_worker(void *arg)
{
	struct spdk_vhost_virtqueue *vq = arg;

	return _no_bdev_vdev_vq_worker(vq);
}

static int
no_bdev_vdev_worker(void *arg)
{
	struct vhost_user_poll_group *pg = arg;
	struct vhost_user_pg_vq_info *vq_info;
	int rc = 0;

	TAILQ_FOREACH(vq_info, &pg->vqs, link) {
		rc = _no_bdev_vdev_vq_worker(vq_info->vq);
	}

	return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
}

static void
vhost_blk_pg_unregister_interrupts(struct vhost_user_poll_group *pg)
{
	struct vhost_user_pg_vq_info *vq_info;
	struct spdk_vhost_virtqueue *vq;

	TAILQ_FOREACH(vq_info, &pg->vqs, link) {
		vq = vq_info->vq;
		if (vq->intr == NULL) {
			break;
		}

		SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n",
			      vq->vring_idx, vq->vring.kickfd);
		spdk_interrupt_unregister(&vq->intr);
	}
}

static void
vhost_blk_vq_register_interrupt(struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_session *vsession = vq->vsession;
	struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vsession->vdev);

	assert(bvdev != NULL);

	if (bvdev->bdev) {
		vq->intr = spdk_interrupt_register(vq->vring.kickfd, vdev_vq_worker, vq, "vdev_vq_worker");
	} else {
		vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
						   "no_bdev_vdev_vq_worker");
	}

	if (vq->intr == NULL) {
		SPDK_ERRLOG("Failed to register req notifier handler.\n");
		assert(false);
	}
}

static void
add_vq_to_poll_group(void *arg)
{
	struct vhost_user_pg_vq_info *vq_info = arg;
	struct vhost_user_poll_group *pg = vq_info->pg;

	SPDK_DEBUGLOG(vhost_blk, "%s: vring %u is added to pg %p, thread %s, lcore %u\n",
		      pg->vsession->name,
		      vq_info->vq->vring_idx, pg, spdk_thread_get_name(spdk_get_thread()), spdk_env_get_current_core());

	TAILQ_INSERT_TAIL(&pg->vqs, vq_info, link);

	if (spdk_interrupt_mode_is_enabled()) {
		vhost_blk_vq_register_interrupt(vq_info->vq);
	}
}

static struct vhost_user_poll_group *
get_optimal_poll_group(struct spdk_vhost_blk_session *bvsession)
{
	struct vhost_user_poll_group *pg;
	struct spdk_vhost_blk_dev *bvdev;

	if (bvsession->bvdev == NULL) {
		return NULL;
	}

	/* round robin */
	bvdev = bvsession->bvdev;
	if (bvdev->next_pg_index >= bvsession->num_poll_groups) {
		bvdev->next_pg_index = 0;
	}

	pg = &bvsession->poll_groups[bvdev->next_pg_index];
	bvdev->next_pg_index++;

	return pg;
}
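
/*
 * Example of the round-robin above (illustrative numbers only): with
 * num_poll_groups = 2, four virtqueues enabled in order land on poll groups
 * 0, 1, 0, 1, spreading queue processing evenly across the poll-group
 * threads. next_pg_index lives in the device rather than the session, so the
 * distribution also stays balanced across queues enabled at different times.
 */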

static int
vhost_blk_vq_enable(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq)
{
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct spdk_vhost_dev *vdev;
	struct spdk_vhost_user_dev *user_dev;
	struct vhost_user_pg_vq_info *vq_info;

	vdev = vsession->vdev;
	user_dev = to_user_dev(vdev);

	SPDK_DEBUGLOG(vhost_blk, "%s: enable vq %u\n", vsession->name, vq->vring_idx);

	pthread_mutex_lock(&user_dev->lock);
	if (vsession->started || vsession->starting) {
		pthread_mutex_unlock(&user_dev->lock);
		vq_info = calloc(1, sizeof(*vq_info));
		if (!vq_info) {
			SPDK_ERRLOG("Failed to allocate vq_info\n");
			return -ENOMEM;
		}
		vq_info->vq = vq;
		vq_info->pg = get_optimal_poll_group(bvsession);
		if (vq_info->pg == NULL) {
			free(vq_info);
			return -EFAULT;
		}
		vq->poll_group = (void *)vq_info->pg;
		spdk_thread_send_msg(vq_info->pg->thread, add_vq_to_poll_group, vq_info);
		return 0;
	}
	pthread_mutex_unlock(&user_dev->lock);

	return 0;
}

static int
vhost_blk_pg_register_no_bdev_interrupts(struct vhost_user_poll_group *pg)
{
	struct vhost_user_pg_vq_info *vq_info;
	struct spdk_vhost_virtqueue *vq;

	TAILQ_FOREACH(vq_info, &pg->vqs, link) {
		vq = vq_info->vq;
		SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n",
			      vq->vring_idx, vq->vring.kickfd);
		vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
						   "no_bdev_vdev_vq_worker");
		if (vq->intr == NULL) {
			goto err;
		}
	}

	return 0;

err:
	vhost_blk_pg_unregister_interrupts(pg);
	return -1;
}

static void
vhost_blk_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
{
	struct spdk_vhost_blk_session *bvsession = cb_arg;

	vhost_user_session_set_interrupt_mode(&bvsession->vsession, interrupt_mode);
}

static void
bdev_event_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx)
{
	enum spdk_bdev_event_type type = (enum spdk_bdev_event_type)(uintptr_t)ctx;
	struct spdk_vhost_blk_dev *bvdev;

	if (type == SPDK_BDEV_EVENT_REMOVE) {
		/* All sessions have been notified, time to close the bdev */
		bvdev = to_blk_dev(vdev);
		assert(bvdev != NULL);
		spdk_bdev_close(bvdev->bdev_desc);
		bvdev->bdev_desc = NULL;
		bvdev->bdev = NULL;
	}
}

static int
vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev,
			     struct spdk_vhost_session *vsession,
			     void *ctx)
{
	SPDK_NOTICELOG("bdev sends a backend config-change message to vid(%d)\n", vsession->vid);
#if RTE_VERSION >= RTE_VERSION_NUM(23, 03, 0, 0)
	rte_vhost_backend_config_change(vsession->vid, false);
#else
	rte_vhost_slave_config_change(vsession->vid, false);
#endif

	return 0;
}

static void
vhost_user_blk_resize_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
{
	vhost_user_dev_foreach_session(vdev, vhost_session_bdev_resize_cb,
				       cb, cb_arg);
}

static void
_vhost_user_session_bdev_remove_cb(void *arg)
{
	struct vhost_user_poll_group *pg = arg;
	struct spdk_vhost_session *vsession = pg->vsession;
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	int rc;

	if (pg->requestq_poller == NULL) {
		return;
	}

	spdk_poller_unregister(&pg->requestq_poller);
	if (spdk_interrupt_mode_is_enabled()) {
		vhost_blk_pg_unregister_interrupts(pg);
		rc = vhost_blk_pg_register_no_bdev_interrupts(pg);
		if (rc) {
			SPDK_ERRLOG("Interrupt register failed\n");
			return;
		}
	}

	pg->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, pg, 0);
	spdk_poller_register_interrupt(pg->requestq_poller, vhost_blk_poller_set_interrupt_mode, bvsession);
}

static int
vhost_user_session_bdev_remove_cb(struct spdk_vhost_dev *vdev,
				  struct spdk_vhost_session *vsession,
				  void *ctx)
{
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct vhost_user_poll_group *pg;
	uint32_t i;

	for (i = 0; i < bvsession->num_poll_groups; i++) {
		pg = &bvsession->poll_groups[i];
		spdk_thread_send_msg(pg->thread, _vhost_user_session_bdev_remove_cb, pg);
	}

	return 0;
}

static void
vhost_user_bdev_remove_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
{
	SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n",
		     vdev->name);

	vhost_user_dev_foreach_session(vdev, vhost_user_session_bdev_remove_cb,
				       cb, cb_arg);
}

static void
vhost_user_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_vhost_dev *vdev,
			 bdev_event_cb_complete cb, void *cb_arg)
{
	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
		vhost_user_bdev_remove_cb(vdev, cb, cb_arg);
		break;
	case SPDK_BDEV_EVENT_RESIZE:
		vhost_user_blk_resize_cb(vdev, cb, cb_arg);
		break;
	default:
		assert(false);
		return;
	}
}

static void
bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
	      void *event_ctx)
{
	struct spdk_vhost_dev *vdev = (struct spdk_vhost_dev *)event_ctx;
	struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);

	assert(bvdev != NULL);

	SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n",
		      type,
		      bdev->name);

	switch (type) {
	case SPDK_BDEV_EVENT_REMOVE:
	case SPDK_BDEV_EVENT_RESIZE:
		bvdev->ops->bdev_event(type, vdev, bdev_event_cpl_cb, (void *)type);
		break;
	default:
		SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
		break;
	}
}

static void
free_task_pool(struct spdk_vhost_blk_session *bvsession)
{
	struct spdk_vhost_session *vsession = &bvsession->vsession;
	struct spdk_vhost_virtqueue *vq;
	uint16_t i;

	for (i = 0; i < vsession->max_queues; i++) {
		vq = &vsession->virtqueue[i];
		if (vq->tasks == NULL) {
			continue;
		}

		spdk_free(vq->tasks);
		vq->tasks = NULL;
	}
}

static int
alloc_vq_task_pool(struct spdk_vhost_session *vsession, uint16_t qid)
{
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct spdk_vhost_virtqueue *vq;
	struct spdk_vhost_user_blk_task *task;
	uint32_t task_cnt;
	uint32_t j;

	if (qid >= SPDK_VHOST_MAX_VQUEUES) {
		return -EINVAL;
	}

	vq = &vsession->virtqueue[qid];
	if (vq->vring.desc == NULL) {
		return 0;
	}

	task_cnt = vq->vring.size;
	if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) {
		/* sanity check */
		SPDK_ERRLOG("%s: virtqueue %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n",
			    vsession->name, qid, task_cnt, SPDK_VHOST_MAX_VQ_SIZE);
		return -1;
	}
	vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_user_blk_task) * task_cnt,
				 SPDK_CACHE_LINE_SIZE, NULL,
				 SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
	if (vq->tasks == NULL) {
		SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n",
			    vsession->name, task_cnt, qid);
		return -1;
	}

	for (j = 0; j < task_cnt; j++) {
		task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[j];
		task->bvsession = bvsession;
		task->req_idx = j;
		task->vq = vq;
	}

	return 0;
}

static void
session_start_poll_group(void *args)
{
	struct vhost_user_pg_vq_info *vq_info;
	struct vhost_user_poll_group *pg = args;
	struct spdk_vhost_blk_dev *bvdev = to_blk_dev(pg->vdev);
	struct spdk_vhost_blk_session *bvsession = to_blk_session(pg->vsession);

	assert(bvdev != NULL);

	if (bvdev->bdev) {
		pg->io_channel = vhost_blk_get_io_channel(pg->vdev);
		SPDK_DEBUGLOG(vhost_blk, "%s: pg %p, pg io channel %p, thread %s, lcore %u\n",
			      bvsession->vsession.name, pg,
			      pg->io_channel, spdk_thread_get_name(spdk_get_thread()), spdk_env_get_current_core());
		if (!pg->io_channel) {
			SPDK_ERRLOG("%s: I/O channel allocation failed\n", bvsession->vsession.name);
			return;
		}
	}

	if (spdk_interrupt_mode_is_enabled()) {
		TAILQ_FOREACH(vq_info, &pg->vqs, link) {
			vhost_blk_vq_register_interrupt(vq_info->vq);
		}
	}

	if (bvdev->bdev) {
		pg->requestq_poller = SPDK_POLLER_REGISTER(vdev_worker, pg, 0);
	} else {
		pg->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, pg, 0);
	}
	SPDK_INFOLOG(vhost, "%s: poller started on lcore %d\n",
		     bvsession->vsession.name, spdk_env_get_current_core());

	spdk_poller_register_interrupt(pg->requestq_poller, vhost_blk_poller_set_interrupt_mode, bvsession);
}

static int
session_start_poll_groups(struct spdk_vhost_dev *vdev, struct spdk_vhost_session *vsession)
{
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct vhost_user_poll_group *pg;
	struct vhost_user_pg_vq_info *vq_info;
	struct spdk_cpuset *cpumask;
	char thread_name[128];
	uint32_t i, index = 0;
	int rc = 0;

	bvsession->thread = vdev->thread;
	cpumask = spdk_thread_get_cpumask(vdev->thread);
	/* If the user provides no cpumask, we still start one thread for the device */
	if (vdev->use_default_cpumask) {
		bvsession->num_poll_groups = 1;
	} else {
		bvsession->num_poll_groups = spdk_cpuset_count(cpumask);
	}
	bvsession->poll_groups = calloc(bvsession->num_poll_groups, sizeof(struct vhost_user_poll_group));
	if (!bvsession->poll_groups) {
		SPDK_ERRLOG("Failed to allocate poll groups\n");
		return -ENOMEM;
	}

	for (i = 0; i < bvsession->num_poll_groups; i++) {
		pg = &bvsession->poll_groups[i];
		TAILQ_INIT(&pg->vqs);
	}

	for (i = 0; i < vsession->max_queues; i++) {
		vq_info = calloc(1, sizeof(*vq_info));
		if (!vq_info) {
			SPDK_ERRLOG("Failed to allocate vq_info\n");
			rc = -ENOMEM;
			goto err;
		}
		vq_info->vq = &vsession->virtqueue[i];
		vq_info->vsession = vsession;

		pg = get_optimal_poll_group(bvsession);
		if (pg == NULL) {
			free(vq_info);
			rc = -EFAULT;
			goto err;
		}
		vq_info->pg = pg;
		vq_info->vq->poll_group = pg;

		SPDK_DEBUGLOG(vhost_blk, "%s: vring %u is added to pg %p\n", vsession->name, i, pg);
		TAILQ_INSERT_TAIL(&pg->vqs, vq_info, link);
	}

	SPDK_ENV_FOREACH_CORE(i) {
		if (!spdk_cpuset_get_cpu(cpumask, i)) {
			continue;
		}

		snprintf(thread_name, sizeof(thread_name), "%s.%u_%u", vdev->name, vsession->vid, i);
		pg = &bvsession->poll_groups[index];
		pg->vdev = vdev;
		pg->vsession = vsession;
		pg->thread = spdk_thread_create(thread_name, cpumask);
		if (!pg->thread) {
			SPDK_ERRLOG("Failed to create %s session %d poll groups\n", vdev->name, vsession->vid);
			rc = -EFAULT;
			goto err;
		}
		spdk_thread_send_msg(pg->thread, session_start_poll_group, pg);
		index++;
		if (index == bvsession->num_poll_groups) {
			break;
		}
	}

	return 0;

err:
	session_stop_poll_groups(bvsession);
	return rc;
}

static int
vhost_blk_start(struct spdk_vhost_dev *vdev,
		struct spdk_vhost_session *vsession, void *unused)
{
	struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
	struct spdk_vhost_blk_dev *bvdev;
	int i;

	/* return if start is already in progress */
	if (vsession->started || vsession->starting) {
		SPDK_INFOLOG(vhost, "%s: is starting or started\n", vsession->name);
		return -EINPROGRESS;
	}

	/* validate all I/O queues are in a contiguous index range */
	for (i = 0; i < vsession->max_queues; i++) {
		/* vring.desc and vring.desc_packed are in a union struct
		 * so q->vring.desc can replace q->vring.desc_packed.
		 */
		if (vsession->virtqueue[i].vring.desc == NULL) {
			SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i);
			return -1;
		}
	}

	bvdev = to_blk_dev(vdev);
	assert(bvdev != NULL);
	bvsession->bvdev = bvdev;

	return session_start_poll_groups(vdev, vsession);
}

static void
session_stop_poll_group_done(void *arg)
{
	struct spdk_vhost_blk_session *bvsession = arg;

	bvsession->num_stopped_poll_groups++;
}

static int
pg_stop_poller_cb(void *args)
{
	struct vhost_user_poll_group *pg = args;
	struct spdk_vhost_blk_session *bvsession;
	struct vhost_user_pg_vq_info *vq_info, *tmp;

	if (!pg->task_cnt) {
		TAILQ_FOREACH_SAFE(vq_info, &pg->vqs, link, tmp) {
			TAILQ_REMOVE(&pg->vqs, vq_info, link);
			vq_info->vq->next_event_time = 0;
			vhost_vq_used_signal(pg->vsession, vq_info->vq);
			free(vq_info);
		}
		goto done;
	}

	pg->stop_retry_count--;
	if (pg->stop_retry_count) {
		return SPDK_POLLER_IDLE;
	}

done:
	SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n",
		     pg->vsession->name, spdk_env_get_current_core());

	spdk_poller_unregister(&pg->stop_poller);
	if (pg->io_channel) {
		vhost_blk_put_io_channel(pg->io_channel);
		pg->io_channel = NULL;
	}

	bvsession = to_blk_session(pg->vsession);
	spdk_thread_exit(pg->thread);
	spdk_thread_send_msg(bvsession->thread, session_stop_poll_group_done, bvsession);

	return SPDK_POLLER_BUSY;
}

static void
session_stop_poll_group(void *args)
{
	struct vhost_user_poll_group *pg = args;

	spdk_poller_unregister(&pg->requestq_poller);
	vhost_blk_pg_unregister_interrupts(pg);

	/* Timeout value should be less than SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC */
	pg->stop_retry_count = (SPDK_VHOST_SESSION_STOP_TIMEOUT_IN_SEC * 1000 *
				1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
	pg->stop_poller = SPDK_POLLER_REGISTER(pg_stop_poller_cb, pg,
					       SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
}
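
/*
 * Worked example of the retry budget above, assuming (hypothetically) that
 * SPDK_VHOST_SESSION_STOP_TIMEOUT_IN_SEC is 4 and
 * SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US is 1000: the stop poller then
 * fires every 1 ms, and stop_retry_count = (4 * 1000 * 1000) / 1000 = 4000
 * attempts, i.e. pg_stop_poller_cb() watches task_cnt for roughly 4 seconds
 * before tearing the poll group down anyway.
 */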

static void
session_stop_poll_groups(struct spdk_vhost_blk_session *bvsession)
{
	uint32_t i;
	struct vhost_user_poll_group *pg;

	bvsession->num_stopped_poll_groups = 0;
	for (i = 0; i < bvsession->num_poll_groups; i++) {
		pg = &bvsession->poll_groups[i];
		if (pg->thread) {
			spdk_thread_send_msg(pg->thread, session_stop_poll_group, pg);
		}
	}
}
1677 :
1678 : static int
1679 0 : destroy_session_poller_cb(void *arg)
1680 : {
1681 0 : struct spdk_vhost_blk_session *bvsession = arg;
1682 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1683 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
1684 :
1685 0 : if ((bvsession->num_stopped_poll_groups != bvsession->num_poll_groups) ||
1686 0 : (pthread_mutex_trylock(&user_dev->lock) != 0)) {
1687 0 : assert(vsession->stop_retry_count > 0);
1688 0 : vsession->stop_retry_count--;
1689 0 : if (vsession->stop_retry_count == 0) {
1690 0 : SPDK_ERRLOG("%s: timed out while destroying session (stopped poll groups: %"PRIu32")\n",
1691 : vsession->name, bvsession->num_stopped_poll_groups);
1692 0 : spdk_poller_unregister(&bvsession->stop_poller);
1693 0 : vhost_user_session_stop_done(vsession, -ETIMEDOUT);
1694 : }
1695 :
1696 0 : return SPDK_POLLER_BUSY;
1697 : }
1698 :
1699 0 : SPDK_DEBUGLOG(vhost_blk, "%s: session stopped\n", vsession->name);
1700 0 : free(bvsession->poll_groups);
1701 0 : free_task_pool(bvsession);
1702 0 : spdk_poller_unregister(&bvsession->stop_poller);
1703 0 : vhost_user_session_stop_done(vsession, 0);
1704 :
1705 0 : pthread_mutex_unlock(&user_dev->lock);
1706 0 : return SPDK_POLLER_BUSY;
1707 : }
1708 :
1709 : static int
1710 0 : vhost_blk_stop(struct spdk_vhost_dev *vdev,
1711 : struct spdk_vhost_session *vsession, void *unused)
1712 : {
1713 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
1714 :
1715 : /* return if stop is already in progress */
1716 0 : if (bvsession->stop_poller) {
1717 0 : return -EINPROGRESS;
1718 : }
1719 :
1720 0 : session_stop_poll_groups(bvsession);
1721 :
1722 0 : bvsession->vsession.stop_retry_count = (SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC * 1000 *
1723 : 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
1724 0 : bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb,
1725 : bvsession, SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
1726 0 : return 0;
1727 : }
1728 :
1729 : static void
1730 0 : vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
1731 : {
1732 : struct spdk_vhost_blk_dev *bvdev;
1733 :
1734 0 : bvdev = to_blk_dev(vdev);
1735 0 : assert(bvdev != NULL);
1736 :
1737 0 : spdk_json_write_named_object_begin(w, "block");
1738 :
1739 0 : spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
1740 :
1741 0 : spdk_json_write_name(w, "bdev");
1742 0 : if (bvdev->bdev) {
1743 0 : spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev));
1744 : } else {
1745 0 : spdk_json_write_null(w);
1746 : }
1747 0 : spdk_json_write_named_string(w, "transport", bvdev->ops->name);
1748 :
1749 0 : spdk_json_write_object_end(w);
1750 0 : }
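 : /* Illustrative output of the dump above (the bdev name is hypothetical):
 :  *
 :  *   "block": {
 :  *     "readonly": false,
 :  *     "bdev": "Malloc0",
 :  *     "transport": "vhost_user_blk"
 :  *   }
 :  */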
1751 :
1752 : static void
1753 0 : vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
1754 : {
1755 : struct spdk_vhost_blk_dev *bvdev;
1756 :
1757 0 : bvdev = to_blk_dev(vdev);
1758 0 : assert(bvdev != NULL);
1759 :
1760 0 : if (!bvdev->bdev) {
1761 0 : return;
1762 : }
1763 :
1764 0 : spdk_json_write_object_begin(w);
1765 0 : spdk_json_write_named_string(w, "method", "vhost_create_blk_controller");
1766 :
1767 0 : spdk_json_write_named_object_begin(w, "params");
1768 0 : spdk_json_write_named_string(w, "ctrlr", vdev->name);
1769 0 : spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev));
1770 0 : spdk_json_write_named_string(w, "cpumask",
1771 : spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread)));
1772 0 : spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
1773 0 : spdk_json_write_named_string(w, "transport", bvdev->ops->name);
1774 0 : spdk_json_write_object_end(w);
1775 :
1776 0 : spdk_json_write_object_end(w);
1777 : }
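 : /* Illustrative JSON emitted above (controller, bdev, and cpumask values
 :  * are hypothetical):
 :  *
 :  *   {
 :  *     "method": "vhost_create_blk_controller",
 :  *     "params": {
 :  *       "ctrlr": "vhost.0",
 :  *       "dev_name": "Malloc0",
 :  *       "cpumask": "0x3",
 :  *       "readonly": false,
 :  *       "transport": "vhost_user_blk"
 :  *     }
 :  *   }
 :  */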
1778 :
1779 : static int vhost_blk_destroy(struct spdk_vhost_dev *dev);
1780 :
1781 : static int
1782 0 : vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
1783 : uint32_t len)
1784 : {
1785 0 : struct virtio_blk_config blkcfg;
1786 : struct spdk_bdev *bdev;
1787 : uint32_t blk_size;
1788 : uint64_t blkcnt;
1789 :
1790 0 : memset(&blkcfg, 0, sizeof(blkcfg));
1791 0 : bdev = vhost_blk_get_bdev(vdev);
1792 0 : if (bdev == NULL) {
1793 : /* We can't just return -1 here, as this GET_CONFIG message might
1794 : * be caused by a QEMU VM reboot. Returning -1 would indicate an
1795 : * error to QEMU, which might then decide to terminate itself.
1796 : * We don't want that; a simple reboot shouldn't break the system.
1797 : *
1798 : * Presenting a block device with block size 0 and block count 0
1799 : * doesn't cause any problems on QEMU side and the virtio-pci
1800 : * device is even still available inside the VM, but there will
1801 : * be no block device created for it - the kernel drivers will
1802 : * silently reject it.
1803 : */
1804 0 : blk_size = 0;
1805 0 : blkcnt = 0;
1806 : } else {
1807 0 : blk_size = spdk_bdev_get_block_size(bdev);
1808 0 : blkcnt = spdk_bdev_get_num_blocks(bdev);
1809 0 : if (spdk_bdev_get_buf_align(bdev) > 1) {
1810 0 : blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE;
1811 0 : blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 2 - 1);
1812 : } else {
1813 0 : blkcfg.size_max = 131072;
1814 : /* -2 for REQ and RESP and -1 for region boundary splitting */
1815 : /* -2 for the REQ and RESP descriptors and -1 for region boundary splitting */
1816 : }
1817 : }
1818 :
1819 0 : blkcfg.blk_size = blk_size;
1820 : /* minimum I/O size in blocks */
1821 0 : blkcfg.min_io_size = 1;
1822 : /* expressed in 512-byte sectors */
1823 0 : blkcfg.capacity = (blkcnt * blk_size) / 512;
1824 : /* QEMU can overwrite this value when started */
1825 0 : blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;
1826 :
1827 0 : if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
1828 : /* 16 MiB, expressed in 512-byte sectors */
1829 0 : blkcfg.max_discard_sectors = 32768;
1830 0 : blkcfg.max_discard_seg = 1;
1831 0 : blkcfg.discard_sector_alignment = blk_size / 512;
1832 : }
1833 0 : if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
1834 0 : blkcfg.max_write_zeroes_sectors = 32768;
1835 0 : blkcfg.max_write_zeroes_seg = 1;
1836 : }
1837 :
1838 0 : memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));
1839 :
1840 0 : return 0;
1841 : }
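 : /* Worked example of the config arithmetic above (illustrative numbers):
 :  * a bdev with 4096-byte blocks and 262144 blocks (1 GiB) reports
 :  * capacity = 262144 * 4096 / 512 = 2097152 virtio sectors, and, when
 :  * UNMAP is supported, discard_sector_alignment = 4096 / 512 = 8.
 :  */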
1842 :
1843 : static int
1844 0 : vhost_blk_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
1845 : uint32_t iops_threshold)
1846 : {
1847 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1848 :
1849 0 : assert(bvdev != NULL);
1850 :
1851 0 : return bvdev->ops->set_coalescing(vdev, delay_base_us, iops_threshold);
1852 : }
1853 :
1854 : static void
1855 0 : vhost_blk_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
1856 : uint32_t *iops_threshold)
1857 : {
1858 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1859 :
1860 0 : assert(bvdev != NULL);
1861 :
1862 0 : bvdev->ops->get_coalescing(vdev, delay_base_us, iops_threshold);
1863 0 : }
1864 :
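 : /* session_ctx_size tells the generic vhost-user layer how many extra
 :  * bytes to allocate beyond the base spdk_vhost_session, so the
 :  * blk-specific session state lives in the same allocation.
 :  */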
1865 : static const struct spdk_vhost_user_dev_backend vhost_blk_user_device_backend = {
1866 : .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session),
1867 : .start_session = vhost_blk_start,
1868 : .stop_session = vhost_blk_stop,
1869 : .alloc_vq_tasks = alloc_vq_task_pool,
1870 : .enable_vq = vhost_blk_vq_enable,
1871 : };
1872 :
1873 : static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
1874 : .type = VHOST_BACKEND_BLK,
1875 : .vhost_get_config = vhost_blk_get_config,
1876 : .dump_info_json = vhost_blk_dump_info_json,
1877 : .write_config_json = vhost_blk_write_config_json,
1878 : .remove_device = vhost_blk_destroy,
1879 : .set_coalescing = vhost_blk_set_coalescing,
1880 : .get_coalescing = vhost_blk_get_coalescing,
1881 : };
1882 :
1883 : int
1884 1 : virtio_blk_construct_ctrlr(struct spdk_vhost_dev *vdev, const char *address,
1885 : struct spdk_cpuset *cpumask, const struct spdk_json_val *params,
1886 : const struct spdk_vhost_user_dev_backend *user_backend)
1887 : {
1888 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1889 :
1890 1 : assert(bvdev != NULL);
1891 :
1892 1 : return bvdev->ops->create_ctrlr(vdev, cpumask, address, params, (void *)user_backend);
1893 : }
1894 :
1895 : int
1896 1 : spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
1897 : const char *transport, const struct spdk_json_val *params)
1898 : {
1899 1 : struct spdk_vhost_blk_dev *bvdev = NULL;
1900 : struct spdk_vhost_dev *vdev;
1901 : struct spdk_bdev *bdev;
1902 1 : const char *transport_name = VIRTIO_BLK_DEFAULT_TRANSPORT;
1903 1 : int ret = 0;
1904 :
1905 1 : bvdev = calloc(1, sizeof(*bvdev));
1906 1 : if (bvdev == NULL) {
1907 0 : ret = -ENOMEM;
1908 0 : goto out;
1909 : }
1910 :
1911 1 : if (transport != NULL) {
1912 0 : transport_name = transport;
1913 : }
1914 :
1915 1 : bvdev->ops = virtio_blk_get_transport_ops(transport_name);
1916 1 : if (!bvdev->ops) {
1917 0 : ret = -EINVAL;
1918 0 : SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
1919 0 : goto out;
1920 : }
1921 :
1922 1 : ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc);
1923 1 : if (ret != 0) {
1924 0 : SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n",
1925 : name, dev_name, ret);
1926 0 : goto out;
1927 : }
1928 1 : bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc);
1929 :
1930 1 : vdev = &bvdev->vdev;
1931 1 : vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE;
1932 1 : vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES;
1933 1 : vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES;
1934 :
1935 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
1936 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD);
1937 : }
1938 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
1939 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
1940 : }
1941 :
1942 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
1943 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH);
1944 : }
1945 :
1946 1 : bvdev->bdev = bdev;
1947 1 : bvdev->readonly = false;
1948 1 : ret = vhost_dev_register(vdev, name, cpumask, params, &vhost_blk_device_backend,
1949 : &vhost_blk_user_device_backend, false);
1950 1 : if (ret != 0) {
1951 0 : spdk_bdev_close(bvdev->bdev_desc);
1952 0 : goto out;
1953 : }
1954 :
1955 1 : SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name);
1956 1 : out:
1957 1 : if (ret != 0 && bvdev) {
1958 0 : free(bvdev);
1959 : }
1960 1 : return ret;
1961 : }
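 : /* A minimal usage sketch (names are hypothetical; in practice this is
 :  * driven by the vhost_create_blk_controller RPC, whose JSON arrives via
 :  * the params argument). Passing NULL for transport selects
 :  * VIRTIO_BLK_DEFAULT_TRANSPORT:
 :  *
 :  *   rc = spdk_vhost_blk_construct("vhost.0", "0x1", "Malloc0", NULL, params);
 :  */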
1962 :
1963 : int
1964 1 : virtio_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
1965 : {
1966 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1967 :
1968 1 : assert(bvdev != NULL);
1969 :
1970 1 : return bvdev->ops->destroy_ctrlr(vdev);
1971 : }
1972 :
1973 : static int
1974 1 : vhost_blk_destroy(struct spdk_vhost_dev *vdev)
1975 : {
1976 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1977 : int rc;
1978 :
1979 1 : assert(bvdev != NULL);
1980 :
1981 1 : rc = vhost_dev_unregister(&bvdev->vdev);
1982 1 : if (rc != 0) {
1983 0 : return rc;
1984 : }
1985 :
1986 1 : if (bvdev->bdev_desc) {
1987 0 : spdk_bdev_close(bvdev->bdev_desc);
1988 0 : bvdev->bdev_desc = NULL;
1989 : }
1990 1 : bvdev->bdev = NULL;
1991 :
1992 1 : free(bvdev);
1993 1 : return 0;
1994 : }
1995 :
1996 : struct spdk_io_channel *
1997 0 : vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev)
1998 : {
1999 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
2000 :
2001 0 : assert(bvdev != NULL);
2002 :
2003 0 : return spdk_bdev_get_io_channel(bvdev->bdev_desc);
2004 : }
2005 :
2006 : void
2007 0 : vhost_blk_put_io_channel(struct spdk_io_channel *ch)
2008 : {
2009 0 : spdk_put_io_channel(ch);
2010 0 : }
2011 :
2012 : static struct spdk_virtio_blk_transport *
2013 1 : vhost_user_blk_create(const struct spdk_json_val *params)
2014 : {
2015 : int ret;
2016 : struct spdk_virtio_blk_transport *vhost_user_blk;
2017 :
2018 1 : vhost_user_blk = calloc(1, sizeof(*vhost_user_blk));
2019 1 : if (!vhost_user_blk) {
2020 0 : return NULL;
2021 : }
2022 :
2023 1 : ret = vhost_user_init();
2024 1 : if (ret != 0) {
2025 0 : free(vhost_user_blk);
2026 0 : return NULL;
2027 : }
2028 :
2029 1 : return vhost_user_blk;
2030 : }
2031 :
2032 : static int
2033 1 : vhost_user_blk_destroy(struct spdk_virtio_blk_transport *transport,
2034 : spdk_vhost_fini_cb cb_fn)
2035 : {
2036 1 : vhost_user_fini(cb_fn);
2037 1 : free(transport);
2038 1 : return 0;
2039 : }
2040 :
2041 : struct rpc_vhost_blk {
2042 : bool readonly;
2043 : bool packed_ring;
2044 : };
2045 :
2046 : static const struct spdk_json_object_decoder rpc_construct_vhost_blk[] = {
2047 : {"readonly", offsetof(struct rpc_vhost_blk, readonly), spdk_json_decode_bool, true},
2048 : {"packed_ring", offsetof(struct rpc_vhost_blk, packed_ring), spdk_json_decode_bool, true},
2049 : };
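 : /* Both fields are optional (the trailing "true" in each decoder entry
 :  * marks them as such), so the RPC params may look like, e.g.
 :  * (illustrative):
 :  *
 :  *   { "readonly": true, "packed_ring": false }
 :  */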
2050 :
2051 : static int
2052 1 : vhost_user_blk_create_ctrlr(struct spdk_vhost_dev *vdev, struct spdk_cpuset *cpumask,
2053 : const char *address, const struct spdk_json_val *params, void *custom_opts)
2054 : {
2055 1 : struct rpc_vhost_blk req = {0};
2056 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
2057 :
2058 1 : assert(bvdev != NULL);
2059 :
2060 1 : if (spdk_json_decode_object_relaxed(params, rpc_construct_vhost_blk,
2061 : SPDK_COUNTOF(rpc_construct_vhost_blk),
2062 : &req)) {
2063 0 : SPDK_DEBUGLOG(vhost_blk, "spdk_json_decode_object_relaxed failed\n");
2064 0 : return -EINVAL;
2065 : }
2066 :
2067 1 : if (req.packed_ring) {
2068 0 : vdev->virtio_features |= (uint64_t)req.packed_ring << VIRTIO_F_RING_PACKED;
2069 : }
2070 1 : if (req.readonly) {
2071 0 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO);
2072 0 : bvdev->readonly = req.readonly;
2073 : }
2074 :
2075 1 : return vhost_user_dev_create(vdev, address, cpumask, custom_opts, false);
2076 : }
2077 :
2078 : static int
2079 1 : vhost_user_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
2080 : {
2081 1 : return vhost_user_dev_unregister(vdev);
2082 : }
2083 :
2084 : static void
2085 0 : vhost_user_blk_dump_opts(struct spdk_virtio_blk_transport *transport, struct spdk_json_write_ctx *w)
2086 : {
2087 0 : assert(w != NULL);
2088 :
2089 0 : spdk_json_write_named_string(w, "name", transport->ops->name);
2090 0 : }
2091 :
2092 : static const struct spdk_virtio_blk_transport_ops vhost_user_blk = {
2093 : .name = "vhost_user_blk",
2094 :
2095 : .dump_opts = vhost_user_blk_dump_opts,
2096 :
2097 : .create = vhost_user_blk_create,
2098 : .destroy = vhost_user_blk_destroy,
2099 :
2100 : .create_ctrlr = vhost_user_blk_create_ctrlr,
2101 : .destroy_ctrlr = vhost_user_blk_destroy_ctrlr,
2102 :
2103 : .bdev_event = vhost_user_bdev_event_cb,
2104 : .set_coalescing = vhost_user_set_coalescing,
2105 : .get_coalescing = vhost_user_get_coalescing,
2106 : };
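 : /* SPDK_VIRTIO_BLK_TRANSPORT_REGISTER adds these ops to the global
 :  * transport list at startup; virtio_blk_get_transport_ops("vhost_user_blk")
 :  * in spdk_vhost_blk_construct() then resolves them by name.
 :  */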
2107 :
2108 1 : SPDK_VIRTIO_BLK_TRANSPORT_REGISTER(vhost_user_blk, &vhost_user_blk);
2109 :
2110 1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk)
2111 1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data)