Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2017 Intel Corporation. All rights reserved.
3 : * All rights reserved.
4 : */
5 :
6 : #include <linux/virtio_blk.h>
7 :
8 : #include "spdk/env.h"
9 : #include "spdk/bdev.h"
10 : #include "spdk/bdev_module.h"
11 : #include "spdk/thread.h"
12 : #include "spdk/likely.h"
13 : #include "spdk/string.h"
14 : #include "spdk/util.h"
15 : #include "spdk/vhost.h"
16 : #include "spdk/json.h"
17 :
18 : #include "vhost_internal.h"
19 : #include <rte_version.h>
20 :
/* Feature bits that every SPDK vhost-blk device starts from. */
#define SPDK_VHOST_BLK_FEATURES_BASE \
	(SPDK_VHOST_FEATURES | \
	 (1ULL << VIRTIO_BLK_F_SIZE_MAX) | \
	 (1ULL << VIRTIO_BLK_F_SEG_MAX) | \
	 (1ULL << VIRTIO_BLK_F_GEOMETRY) | \
	 (1ULL << VIRTIO_BLK_F_BLK_SIZE) | \
	 (1ULL << VIRTIO_BLK_F_TOPOLOGY) | \
	 (1ULL << VIRTIO_BLK_F_BARRIER) | \
	 (1ULL << VIRTIO_BLK_F_SCSI) | \
	 (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
	 (1ULL << VIRTIO_BLK_F_MQ))

/* Feature bits that are not supported. */
#define SPDK_VHOST_BLK_DISABLED_FEATURES \
	(SPDK_VHOST_DISABLED_FEATURES | \
	 (1ULL << VIRTIO_BLK_F_GEOMETRY) | \
	 (1ULL << VIRTIO_BLK_F_CONFIG_WCE) | \
	 (1ULL << VIRTIO_BLK_F_BARRIER) | \
	 (1ULL << VIRTIO_BLK_F_SCSI))

/* Vhost-user protocol features supported by vhost-blk. */
#define SPDK_VHOST_BLK_PROTOCOL_FEATURES \
	((1ULL << VHOST_USER_PROTOCOL_F_CONFIG) | \
	 (1ULL << VHOST_USER_PROTOCOL_F_INFLIGHT_SHMFD))

/* Name of the default virtio-blk transport. */
#define VIRTIO_BLK_DEFAULT_TRANSPORT "vhost_user_blk"
39 :
40 : struct spdk_vhost_user_blk_task {
41 : struct spdk_vhost_blk_task blk_task;
42 : struct spdk_vhost_blk_session *bvsession;
43 : struct spdk_vhost_virtqueue *vq;
44 :
45 : uint16_t req_idx;
46 : uint16_t num_descs;
47 : uint16_t buffer_id;
48 : uint16_t inflight_head;
49 :
50 : /* If set, the task is currently used for I/O processing. */
51 : bool used;
52 : };
53 :
/*
 * vhost-blk device: a generic vhost device plus the block device it exposes.
 */
54 : struct spdk_vhost_blk_dev {
/* Generic vhost device. to_blk_dev() recovers the enclosing structure from a
 * pointer to this member via SPDK_CONTAINEROF().
 */
55 : struct spdk_vhost_dev vdev;
/* Block device returned by vhost_blk_get_bdev(); also queried for its size
 * when handling flush requests.
 */
56 : struct spdk_bdev *bdev;
/* Descriptor passed to the spdk_bdev_*() I/O submission calls. */
57 : struct spdk_bdev_desc *bdev_desc;
/* Transport operations table. NOTE(review): not referenced in this part of
 * the file - confirm usage against the rest of the source.
 */
58 : const struct spdk_virtio_blk_transport_ops *ops;
59 :
/* When set, VIRTIO_BLK_T_OUT requests are failed instead of being written. */
60 : bool readonly;
61 : };
62 :
/*
 * vhost-blk session: the generic vhost session extended with blk-specific
 * state.
 */
63 : struct spdk_vhost_blk_session {
64 : /* The parent session must be the very first field in this struct */
/* to_blk_session() relies on this by casting a spdk_vhost_session pointer
 * directly to a spdk_vhost_blk_session pointer.
 */
65 : struct spdk_vhost_session vsession;
/* Device this session is attached to. */
66 : struct spdk_vhost_blk_dev *bvdev;
/* NOTE(review): presumably the poller that drives request processing - it is
 * not referenced in this part of the file; confirm.
 */
67 : struct spdk_poller *requestq_poller;
/* I/O channel passed to virtio_blk_process_request() for every request. */
68 : struct spdk_io_channel *io_channel;
/* NOTE(review): presumably used while the session is being stopped - it is
 * not referenced in this part of the file; confirm.
 */
69 : struct spdk_poller *stop_poller;
70 : };
71 :
72 : /* forward declaration */
/* NOTE(review): the backend object is only declared here; its definition is
 * presumably further down in the file.
 */
73 : static const struct spdk_vhost_dev_backend vhost_blk_device_backend;
74 :
/* Completion callback of the vhost-user transport. Declared early because
 * vhost_user_process_blk_request() passes it to virtio_blk_process_request()
 * before the definition appears.
 */
75 : static void vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task,
76 : void *cb_arg);
77 :
78 : static int
79 0 : vhost_user_process_blk_request(struct spdk_vhost_user_blk_task *user_task)
80 : {
81 0 : struct spdk_vhost_blk_session *bvsession = user_task->bvsession;
82 0 : struct spdk_vhost_dev *vdev = &bvsession->bvdev->vdev;
83 :
84 0 : return virtio_blk_process_request(vdev, bvsession->io_channel, &user_task->blk_task,
85 : vhost_user_blk_request_finish, NULL);
86 0 : }
87 :
88 : static struct spdk_vhost_blk_dev *
89 4 : to_blk_dev(struct spdk_vhost_dev *vdev)
90 : {
91 4 : if (vdev == NULL) {
92 0 : return NULL;
93 : }
94 :
95 4 : if (vdev->backend->type != VHOST_BACKEND_BLK) {
96 0 : SPDK_ERRLOG("%s: not a vhost-blk device\n", vdev->name);
97 0 : return NULL;
98 : }
99 :
100 4 : return SPDK_CONTAINEROF(vdev, struct spdk_vhost_blk_dev, vdev);
101 4 : }
102 :
103 : struct spdk_bdev *
104 0 : vhost_blk_get_bdev(struct spdk_vhost_dev *vdev)
105 : {
106 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
107 :
108 0 : assert(bvdev != NULL);
109 :
110 0 : return bvdev->bdev;
111 0 : }
112 :
113 : static struct spdk_vhost_blk_session *
114 0 : to_blk_session(struct spdk_vhost_session *vsession)
115 : {
116 0 : assert(vsession->vdev->backend->type == VHOST_BACKEND_BLK);
117 0 : return (struct spdk_vhost_blk_session *)vsession;
118 : }
119 :
120 : static inline void
121 0 : blk_task_inc_task_cnt(struct spdk_vhost_user_blk_task *task)
122 : {
123 0 : task->bvsession->vsession.task_cnt++;
124 0 : }
125 :
126 : static inline void
127 0 : blk_task_dec_task_cnt(struct spdk_vhost_user_blk_task *task)
128 : {
129 0 : assert(task->bvsession->vsession.task_cnt > 0);
130 0 : task->bvsession->vsession.task_cnt--;
131 0 : }
132 :
133 : static void
134 0 : blk_task_finish(struct spdk_vhost_user_blk_task *task)
135 : {
136 0 : blk_task_dec_task_cnt(task);
137 0 : task->used = false;
138 0 : }
139 :
140 : static void
141 0 : blk_task_init(struct spdk_vhost_user_blk_task *task)
142 : {
143 0 : struct spdk_vhost_blk_task *blk_task = &task->blk_task;
144 :
145 0 : task->used = true;
146 0 : blk_task->iovcnt = SPDK_COUNTOF(blk_task->iovs);
147 0 : blk_task->status = NULL;
148 0 : blk_task->used_len = 0;
149 0 : blk_task->payload_size = 0;
150 0 : }
151 :
152 : static void
153 0 : blk_task_enqueue(struct spdk_vhost_user_blk_task *task)
154 : {
155 0 : if (task->vq->packed.packed_ring) {
156 0 : vhost_vq_packed_ring_enqueue(&task->bvsession->vsession, task->vq,
157 0 : task->num_descs,
158 0 : task->buffer_id, task->blk_task.used_len,
159 0 : task->inflight_head);
160 0 : } else {
161 0 : vhost_vq_used_ring_enqueue(&task->bvsession->vsession, task->vq,
162 0 : task->req_idx, task->blk_task.used_len);
163 : }
164 0 : }
165 :
166 : static void
167 0 : vhost_user_blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task, void *cb_arg)
168 : {
169 0 : struct spdk_vhost_user_blk_task *user_task;
170 :
171 0 : user_task = SPDK_CONTAINEROF(task, struct spdk_vhost_user_blk_task, blk_task);
172 :
173 0 : blk_task_enqueue(user_task);
174 :
175 0 : SPDK_DEBUGLOG(vhost_blk, "Finished task (%p) req_idx=%d\n status: %" PRIu8"\n",
176 : user_task, user_task->req_idx, status);
177 0 : blk_task_finish(user_task);
178 0 : }
179 :
180 : static void
181 0 : blk_request_finish(uint8_t status, struct spdk_vhost_blk_task *task)
182 : {
183 :
184 0 : if (task->status) {
185 0 : *task->status = status;
186 0 : }
187 :
188 0 : task->cb(status, task, task->cb_arg);
189 0 : }
190 :
191 : /*
192 : * Process task's descriptor chain and setup data related fields.
193 : * Return
194 : * total size of supplied buffers
195 : *
196 : * FIXME: Make this function return to rd_cnt and wr_cnt
197 : */
198 : static int
199 0 : blk_iovs_split_queue_setup(struct spdk_vhost_blk_session *bvsession,
200 : struct spdk_vhost_virtqueue *vq,
201 : uint16_t req_idx, struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
202 : {
203 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
204 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
205 0 : struct vring_desc *desc, *desc_table;
206 0 : uint16_t out_cnt = 0, cnt = 0;
207 0 : uint32_t desc_table_size, len = 0;
208 0 : uint32_t desc_handled_cnt;
209 0 : int rc;
210 :
211 0 : rc = vhost_vq_get_desc(vsession, vq, req_idx, &desc, &desc_table, &desc_table_size);
212 0 : if (rc != 0) {
213 0 : SPDK_ERRLOG("%s: invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
214 0 : return -1;
215 : }
216 :
217 0 : desc_handled_cnt = 0;
218 0 : while (1) {
219 : /*
220 : * Maximum cnt reached?
221 : * Should not happen if request is well formatted, otherwise this is a BUG.
222 : */
223 0 : if (spdk_unlikely(cnt == *iovs_cnt)) {
224 0 : SPDK_DEBUGLOG(vhost_blk, "%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
225 : vsession->name, req_idx);
226 0 : return -1;
227 : }
228 :
229 0 : if (spdk_unlikely(vhost_vring_desc_to_iov(vsession, iovs, &cnt, desc))) {
230 0 : SPDK_DEBUGLOG(vhost_blk, "%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
231 : vsession->name, req_idx, cnt);
232 0 : return -1;
233 : }
234 :
235 0 : len += desc->len;
236 :
237 0 : out_cnt += vhost_vring_desc_is_wr(desc);
238 :
239 0 : rc = vhost_vring_desc_get_next(&desc, desc_table, desc_table_size);
240 0 : if (rc != 0) {
241 0 : SPDK_ERRLOG("%s: descriptor chain at index %"PRIu16" terminated unexpectedly.\n",
242 : vsession->name, req_idx);
243 0 : return -1;
244 0 : } else if (desc == NULL) {
245 0 : break;
246 : }
247 :
248 0 : desc_handled_cnt++;
249 0 : if (spdk_unlikely(desc_handled_cnt > desc_table_size)) {
250 : /* Break a cycle and report an error, if any. */
251 0 : SPDK_ERRLOG("%s: found a cycle in the descriptor chain: desc_table_size = %d, desc_handled_cnt = %d.\n",
252 : vsession->name, desc_table_size, desc_handled_cnt);
253 0 : return -1;
254 : }
255 : }
256 :
257 : /*
258 : * There must be least two descriptors.
259 : * First contain request so it must be readable.
260 : * Last descriptor contain buffer for response so it must be writable.
261 : */
262 0 : if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
263 0 : return -1;
264 : }
265 :
266 0 : *length = len;
267 0 : *iovs_cnt = cnt;
268 0 : return 0;
269 0 : }
270 :
271 : static int
272 0 : blk_iovs_packed_desc_setup(struct spdk_vhost_session *vsession,
273 : struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
274 : struct vring_packed_desc *desc_table, uint16_t desc_table_size,
275 : struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
276 : {
277 0 : struct vring_packed_desc *desc;
278 0 : uint16_t cnt = 0, out_cnt = 0;
279 0 : uint32_t len = 0;
280 :
281 0 : if (desc_table == NULL) {
282 0 : desc = &vq->vring.desc_packed[req_idx];
283 0 : } else {
284 0 : req_idx = 0;
285 0 : desc = desc_table;
286 : }
287 :
288 0 : while (1) {
289 : /*
290 : * Maximum cnt reached?
291 : * Should not happen if request is well formatted, otherwise this is a BUG.
292 : */
293 0 : if (spdk_unlikely(cnt == *iovs_cnt)) {
294 0 : SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
295 : vsession->name, req_idx);
296 0 : return -EINVAL;
297 : }
298 :
299 0 : if (spdk_unlikely(vhost_vring_packed_desc_to_iov(vsession, iovs, &cnt, desc))) {
300 0 : SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
301 : vsession->name, req_idx, cnt);
302 0 : return -EINVAL;
303 : }
304 :
305 0 : len += desc->len;
306 0 : out_cnt += vhost_vring_packed_desc_is_wr(desc);
307 :
308 : /* desc is NULL means we reach the last desc of this request */
309 0 : vhost_vring_packed_desc_get_next(&desc, &req_idx, vq, desc_table, desc_table_size);
310 0 : if (desc == NULL) {
311 0 : break;
312 : }
313 : }
314 :
315 : /*
316 : * There must be least two descriptors.
317 : * First contain request so it must be readable.
318 : * Last descriptor contain buffer for response so it must be writable.
319 : */
320 0 : if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
321 0 : return -EINVAL;
322 : }
323 :
324 0 : *length = len;
325 0 : *iovs_cnt = cnt;
326 :
327 0 : return 0;
328 0 : }
329 :
330 : static int
331 0 : blk_iovs_packed_queue_setup(struct spdk_vhost_blk_session *bvsession,
332 : struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
333 : struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
334 : {
335 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
336 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
337 0 : struct vring_packed_desc *desc = NULL, *desc_table;
338 0 : uint32_t desc_table_size;
339 0 : int rc;
340 :
341 0 : rc = vhost_vq_get_desc_packed(vsession, vq, req_idx, &desc,
342 : &desc_table, &desc_table_size);
343 0 : if (spdk_unlikely(rc != 0)) {
344 0 : SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
345 0 : return rc;
346 : }
347 :
348 0 : return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
349 0 : iovs, iovs_cnt, length);
350 0 : }
351 :
352 : static int
353 0 : blk_iovs_inflight_queue_setup(struct spdk_vhost_blk_session *bvsession,
354 : struct spdk_vhost_virtqueue *vq, uint16_t req_idx,
355 : struct iovec *iovs, uint16_t *iovs_cnt, uint32_t *length)
356 : {
357 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
358 0 : struct spdk_vhost_dev *vdev = vsession->vdev;
359 0 : spdk_vhost_inflight_desc *inflight_desc;
360 0 : struct vring_packed_desc *desc_table;
361 0 : uint16_t out_cnt = 0, cnt = 0;
362 0 : uint32_t desc_table_size, len = 0;
363 0 : int rc = 0;
364 :
365 0 : rc = vhost_inflight_queue_get_desc(vsession, vq->vring_inflight.inflight_packed->desc,
366 0 : req_idx, &inflight_desc, &desc_table, &desc_table_size);
367 0 : if (spdk_unlikely(rc != 0)) {
368 0 : SPDK_ERRLOG("%s: Invalid descriptor at index %"PRIu16".\n", vdev->name, req_idx);
369 0 : return rc;
370 : }
371 :
372 0 : if (desc_table != NULL) {
373 0 : return blk_iovs_packed_desc_setup(vsession, vq, req_idx, desc_table, desc_table_size,
374 0 : iovs, iovs_cnt, length);
375 : }
376 :
377 0 : while (1) {
378 : /*
379 : * Maximum cnt reached?
380 : * Should not happen if request is well formatted, otherwise this is a BUG.
381 : */
382 0 : if (spdk_unlikely(cnt == *iovs_cnt)) {
383 0 : SPDK_ERRLOG("%s: max IOVs in request reached (req_idx = %"PRIu16").\n",
384 : vsession->name, req_idx);
385 0 : return -EINVAL;
386 : }
387 :
388 0 : if (spdk_unlikely(vhost_vring_inflight_desc_to_iov(vsession, iovs, &cnt, inflight_desc))) {
389 0 : SPDK_ERRLOG("%s: invalid descriptor %" PRIu16" (req_idx = %"PRIu16").\n",
390 : vsession->name, req_idx, cnt);
391 0 : return -EINVAL;
392 : }
393 :
394 0 : len += inflight_desc->len;
395 0 : out_cnt += vhost_vring_inflight_desc_is_wr(inflight_desc);
396 :
397 : /* Without F_NEXT means it's the last desc */
398 0 : if ((inflight_desc->flags & VRING_DESC_F_NEXT) == 0) {
399 0 : break;
400 : }
401 :
402 0 : inflight_desc = &vq->vring_inflight.inflight_packed->desc[inflight_desc->next];
403 : }
404 :
405 : /*
406 : * There must be least two descriptors.
407 : * First contain request so it must be readable.
408 : * Last descriptor contain buffer for response so it must be writable.
409 : */
410 0 : if (spdk_unlikely(out_cnt == 0 || cnt < 2)) {
411 0 : return -EINVAL;
412 : }
413 :
414 0 : *length = len;
415 0 : *iovs_cnt = cnt;
416 :
417 0 : return 0;
418 0 : }
419 :
420 : static void
421 0 : blk_request_complete_cb(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
422 : {
423 0 : struct spdk_vhost_blk_task *task = cb_arg;
424 :
425 0 : spdk_bdev_free_io(bdev_io);
426 0 : blk_request_finish(success ? VIRTIO_BLK_S_OK : VIRTIO_BLK_S_IOERR, task);
427 0 : }
428 :
429 : static void
430 0 : blk_request_resubmit(void *arg)
431 : {
432 0 : struct spdk_vhost_blk_task *task = arg;
433 0 : int rc = 0;
434 :
435 0 : rc = virtio_blk_process_request(task->bdev_io_wait_vdev, task->bdev_io_wait_ch, task,
436 0 : task->cb, task->cb_arg);
437 0 : if (rc == 0) {
438 0 : SPDK_DEBUGLOG(vhost_blk, "====== Task %p resubmitted ======\n", task);
439 0 : } else {
440 0 : SPDK_DEBUGLOG(vhost_blk, "====== Task %p failed ======\n", task);
441 : }
442 0 : }
443 :
444 : static inline void
445 0 : blk_request_queue_io(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
446 : struct spdk_vhost_blk_task *task)
447 : {
448 0 : int rc;
449 0 : struct spdk_bdev *bdev = vhost_blk_get_bdev(vdev);
450 :
451 0 : task->bdev_io_wait.bdev = bdev;
452 0 : task->bdev_io_wait.cb_fn = blk_request_resubmit;
453 0 : task->bdev_io_wait.cb_arg = task;
454 0 : task->bdev_io_wait_ch = ch;
455 0 : task->bdev_io_wait_vdev = vdev;
456 :
457 0 : rc = spdk_bdev_queue_io_wait(bdev, ch, &task->bdev_io_wait);
458 0 : if (rc != 0) {
459 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
460 0 : }
461 0 : }
462 :
463 : int
464 0 : virtio_blk_process_request(struct spdk_vhost_dev *vdev, struct spdk_io_channel *ch,
465 : struct spdk_vhost_blk_task *task, virtio_blk_request_cb cb, void *cb_arg)
466 : {
467 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
468 0 : struct virtio_blk_outhdr req;
469 0 : struct virtio_blk_discard_write_zeroes *desc;
470 0 : struct iovec *iov;
471 0 : uint32_t type;
472 0 : uint64_t flush_bytes;
473 0 : uint32_t payload_len;
474 0 : uint16_t iovcnt;
475 0 : int rc;
476 :
477 0 : assert(bvdev != NULL);
478 :
479 0 : task->cb = cb;
480 0 : task->cb_arg = cb_arg;
481 :
482 0 : iov = &task->iovs[0];
483 0 : if (spdk_unlikely(iov->iov_len != sizeof(req))) {
484 0 : SPDK_DEBUGLOG(vhost_blk,
485 : "First descriptor size is %zu but expected %zu (task = %p).\n",
486 : iov->iov_len, sizeof(req), task);
487 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
488 0 : return -1;
489 : }
490 :
491 : /* Some SeaBIOS versions don't align the virtio_blk_outhdr on an 8-byte boundary, which
492 : * triggers ubsan errors. So copy this small 16-byte structure to the stack to workaround
493 : * this problem.
494 : */
495 0 : memcpy(&req, iov->iov_base, sizeof(req));
496 :
497 0 : iov = &task->iovs[task->iovcnt - 1];
498 0 : if (spdk_unlikely(iov->iov_len != 1)) {
499 0 : SPDK_DEBUGLOG(vhost_blk,
500 : "Last descriptor size is %zu but expected %d (task = %p).\n",
501 : iov->iov_len, 1, task);
502 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
503 0 : return -1;
504 : }
505 :
506 0 : payload_len = task->payload_size;
507 0 : task->status = iov->iov_base;
508 0 : payload_len -= sizeof(req) + sizeof(*task->status);
509 0 : iovcnt = task->iovcnt - 2;
510 :
511 0 : type = req.type;
512 : #ifdef VIRTIO_BLK_T_BARRIER
513 : /* Don't care about barrier for now (as QEMU's virtio-blk do). */
514 0 : type &= ~VIRTIO_BLK_T_BARRIER;
515 : #endif
516 :
517 0 : switch (type) {
518 : case VIRTIO_BLK_T_IN:
519 : case VIRTIO_BLK_T_OUT:
520 0 : if (spdk_unlikely(payload_len == 0 || (payload_len & (512 - 1)) != 0)) {
521 0 : SPDK_ERRLOG("%s - passed IO buffer is not multiple of 512b (task = %p).\n",
522 : type ? "WRITE" : "READ", task);
523 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
524 0 : return -1;
525 : }
526 :
527 0 : if (type == VIRTIO_BLK_T_IN) {
528 0 : task->used_len = payload_len + sizeof(*task->status);
529 0 : rc = spdk_bdev_readv(bvdev->bdev_desc, ch,
530 0 : &task->iovs[1], iovcnt, req.sector * 512,
531 0 : payload_len, blk_request_complete_cb, task);
532 0 : } else if (!bvdev->readonly) {
533 0 : task->used_len = sizeof(*task->status);
534 0 : rc = spdk_bdev_writev(bvdev->bdev_desc, ch,
535 0 : &task->iovs[1], iovcnt, req.sector * 512,
536 0 : payload_len, blk_request_complete_cb, task);
537 0 : } else {
538 0 : SPDK_DEBUGLOG(vhost_blk, "Device is in read-only mode!\n");
539 0 : rc = -1;
540 : }
541 :
542 0 : if (rc) {
543 0 : if (rc == -ENOMEM) {
544 0 : SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
545 0 : blk_request_queue_io(vdev, ch, task);
546 0 : } else {
547 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
548 0 : return -1;
549 : }
550 0 : }
551 0 : break;
552 : case VIRTIO_BLK_T_DISCARD:
553 0 : desc = task->iovs[1].iov_base;
554 0 : if (payload_len != sizeof(*desc)) {
555 0 : SPDK_NOTICELOG("Invalid discard payload size: %u\n", payload_len);
556 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
557 0 : return -1;
558 : }
559 :
560 0 : if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
561 0 : SPDK_ERRLOG("UNMAP flag is only used for WRITE ZEROES command\n");
562 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
563 0 : return -1;
564 : }
565 :
566 0 : rc = spdk_bdev_unmap(bvdev->bdev_desc, ch,
567 0 : desc->sector * 512, desc->num_sectors * 512,
568 0 : blk_request_complete_cb, task);
569 0 : if (rc) {
570 0 : if (rc == -ENOMEM) {
571 0 : SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
572 0 : blk_request_queue_io(vdev, ch, task);
573 0 : } else {
574 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
575 0 : return -1;
576 : }
577 0 : }
578 0 : break;
579 : case VIRTIO_BLK_T_WRITE_ZEROES:
580 0 : desc = task->iovs[1].iov_base;
581 0 : if (payload_len != sizeof(*desc)) {
582 0 : SPDK_NOTICELOG("Invalid write zeroes payload size: %u\n", payload_len);
583 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
584 0 : return -1;
585 : }
586 :
587 : /* Unmap this range, SPDK doesn't support it, kernel will enable this flag by default
588 : * without checking unmap feature is negotiated or not, the flag isn't mandatory, so
589 : * just print a warning.
590 : */
591 0 : if (desc->flags & VIRTIO_BLK_WRITE_ZEROES_FLAG_UNMAP) {
592 0 : SPDK_WARNLOG("Ignore the unmap flag for WRITE ZEROES from %"PRIx64", len %"PRIx64"\n",
593 : (uint64_t)desc->sector * 512, (uint64_t)desc->num_sectors * 512);
594 0 : }
595 :
596 0 : rc = spdk_bdev_write_zeroes(bvdev->bdev_desc, ch,
597 0 : desc->sector * 512, desc->num_sectors * 512,
598 0 : blk_request_complete_cb, task);
599 0 : if (rc) {
600 0 : if (rc == -ENOMEM) {
601 0 : SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
602 0 : blk_request_queue_io(vdev, ch, task);
603 0 : } else {
604 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
605 0 : return -1;
606 : }
607 0 : }
608 0 : break;
609 : case VIRTIO_BLK_T_FLUSH:
610 0 : flush_bytes = spdk_bdev_get_num_blocks(bvdev->bdev) * spdk_bdev_get_block_size(bvdev->bdev);
611 0 : if (req.sector != 0) {
612 0 : SPDK_NOTICELOG("sector must be zero for flush command\n");
613 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
614 0 : return -1;
615 : }
616 0 : rc = spdk_bdev_flush(bvdev->bdev_desc, ch,
617 0 : 0, flush_bytes,
618 0 : blk_request_complete_cb, task);
619 0 : if (rc) {
620 0 : if (rc == -ENOMEM) {
621 0 : SPDK_DEBUGLOG(vhost_blk, "No memory, start to queue io.\n");
622 0 : blk_request_queue_io(vdev, ch, task);
623 0 : } else if (rc == -ENOTSUP) {
624 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
625 0 : return -1;
626 : } else {
627 0 : blk_request_finish(VIRTIO_BLK_S_IOERR, task);
628 0 : return -1;
629 : }
630 0 : }
631 0 : break;
632 : case VIRTIO_BLK_T_GET_ID:
633 0 : if (!iovcnt || !payload_len) {
634 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
635 0 : return -1;
636 : }
637 0 : task->used_len = spdk_min((size_t)VIRTIO_BLK_ID_BYTES, task->iovs[1].iov_len);
638 0 : spdk_strcpy_pad(task->iovs[1].iov_base, spdk_bdev_get_name(bvdev->bdev),
639 0 : task->used_len, ' ');
640 0 : blk_request_finish(VIRTIO_BLK_S_OK, task);
641 0 : break;
642 : default:
643 0 : SPDK_DEBUGLOG(vhost_blk, "Not supported request type '%"PRIu32"'.\n", type);
644 0 : blk_request_finish(VIRTIO_BLK_S_UNSUPP, task);
645 0 : return -1;
646 : }
647 :
648 0 : return 0;
649 0 : }
650 :
651 : static void
652 0 : process_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
653 : {
654 0 : struct spdk_vhost_user_blk_task *task;
655 0 : struct spdk_vhost_blk_task *blk_task;
656 0 : int rc;
657 :
658 0 : assert(vq->packed.packed_ring == false);
659 :
660 0 : task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[req_idx];
661 0 : blk_task = &task->blk_task;
662 0 : if (spdk_unlikely(task->used)) {
663 0 : SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
664 : task->bvsession->vsession.name, req_idx);
665 0 : blk_task->used_len = 0;
666 0 : blk_task_enqueue(task);
667 0 : return;
668 : }
669 :
670 0 : blk_task_inc_task_cnt(task);
671 :
672 0 : blk_task_init(task);
673 :
674 0 : rc = blk_iovs_split_queue_setup(task->bvsession, vq, task->req_idx,
675 0 : blk_task->iovs, &blk_task->iovcnt, &blk_task->payload_size);
676 :
677 0 : if (rc) {
678 0 : SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
679 : /* Only READ and WRITE are supported for now. */
680 0 : vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
681 0 : return;
682 : }
683 :
684 0 : if (vhost_user_process_blk_request(task) == 0) {
685 0 : SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
686 : req_idx);
687 0 : } else {
688 0 : SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, req_idx);
689 : }
690 0 : }
691 :
692 : static void
693 0 : process_packed_blk_task(struct spdk_vhost_virtqueue *vq, uint16_t req_idx)
694 : {
695 0 : struct spdk_vhost_user_blk_task *task;
696 0 : struct spdk_vhost_blk_task *blk_task;
697 0 : uint16_t task_idx = req_idx, num_descs;
698 0 : int rc;
699 :
700 0 : assert(vq->packed.packed_ring);
701 :
702 : /* Packed ring used the buffer_id as the task_idx to get task struct.
703 : * In kernel driver, it uses the vq->free_head to set the buffer_id so the value
704 : * must be in the range of 0 ~ vring.size. The free_head value must be unique
705 : * in the outstanding requests.
706 : * We can't use the req_idx as the task_idx because the desc can be reused in
707 : * the next phase even when it's not completed in the previous phase. For example,
708 : * At phase 0, last_used_idx was 2 and desc0 was not completed.Then after moving
709 : * phase 1, last_avail_idx is updated to 1. In this case, req_idx can not be used
710 : * as task_idx because we will know task[0]->used is true at phase 1.
711 : * The split queue is quite different, the desc would insert into the free list when
712 : * device completes the request, the driver gets the desc from the free list which
713 : * ensures the req_idx is unique in the outstanding requests.
714 : */
715 0 : task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
716 :
717 0 : task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
718 0 : blk_task = &task->blk_task;
719 0 : if (spdk_unlikely(task->used)) {
720 0 : SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
721 : task->bvsession->vsession.name, task_idx);
722 0 : blk_task->used_len = 0;
723 0 : blk_task_enqueue(task);
724 0 : return;
725 : }
726 :
727 0 : task->req_idx = req_idx;
728 0 : task->num_descs = num_descs;
729 0 : task->buffer_id = task_idx;
730 :
731 0 : rte_vhost_set_inflight_desc_packed(task->bvsession->vsession.vid, vq->vring_idx,
732 0 : req_idx, (req_idx + num_descs - 1) % vq->vring.size,
733 0 : &task->inflight_head);
734 :
735 0 : blk_task_inc_task_cnt(task);
736 :
737 0 : blk_task_init(task);
738 :
739 0 : rc = blk_iovs_packed_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
740 0 : &blk_task->iovcnt,
741 0 : &blk_task->payload_size);
742 0 : if (rc) {
743 0 : SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
744 : /* Only READ and WRITE are supported for now. */
745 0 : vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
746 0 : return;
747 : }
748 :
749 0 : if (vhost_user_process_blk_request(task) == 0) {
750 0 : SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
751 : task_idx);
752 0 : } else {
753 0 : SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
754 : }
755 0 : }
756 :
757 : static void
758 0 : process_packed_inflight_blk_task(struct spdk_vhost_virtqueue *vq,
759 : uint16_t req_idx)
760 : {
761 0 : spdk_vhost_inflight_desc *desc_array = vq->vring_inflight.inflight_packed->desc;
762 0 : spdk_vhost_inflight_desc *desc = &desc_array[req_idx];
763 0 : struct spdk_vhost_user_blk_task *task;
764 0 : struct spdk_vhost_blk_task *blk_task;
765 0 : uint16_t task_idx, num_descs;
766 0 : int rc;
767 :
768 0 : task_idx = desc_array[desc->last].id;
769 0 : num_descs = desc->num;
770 : /* In packed ring reconnection, we use the last_used_idx as the
771 : * initial value. So when we process the inflight descs we still
772 : * need to update the available ring index.
773 : */
774 0 : vq->last_avail_idx += num_descs;
775 0 : if (vq->last_avail_idx >= vq->vring.size) {
776 0 : vq->last_avail_idx -= vq->vring.size;
777 0 : vq->packed.avail_phase = !vq->packed.avail_phase;
778 0 : }
779 :
780 0 : task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
781 0 : blk_task = &task->blk_task;
782 0 : if (spdk_unlikely(task->used)) {
783 0 : SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
784 : task->bvsession->vsession.name, task_idx);
785 0 : blk_task->used_len = 0;
786 0 : blk_task_enqueue(task);
787 0 : return;
788 : }
789 :
790 0 : task->req_idx = req_idx;
791 0 : task->num_descs = num_descs;
792 0 : task->buffer_id = task_idx;
793 : /* It's for cleaning inflight entries */
794 0 : task->inflight_head = req_idx;
795 :
796 0 : blk_task_inc_task_cnt(task);
797 :
798 0 : blk_task_init(task);
799 :
800 0 : rc = blk_iovs_inflight_queue_setup(task->bvsession, vq, task->req_idx, blk_task->iovs,
801 0 : &blk_task->iovcnt,
802 0 : &blk_task->payload_size);
803 0 : if (rc) {
804 0 : SPDK_DEBUGLOG(vhost_blk, "Invalid request (req_idx = %"PRIu16").\n", task->req_idx);
805 : /* Only READ and WRITE are supported for now. */
806 0 : vhost_user_blk_request_finish(VIRTIO_BLK_S_UNSUPP, blk_task, NULL);
807 0 : return;
808 : }
809 :
810 0 : if (vhost_user_process_blk_request(task) == 0) {
811 0 : SPDK_DEBUGLOG(vhost_blk, "====== Task %p req_idx %d submitted ======\n", task,
812 : task_idx);
813 0 : } else {
814 0 : SPDK_ERRLOG("====== Task %p req_idx %d failed ======\n", task, task_idx);
815 : }
816 0 : }
817 :
818 : static int
819 0 : submit_inflight_desc(struct spdk_vhost_blk_session *bvsession,
820 : struct spdk_vhost_virtqueue *vq)
821 : {
822 0 : struct spdk_vhost_session *vsession;
823 0 : spdk_vhost_resubmit_info *resubmit;
824 0 : spdk_vhost_resubmit_desc *resubmit_list;
825 0 : uint16_t req_idx;
826 0 : int i, resubmit_cnt;
827 :
828 0 : resubmit = vq->vring_inflight.resubmit_inflight;
829 0 : if (spdk_likely(resubmit == NULL || resubmit->resubmit_list == NULL ||
830 : resubmit->resubmit_num == 0)) {
831 0 : return 0;
832 : }
833 :
834 0 : resubmit_list = resubmit->resubmit_list;
835 0 : vsession = &bvsession->vsession;
836 :
837 0 : for (i = resubmit->resubmit_num - 1; i >= 0; --i) {
838 0 : req_idx = resubmit_list[i].index;
839 0 : SPDK_DEBUGLOG(vhost_blk, "====== Start processing resubmit request idx %"PRIu16"======\n",
840 : req_idx);
841 :
842 0 : if (spdk_unlikely(req_idx >= vq->vring.size)) {
843 0 : SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
844 : vsession->name, req_idx, vq->vring.size);
845 0 : vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
846 0 : continue;
847 : }
848 :
849 0 : if (vq->packed.packed_ring) {
850 0 : process_packed_inflight_blk_task(vq, req_idx);
851 0 : } else {
852 0 : process_blk_task(vq, req_idx);
853 : }
854 0 : }
855 0 : resubmit_cnt = resubmit->resubmit_num;
856 0 : resubmit->resubmit_num = 0;
857 0 : return resubmit_cnt;
858 0 : }
859 :
860 : static int
861 0 : process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
862 : {
863 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
864 0 : uint16_t reqs[SPDK_VHOST_VQ_MAX_SUBMISSIONS];
865 0 : uint16_t reqs_cnt, i;
866 0 : int resubmit_cnt = 0;
867 :
868 0 : resubmit_cnt = submit_inflight_desc(bvsession, vq);
869 :
870 0 : reqs_cnt = vhost_vq_avail_ring_get(vq, reqs, SPDK_COUNTOF(reqs));
871 0 : if (!reqs_cnt) {
872 0 : return resubmit_cnt;
873 : }
874 :
875 0 : for (i = 0; i < reqs_cnt; i++) {
876 0 : SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
877 : reqs[i]);
878 :
879 0 : if (spdk_unlikely(reqs[i] >= vq->vring.size)) {
880 0 : SPDK_ERRLOG("%s: request idx '%"PRIu16"' exceeds virtqueue size (%"PRIu16").\n",
881 : vsession->name, reqs[i], vq->vring.size);
882 0 : vhost_vq_used_ring_enqueue(vsession, vq, reqs[i], 0);
883 0 : continue;
884 : }
885 :
886 0 : rte_vhost_set_inflight_desc_split(vsession->vid, vq->vring_idx, reqs[i]);
887 :
888 0 : process_blk_task(vq, reqs[i]);
889 0 : }
890 :
891 0 : return reqs_cnt;
892 0 : }
893 :
894 : static int
895 0 : process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
896 : {
897 0 : uint16_t i = 0;
898 0 : uint16_t count = 0;
899 0 : int resubmit_cnt = 0;
900 :
901 0 : resubmit_cnt = submit_inflight_desc(bvsession, vq);
902 :
903 0 : while (i++ < SPDK_VHOST_VQ_MAX_SUBMISSIONS &&
904 0 : vhost_vq_packed_ring_is_avail(vq)) {
905 0 : SPDK_DEBUGLOG(vhost_blk, "====== Starting processing request idx %"PRIu16"======\n",
906 : vq->last_avail_idx);
907 0 : count++;
908 0 : process_packed_blk_task(vq, vq->last_avail_idx);
909 : }
910 :
911 0 : return count > 0 ? count : resubmit_cnt;
912 0 : }
913 :
914 : static int
915 0 : _vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
916 : {
917 0 : struct spdk_vhost_session *vsession = vq->vsession;
918 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
919 0 : bool packed_ring;
920 0 : int rc = 0;
921 :
922 0 : packed_ring = vq->packed.packed_ring;
923 0 : if (packed_ring) {
924 0 : rc = process_packed_vq(bvsession, vq);
925 0 : } else {
926 0 : rc = process_vq(bvsession, vq);
927 : }
928 :
929 0 : vhost_session_vq_used_signal(vq);
930 :
931 0 : return rc;
932 :
933 0 : }
934 :
/*
 * void-pointer adapter around _vdev_vq_worker(); `arg` is the virtqueue to
 * service. Used as the per-queue interrupt handler when a bdev is attached.
 */
static int
vdev_vq_worker(void *arg)
{
	return _vdev_vq_worker((struct spdk_vhost_virtqueue *)arg);
}
942 :
943 : static int
944 0 : vdev_worker(void *arg)
945 : {
946 0 : struct spdk_vhost_blk_session *bvsession = arg;
947 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
948 0 : uint16_t q_idx;
949 0 : int rc = 0;
950 :
951 0 : for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
952 0 : rc += _vdev_vq_worker(&vsession->virtqueue[q_idx]);
953 0 : }
954 :
955 0 : return rc > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
956 0 : }
957 :
958 : static void
959 0 : no_bdev_process_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
960 : {
961 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
962 0 : struct iovec iovs[SPDK_VHOST_IOVS_MAX];
963 0 : uint32_t length;
964 0 : uint16_t iovcnt, req_idx;
965 :
966 0 : if (vhost_vq_avail_ring_get(vq, &req_idx, 1) != 1) {
967 0 : return;
968 : }
969 :
970 0 : iovcnt = SPDK_COUNTOF(iovs);
971 0 : if (blk_iovs_split_queue_setup(bvsession, vq, req_idx, iovs, &iovcnt, &length) == 0) {
972 0 : *(volatile uint8_t *)iovs[iovcnt - 1].iov_base = VIRTIO_BLK_S_IOERR;
973 0 : SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
974 0 : }
975 :
976 0 : vhost_vq_used_ring_enqueue(vsession, vq, req_idx, 0);
977 0 : }
978 :
979 : static void
980 0 : no_bdev_process_packed_vq(struct spdk_vhost_blk_session *bvsession, struct spdk_vhost_virtqueue *vq)
981 : {
982 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
983 0 : struct spdk_vhost_user_blk_task *task;
984 0 : struct spdk_vhost_blk_task *blk_task;
985 0 : uint32_t length;
986 0 : uint16_t req_idx = vq->last_avail_idx;
987 0 : uint16_t task_idx, num_descs;
988 :
989 0 : if (!vhost_vq_packed_ring_is_avail(vq)) {
990 0 : return;
991 : }
992 :
993 0 : task_idx = vhost_vring_packed_desc_get_buffer_id(vq, req_idx, &num_descs);
994 0 : task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[task_idx];
995 0 : blk_task = &task->blk_task;
996 0 : if (spdk_unlikely(task->used)) {
997 0 : SPDK_ERRLOG("%s: request with idx '%"PRIu16"' is already pending.\n",
998 : vsession->name, req_idx);
999 0 : vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
1000 0 : task->buffer_id, blk_task->used_len,
1001 0 : task->inflight_head);
1002 0 : return;
1003 : }
1004 :
1005 0 : task->req_idx = req_idx;
1006 0 : task->num_descs = num_descs;
1007 0 : task->buffer_id = task_idx;
1008 0 : blk_task_init(task);
1009 :
1010 0 : if (blk_iovs_packed_queue_setup(bvsession, vq, task->req_idx, blk_task->iovs, &blk_task->iovcnt,
1011 : &length)) {
1012 0 : *(volatile uint8_t *)(blk_task->iovs[blk_task->iovcnt - 1].iov_base) = VIRTIO_BLK_S_IOERR;
1013 0 : SPDK_DEBUGLOG(vhost_blk_data, "Aborting request %" PRIu16"\n", req_idx);
1014 0 : }
1015 :
1016 0 : task->used = false;
1017 0 : vhost_vq_packed_ring_enqueue(vsession, vq, num_descs,
1018 0 : task->buffer_id, blk_task->used_len,
1019 0 : task->inflight_head);
1020 0 : }
1021 :
1022 : static int
1023 0 : _no_bdev_vdev_vq_worker(struct spdk_vhost_virtqueue *vq)
1024 : {
1025 0 : struct spdk_vhost_session *vsession = vq->vsession;
1026 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
1027 0 : bool packed_ring;
1028 :
1029 0 : packed_ring = vq->packed.packed_ring;
1030 0 : if (packed_ring) {
1031 0 : no_bdev_process_packed_vq(bvsession, vq);
1032 0 : } else {
1033 0 : no_bdev_process_vq(bvsession, vq);
1034 : }
1035 :
1036 0 : vhost_session_vq_used_signal(vq);
1037 :
1038 0 : if (vsession->task_cnt == 0 && bvsession->io_channel) {
1039 0 : vhost_blk_put_io_channel(bvsession->io_channel);
1040 0 : bvsession->io_channel = NULL;
1041 0 : }
1042 :
1043 0 : return SPDK_POLLER_BUSY;
1044 0 : }
1045 :
/*
 * void-pointer adapter around _no_bdev_vdev_vq_worker(); `arg` is the
 * virtqueue to service. Used as the per-queue interrupt handler when no bdev
 * is attached.
 */
static int
no_bdev_vdev_vq_worker(void *arg)
{
	return _no_bdev_vdev_vq_worker((struct spdk_vhost_virtqueue *)arg);
}
1053 :
1054 : static int
1055 0 : no_bdev_vdev_worker(void *arg)
1056 : {
1057 0 : struct spdk_vhost_blk_session *bvsession = arg;
1058 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1059 0 : uint16_t q_idx;
1060 :
1061 0 : for (q_idx = 0; q_idx < vsession->max_queues; q_idx++) {
1062 0 : _no_bdev_vdev_vq_worker(&vsession->virtqueue[q_idx]);
1063 0 : }
1064 :
1065 0 : return SPDK_POLLER_BUSY;
1066 0 : }
1067 :
1068 : static void
1069 0 : vhost_blk_session_unregister_interrupts(struct spdk_vhost_blk_session *bvsession)
1070 : {
1071 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1072 0 : struct spdk_vhost_virtqueue *vq;
1073 0 : int i;
1074 :
1075 0 : SPDK_DEBUGLOG(vhost_blk, "unregister virtqueues interrupt\n");
1076 0 : for (i = 0; i < vsession->max_queues; i++) {
1077 0 : vq = &vsession->virtqueue[i];
1078 0 : if (vq->intr == NULL) {
1079 0 : break;
1080 : }
1081 :
1082 0 : SPDK_DEBUGLOG(vhost_blk, "unregister vq[%d]'s kickfd is %d\n",
1083 : i, vq->vring.kickfd);
1084 0 : spdk_interrupt_unregister(&vq->intr);
1085 0 : }
1086 0 : }
1087 :
1088 : static void
1089 0 : _vhost_blk_vq_register_interrupt(void *arg)
1090 : {
1091 0 : struct spdk_vhost_virtqueue *vq = arg;
1092 0 : struct spdk_vhost_session *vsession = vq->vsession;
1093 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vsession->vdev);
1094 :
1095 0 : assert(bvdev != NULL);
1096 :
1097 0 : if (bvdev->bdev) {
1098 0 : vq->intr = spdk_interrupt_register(vq->vring.kickfd, vdev_vq_worker, vq, "vdev_vq_worker");
1099 0 : } else {
1100 0 : vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
1101 : "no_bdev_vdev_vq_worker");
1102 : }
1103 :
1104 0 : if (vq->intr == NULL) {
1105 0 : SPDK_ERRLOG("Fail to register req notifier handler.\n");
1106 0 : assert(false);
1107 : }
1108 0 : }
1109 :
1110 : static int
1111 0 : vhost_blk_vq_enable(struct spdk_vhost_session *vsession, struct spdk_vhost_virtqueue *vq)
1112 : {
1113 0 : if (spdk_interrupt_mode_is_enabled()) {
1114 0 : spdk_thread_send_msg(vsession->vdev->thread, _vhost_blk_vq_register_interrupt, vq);
1115 0 : }
1116 :
1117 0 : return 0;
1118 : }
1119 :
1120 : static int
1121 0 : vhost_blk_session_register_no_bdev_interrupts(struct spdk_vhost_blk_session *bvsession)
1122 : {
1123 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1124 0 : struct spdk_vhost_virtqueue *vq = NULL;
1125 0 : int i;
1126 :
1127 0 : SPDK_DEBUGLOG(vhost_blk, "Register virtqueues interrupt\n");
1128 0 : for (i = 0; i < vsession->max_queues; i++) {
1129 0 : vq = &vsession->virtqueue[i];
1130 0 : SPDK_DEBUGLOG(vhost_blk, "Register vq[%d]'s kickfd is %d\n",
1131 : i, vq->vring.kickfd);
1132 0 : vq->intr = spdk_interrupt_register(vq->vring.kickfd, no_bdev_vdev_vq_worker, vq,
1133 : "no_bdev_vdev_vq_worker");
1134 0 : if (vq->intr == NULL) {
1135 0 : goto err;
1136 : }
1137 :
1138 0 : }
1139 :
1140 0 : return 0;
1141 :
1142 : err:
1143 0 : vhost_blk_session_unregister_interrupts(bvsession);
1144 0 : return -1;
1145 0 : }
1146 :
1147 : static void
1148 0 : vhost_blk_poller_set_interrupt_mode(struct spdk_poller *poller, void *cb_arg, bool interrupt_mode)
1149 : {
1150 0 : struct spdk_vhost_blk_session *bvsession = cb_arg;
1151 :
1152 0 : vhost_user_session_set_interrupt_mode(&bvsession->vsession, interrupt_mode);
1153 0 : }
1154 :
1155 : static void
1156 0 : bdev_event_cpl_cb(struct spdk_vhost_dev *vdev, void *ctx)
1157 : {
1158 0 : enum spdk_bdev_event_type type = (enum spdk_bdev_event_type)(uintptr_t)ctx;
1159 0 : struct spdk_vhost_blk_dev *bvdev;
1160 :
1161 0 : if (type == SPDK_BDEV_EVENT_REMOVE) {
1162 : /* All sessions have been notified, time to close the bdev */
1163 0 : bvdev = to_blk_dev(vdev);
1164 0 : assert(bvdev != NULL);
1165 0 : spdk_bdev_close(bvdev->bdev_desc);
1166 0 : bvdev->bdev_desc = NULL;
1167 0 : bvdev->bdev = NULL;
1168 0 : }
1169 0 : }
1170 :
1171 : static int
1172 0 : vhost_session_bdev_resize_cb(struct spdk_vhost_dev *vdev,
1173 : struct spdk_vhost_session *vsession,
1174 : void *ctx)
1175 : {
1176 0 : SPDK_NOTICELOG("bdev send slave msg to vid(%d)\n", vsession->vid);
1177 : #if RTE_VERSION >= RTE_VERSION_NUM(23, 03, 0, 0)
1178 0 : rte_vhost_backend_config_change(vsession->vid, false);
1179 : #else
1180 : rte_vhost_slave_config_change(vsession->vid, false);
1181 : #endif
1182 :
1183 0 : return 0;
1184 : }
1185 :
1186 : static void
1187 0 : vhost_user_blk_resize_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
1188 : {
1189 0 : vhost_user_dev_foreach_session(vdev, vhost_session_bdev_resize_cb,
1190 0 : cb, cb_arg);
1191 0 : }
1192 :
1193 : static int
1194 0 : vhost_user_session_bdev_remove_cb(struct spdk_vhost_dev *vdev,
1195 : struct spdk_vhost_session *vsession,
1196 : void *ctx)
1197 : {
1198 0 : struct spdk_vhost_blk_session *bvsession;
1199 0 : int rc;
1200 :
1201 0 : bvsession = to_blk_session(vsession);
1202 0 : if (bvsession->requestq_poller) {
1203 0 : spdk_poller_unregister(&bvsession->requestq_poller);
1204 0 : if (spdk_interrupt_mode_is_enabled()) {
1205 0 : vhost_blk_session_unregister_interrupts(bvsession);
1206 0 : rc = vhost_blk_session_register_no_bdev_interrupts(bvsession);
1207 0 : if (rc) {
1208 0 : SPDK_ERRLOG("%s: Interrupt register failed\n", vsession->name);
1209 0 : return rc;
1210 : }
1211 0 : }
1212 :
1213 0 : bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
1214 0 : spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
1215 0 : bvsession);
1216 0 : }
1217 :
1218 0 : return 0;
1219 0 : }
1220 :
1221 : static void
1222 0 : vhost_user_bdev_remove_cb(struct spdk_vhost_dev *vdev, bdev_event_cb_complete cb, void *cb_arg)
1223 : {
1224 0 : SPDK_WARNLOG("%s: hot-removing bdev - all further requests will fail.\n",
1225 : vdev->name);
1226 :
1227 0 : vhost_user_dev_foreach_session(vdev, vhost_user_session_bdev_remove_cb,
1228 0 : cb, cb_arg);
1229 0 : }
1230 :
1231 : static void
1232 0 : vhost_user_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_vhost_dev *vdev,
1233 : bdev_event_cb_complete cb, void *cb_arg)
1234 : {
1235 0 : switch (type) {
1236 : case SPDK_BDEV_EVENT_REMOVE:
1237 0 : vhost_user_bdev_remove_cb(vdev, cb, cb_arg);
1238 0 : break;
1239 : case SPDK_BDEV_EVENT_RESIZE:
1240 0 : vhost_user_blk_resize_cb(vdev, cb, cb_arg);
1241 0 : break;
1242 : default:
1243 0 : assert(false);
1244 : return;
1245 : }
1246 0 : }
1247 :
1248 : static void
1249 0 : bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
1250 : void *event_ctx)
1251 : {
1252 0 : struct spdk_vhost_dev *vdev = (struct spdk_vhost_dev *)event_ctx;
1253 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1254 :
1255 0 : assert(bvdev != NULL);
1256 :
1257 0 : SPDK_DEBUGLOG(vhost_blk, "Bdev event: type %d, name %s\n",
1258 : type,
1259 : bdev->name);
1260 :
1261 0 : switch (type) {
1262 : case SPDK_BDEV_EVENT_REMOVE:
1263 : case SPDK_BDEV_EVENT_RESIZE:
1264 0 : bvdev->ops->bdev_event(type, vdev, bdev_event_cpl_cb, (void *)type);
1265 0 : break;
1266 : default:
1267 0 : SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
1268 0 : break;
1269 : }
1270 0 : }
1271 :
1272 : static void
1273 0 : free_task_pool(struct spdk_vhost_blk_session *bvsession)
1274 : {
1275 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1276 0 : struct spdk_vhost_virtqueue *vq;
1277 0 : uint16_t i;
1278 :
1279 0 : for (i = 0; i < vsession->max_queues; i++) {
1280 0 : vq = &vsession->virtqueue[i];
1281 0 : if (vq->tasks == NULL) {
1282 0 : continue;
1283 : }
1284 :
1285 0 : spdk_free(vq->tasks);
1286 0 : vq->tasks = NULL;
1287 0 : }
1288 0 : }
1289 :
1290 : static int
1291 0 : alloc_vq_task_pool(struct spdk_vhost_session *vsession, uint16_t qid)
1292 : {
1293 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
1294 0 : struct spdk_vhost_virtqueue *vq;
1295 0 : struct spdk_vhost_user_blk_task *task;
1296 0 : uint32_t task_cnt;
1297 0 : uint32_t j;
1298 :
1299 0 : if (qid >= SPDK_VHOST_MAX_VQUEUES) {
1300 0 : return -EINVAL;
1301 : }
1302 :
1303 0 : vq = &vsession->virtqueue[qid];
1304 0 : if (vq->vring.desc == NULL) {
1305 0 : return 0;
1306 : }
1307 :
1308 0 : task_cnt = vq->vring.size;
1309 0 : if (task_cnt > SPDK_VHOST_MAX_VQ_SIZE) {
1310 : /* sanity check */
1311 0 : SPDK_ERRLOG("%s: virtqueue %"PRIu16" is too big. (size = %"PRIu32", max = %"PRIu32")\n",
1312 : vsession->name, qid, task_cnt, SPDK_VHOST_MAX_VQ_SIZE);
1313 0 : return -1;
1314 : }
1315 0 : vq->tasks = spdk_zmalloc(sizeof(struct spdk_vhost_user_blk_task) * task_cnt,
1316 : SPDK_CACHE_LINE_SIZE, NULL,
1317 : SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
1318 0 : if (vq->tasks == NULL) {
1319 0 : SPDK_ERRLOG("%s: failed to allocate %"PRIu32" tasks for virtqueue %"PRIu16"\n",
1320 : vsession->name, task_cnt, qid);
1321 0 : return -1;
1322 : }
1323 :
1324 0 : for (j = 0; j < task_cnt; j++) {
1325 0 : task = &((struct spdk_vhost_user_blk_task *)vq->tasks)[j];
1326 0 : task->bvsession = bvsession;
1327 0 : task->req_idx = j;
1328 0 : task->vq = vq;
1329 0 : }
1330 :
1331 0 : return 0;
1332 0 : }
1333 :
1334 : static int
1335 0 : vhost_blk_start(struct spdk_vhost_dev *vdev,
1336 : struct spdk_vhost_session *vsession, void *unused)
1337 : {
1338 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
1339 0 : struct spdk_vhost_blk_dev *bvdev;
1340 0 : int i;
1341 :
1342 : /* return if start is already in progress */
1343 0 : if (bvsession->requestq_poller) {
1344 0 : SPDK_INFOLOG(vhost, "%s: start in progress\n", vsession->name);
1345 0 : return -EINPROGRESS;
1346 : }
1347 :
1348 : /* validate all I/O queues are in a contiguous index range */
1349 0 : for (i = 0; i < vsession->max_queues; i++) {
1350 : /* vring.desc and vring.desc_packed are in a union struct
1351 : * so q->vring.desc can replace q->vring.desc_packed.
1352 : */
1353 0 : if (vsession->virtqueue[i].vring.desc == NULL) {
1354 0 : SPDK_ERRLOG("%s: queue %"PRIu32" is empty\n", vsession->name, i);
1355 0 : return -1;
1356 : }
1357 0 : }
1358 :
1359 0 : bvdev = to_blk_dev(vdev);
1360 0 : assert(bvdev != NULL);
1361 0 : bvsession->bvdev = bvdev;
1362 :
1363 0 : if (bvdev->bdev) {
1364 0 : bvsession->io_channel = vhost_blk_get_io_channel(vdev);
1365 0 : if (!bvsession->io_channel) {
1366 0 : free_task_pool(bvsession);
1367 0 : SPDK_ERRLOG("%s: I/O channel allocation failed\n", vsession->name);
1368 0 : return -1;
1369 : }
1370 0 : }
1371 :
1372 0 : if (bvdev->bdev) {
1373 0 : bvsession->requestq_poller = SPDK_POLLER_REGISTER(vdev_worker, bvsession, 0);
1374 0 : } else {
1375 0 : bvsession->requestq_poller = SPDK_POLLER_REGISTER(no_bdev_vdev_worker, bvsession, 0);
1376 : }
1377 0 : SPDK_INFOLOG(vhost, "%s: started poller on lcore %d\n",
1378 : vsession->name, spdk_env_get_current_core());
1379 :
1380 0 : spdk_poller_register_interrupt(bvsession->requestq_poller, vhost_blk_poller_set_interrupt_mode,
1381 0 : bvsession);
1382 :
1383 0 : return 0;
1384 0 : }
1385 :
1386 : static int
1387 0 : destroy_session_poller_cb(void *arg)
1388 : {
1389 0 : struct spdk_vhost_blk_session *bvsession = arg;
1390 0 : struct spdk_vhost_session *vsession = &bvsession->vsession;
1391 0 : struct spdk_vhost_user_dev *user_dev = to_user_dev(vsession->vdev);
1392 0 : int i;
1393 :
1394 0 : if (vsession->task_cnt > 0 || (pthread_mutex_trylock(&user_dev->lock) != 0)) {
1395 0 : assert(vsession->stop_retry_count > 0);
1396 0 : vsession->stop_retry_count--;
1397 0 : if (vsession->stop_retry_count == 0) {
1398 0 : SPDK_ERRLOG("%s: Timedout when destroy session (task_cnt %d)\n", vsession->name,
1399 : vsession->task_cnt);
1400 0 : spdk_poller_unregister(&bvsession->stop_poller);
1401 0 : vhost_user_session_stop_done(vsession, -ETIMEDOUT);
1402 0 : }
1403 :
1404 0 : return SPDK_POLLER_BUSY;
1405 : }
1406 :
1407 0 : for (i = 0; i < vsession->max_queues; i++) {
1408 0 : vsession->virtqueue[i].next_event_time = 0;
1409 0 : vhost_vq_used_signal(vsession, &vsession->virtqueue[i]);
1410 0 : }
1411 :
1412 0 : SPDK_INFOLOG(vhost, "%s: stopping poller on lcore %d\n",
1413 : vsession->name, spdk_env_get_current_core());
1414 :
1415 0 : if (bvsession->io_channel) {
1416 0 : vhost_blk_put_io_channel(bvsession->io_channel);
1417 0 : bvsession->io_channel = NULL;
1418 0 : }
1419 :
1420 0 : free_task_pool(bvsession);
1421 0 : spdk_poller_unregister(&bvsession->stop_poller);
1422 0 : vhost_user_session_stop_done(vsession, 0);
1423 :
1424 0 : pthread_mutex_unlock(&user_dev->lock);
1425 0 : return SPDK_POLLER_BUSY;
1426 0 : }
1427 :
1428 : static int
1429 0 : vhost_blk_stop(struct spdk_vhost_dev *vdev,
1430 : struct spdk_vhost_session *vsession, void *unused)
1431 : {
1432 0 : struct spdk_vhost_blk_session *bvsession = to_blk_session(vsession);
1433 :
1434 : /* return if stop is already in progress */
1435 0 : if (bvsession->stop_poller) {
1436 0 : return -EINPROGRESS;
1437 : }
1438 :
1439 0 : spdk_poller_unregister(&bvsession->requestq_poller);
1440 0 : vhost_blk_session_unregister_interrupts(bvsession);
1441 :
1442 0 : bvsession->vsession.stop_retry_count = (SPDK_VHOST_SESSION_STOP_RETRY_TIMEOUT_IN_SEC * 1000 *
1443 : 1000) / SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US;
1444 0 : bvsession->stop_poller = SPDK_POLLER_REGISTER(destroy_session_poller_cb,
1445 : bvsession, SPDK_VHOST_SESSION_STOP_RETRY_PERIOD_IN_US);
1446 0 : return 0;
1447 0 : }
1448 :
1449 : static void
1450 0 : vhost_blk_dump_info_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
1451 : {
1452 0 : struct spdk_vhost_blk_dev *bvdev;
1453 :
1454 0 : bvdev = to_blk_dev(vdev);
1455 0 : assert(bvdev != NULL);
1456 :
1457 0 : spdk_json_write_named_object_begin(w, "block");
1458 :
1459 0 : spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
1460 :
1461 0 : spdk_json_write_name(w, "bdev");
1462 0 : if (bvdev->bdev) {
1463 0 : spdk_json_write_string(w, spdk_bdev_get_name(bvdev->bdev));
1464 0 : } else {
1465 0 : spdk_json_write_null(w);
1466 : }
1467 0 : spdk_json_write_named_string(w, "transport", bvdev->ops->name);
1468 :
1469 0 : spdk_json_write_object_end(w);
1470 0 : }
1471 :
1472 : static void
1473 0 : vhost_blk_write_config_json(struct spdk_vhost_dev *vdev, struct spdk_json_write_ctx *w)
1474 : {
1475 0 : struct spdk_vhost_blk_dev *bvdev;
1476 :
1477 0 : bvdev = to_blk_dev(vdev);
1478 0 : assert(bvdev != NULL);
1479 :
1480 0 : if (!bvdev->bdev) {
1481 0 : return;
1482 : }
1483 :
1484 0 : spdk_json_write_object_begin(w);
1485 0 : spdk_json_write_named_string(w, "method", "vhost_create_blk_controller");
1486 :
1487 0 : spdk_json_write_named_object_begin(w, "params");
1488 0 : spdk_json_write_named_string(w, "ctrlr", vdev->name);
1489 0 : spdk_json_write_named_string(w, "dev_name", spdk_bdev_get_name(bvdev->bdev));
1490 0 : spdk_json_write_named_string(w, "cpumask",
1491 0 : spdk_cpuset_fmt(spdk_thread_get_cpumask(vdev->thread)));
1492 0 : spdk_json_write_named_bool(w, "readonly", bvdev->readonly);
1493 0 : spdk_json_write_named_string(w, "transport", bvdev->ops->name);
1494 0 : spdk_json_write_object_end(w);
1495 :
1496 0 : spdk_json_write_object_end(w);
1497 0 : }
1498 :
1499 : static int vhost_blk_destroy(struct spdk_vhost_dev *dev);
1500 :
1501 : static int
1502 0 : vhost_blk_get_config(struct spdk_vhost_dev *vdev, uint8_t *config,
1503 : uint32_t len)
1504 : {
1505 0 : struct virtio_blk_config blkcfg;
1506 0 : struct spdk_bdev *bdev;
1507 0 : uint32_t blk_size;
1508 0 : uint64_t blkcnt;
1509 :
1510 0 : memset(&blkcfg, 0, sizeof(blkcfg));
1511 0 : bdev = vhost_blk_get_bdev(vdev);
1512 0 : if (bdev == NULL) {
1513 : /* We can't just return -1 here as this GET_CONFIG message might
1514 : * be caused by a QEMU VM reboot. Returning -1 will indicate an
1515 : * error to QEMU, who might then decide to terminate itself.
1516 : * We don't want that. A simple reboot shouldn't break the system.
1517 : *
1518 : * Presenting a block device with block size 0 and block count 0
1519 : * doesn't cause any problems on QEMU side and the virtio-pci
1520 : * device is even still available inside the VM, but there will
1521 : * be no block device created for it - the kernel drivers will
1522 : * silently reject it.
1523 : */
1524 0 : blk_size = 0;
1525 0 : blkcnt = 0;
1526 0 : } else {
1527 0 : blk_size = spdk_bdev_get_block_size(bdev);
1528 0 : blkcnt = spdk_bdev_get_num_blocks(bdev);
1529 0 : if (spdk_bdev_get_buf_align(bdev) > 1) {
1530 0 : blkcfg.size_max = SPDK_BDEV_LARGE_BUF_MAX_SIZE;
1531 0 : blkcfg.seg_max = spdk_min(SPDK_VHOST_IOVS_MAX - 2 - 1, SPDK_BDEV_IO_NUM_CHILD_IOV - 2 - 1);
1532 0 : } else {
1533 0 : blkcfg.size_max = 131072;
1534 : /* -2 for REQ and RESP and -1 for region boundary splitting */
1535 0 : blkcfg.seg_max = SPDK_VHOST_IOVS_MAX - 2 - 1;
1536 : }
1537 : }
1538 :
1539 0 : blkcfg.blk_size = blk_size;
1540 : /* minimum I/O size in blocks */
1541 0 : blkcfg.min_io_size = 1;
1542 : /* expressed in 512 Bytes sectors */
1543 0 : blkcfg.capacity = (blkcnt * blk_size) / 512;
1544 : /* QEMU can overwrite this value when started */
1545 0 : blkcfg.num_queues = SPDK_VHOST_MAX_VQUEUES;
1546 :
1547 0 : if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
1548 : /* 16MiB, expressed in 512 Bytes */
1549 0 : blkcfg.max_discard_sectors = 32768;
1550 0 : blkcfg.max_discard_seg = 1;
1551 0 : blkcfg.discard_sector_alignment = blk_size / 512;
1552 0 : }
1553 0 : if (bdev && spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
1554 0 : blkcfg.max_write_zeroes_sectors = 32768;
1555 0 : blkcfg.max_write_zeroes_seg = 1;
1556 0 : }
1557 :
1558 0 : memcpy(config, &blkcfg, spdk_min(len, sizeof(blkcfg)));
1559 :
1560 0 : return 0;
1561 0 : }
1562 :
1563 : static int
1564 0 : vhost_blk_set_coalescing(struct spdk_vhost_dev *vdev, uint32_t delay_base_us,
1565 : uint32_t iops_threshold)
1566 : {
1567 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1568 :
1569 0 : assert(bvdev != NULL);
1570 :
1571 0 : return bvdev->ops->set_coalescing(vdev, delay_base_us, iops_threshold);
1572 0 : }
1573 :
1574 : static void
1575 0 : vhost_blk_get_coalescing(struct spdk_vhost_dev *vdev, uint32_t *delay_base_us,
1576 : uint32_t *iops_threshold)
1577 : {
1578 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1579 :
1580 0 : assert(bvdev != NULL);
1581 :
1582 0 : bvdev->ops->get_coalescing(vdev, delay_base_us, iops_threshold);
1583 0 : }
1584 :
1585 : static const struct spdk_vhost_user_dev_backend vhost_blk_user_device_backend = {
1586 : .session_ctx_size = sizeof(struct spdk_vhost_blk_session) - sizeof(struct spdk_vhost_session),
1587 : .start_session = vhost_blk_start,
1588 : .stop_session = vhost_blk_stop,
1589 : .alloc_vq_tasks = alloc_vq_task_pool,
1590 : .enable_vq = vhost_blk_vq_enable,
1591 : };
1592 :
1593 : static const struct spdk_vhost_dev_backend vhost_blk_device_backend = {
1594 : .type = VHOST_BACKEND_BLK,
1595 : .vhost_get_config = vhost_blk_get_config,
1596 : .dump_info_json = vhost_blk_dump_info_json,
1597 : .write_config_json = vhost_blk_write_config_json,
1598 : .remove_device = vhost_blk_destroy,
1599 : .set_coalescing = vhost_blk_set_coalescing,
1600 : .get_coalescing = vhost_blk_get_coalescing,
1601 : };
1602 :
1603 : int
1604 1 : virtio_blk_construct_ctrlr(struct spdk_vhost_dev *vdev, const char *address,
1605 : struct spdk_cpuset *cpumask, const struct spdk_json_val *params,
1606 : const struct spdk_vhost_user_dev_backend *user_backend)
1607 : {
1608 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1609 :
1610 1 : assert(bvdev != NULL);
1611 :
1612 1 : return bvdev->ops->create_ctrlr(vdev, cpumask, address, params, (void *)user_backend);
1613 1 : }
1614 :
1615 : int
1616 1 : spdk_vhost_blk_construct(const char *name, const char *cpumask, const char *dev_name,
1617 : const char *transport, const struct spdk_json_val *params)
1618 : {
1619 1 : struct spdk_vhost_blk_dev *bvdev = NULL;
1620 1 : struct spdk_vhost_dev *vdev;
1621 1 : struct spdk_bdev *bdev;
1622 1 : const char *transport_name = VIRTIO_BLK_DEFAULT_TRANSPORT;
1623 1 : int ret = 0;
1624 :
1625 1 : bvdev = calloc(1, sizeof(*bvdev));
1626 1 : if (bvdev == NULL) {
1627 0 : ret = -ENOMEM;
1628 0 : goto out;
1629 : }
1630 :
1631 1 : if (transport != NULL) {
1632 0 : transport_name = transport;
1633 0 : }
1634 :
1635 1 : bvdev->ops = virtio_blk_get_transport_ops(transport_name);
1636 1 : if (!bvdev->ops) {
1637 0 : ret = -EINVAL;
1638 0 : SPDK_ERRLOG("Transport type '%s' unavailable.\n", transport_name);
1639 0 : goto out;
1640 : }
1641 :
1642 1 : ret = spdk_bdev_open_ext(dev_name, true, bdev_event_cb, bvdev, &bvdev->bdev_desc);
1643 1 : if (ret != 0) {
1644 0 : SPDK_ERRLOG("%s: could not open bdev '%s', error=%d\n",
1645 : name, dev_name, ret);
1646 0 : goto out;
1647 : }
1648 1 : bdev = spdk_bdev_desc_get_bdev(bvdev->bdev_desc);
1649 :
1650 1 : vdev = &bvdev->vdev;
1651 1 : vdev->virtio_features = SPDK_VHOST_BLK_FEATURES_BASE;
1652 1 : vdev->disabled_features = SPDK_VHOST_BLK_DISABLED_FEATURES;
1653 1 : vdev->protocol_features = SPDK_VHOST_BLK_PROTOCOL_FEATURES;
1654 :
1655 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_UNMAP)) {
1656 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_DISCARD);
1657 1 : }
1658 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
1659 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_WRITE_ZEROES);
1660 1 : }
1661 :
1662 1 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH)) {
1663 1 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_FLUSH);
1664 1 : }
1665 :
1666 1 : bvdev->bdev = bdev;
1667 1 : bvdev->readonly = false;
1668 1 : ret = vhost_dev_register(vdev, name, cpumask, params, &vhost_blk_device_backend,
1669 : &vhost_blk_user_device_backend, false);
1670 1 : if (ret != 0) {
1671 0 : spdk_bdev_close(bvdev->bdev_desc);
1672 0 : goto out;
1673 : }
1674 :
1675 1 : SPDK_INFOLOG(vhost, "%s: using bdev '%s'\n", name, dev_name);
1676 : out:
1677 1 : if (ret != 0 && bvdev) {
1678 0 : free(bvdev);
1679 0 : }
1680 2 : return ret;
1681 1 : }
1682 :
1683 : int
1684 1 : virtio_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
1685 : {
1686 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1687 :
1688 1 : assert(bvdev != NULL);
1689 :
1690 1 : return bvdev->ops->destroy_ctrlr(vdev);
1691 1 : }
1692 :
1693 : static int
1694 1 : vhost_blk_destroy(struct spdk_vhost_dev *vdev)
1695 : {
1696 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1697 1 : int rc;
1698 :
1699 1 : assert(bvdev != NULL);
1700 :
1701 1 : rc = vhost_dev_unregister(&bvdev->vdev);
1702 1 : if (rc != 0) {
1703 0 : return rc;
1704 : }
1705 :
1706 1 : if (bvdev->bdev_desc) {
1707 0 : spdk_bdev_close(bvdev->bdev_desc);
1708 0 : bvdev->bdev_desc = NULL;
1709 0 : }
1710 1 : bvdev->bdev = NULL;
1711 :
1712 1 : free(bvdev);
1713 1 : return 0;
1714 1 : }
1715 :
1716 : struct spdk_io_channel *
1717 0 : vhost_blk_get_io_channel(struct spdk_vhost_dev *vdev)
1718 : {
1719 0 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1720 :
1721 0 : assert(bvdev != NULL);
1722 :
1723 0 : return spdk_bdev_get_io_channel(bvdev->bdev_desc);
1724 0 : }
1725 :
/* Release an I/O channel obtained with vhost_blk_get_io_channel(). */
void
vhost_blk_put_io_channel(struct spdk_io_channel *ch)
{
	spdk_put_io_channel(ch);
}
1731 :
1732 : static struct spdk_virtio_blk_transport *
1733 1 : vhost_user_blk_create(const struct spdk_json_val *params)
1734 : {
1735 1 : int ret;
1736 1 : struct spdk_virtio_blk_transport *vhost_user_blk;
1737 :
1738 1 : vhost_user_blk = calloc(1, sizeof(*vhost_user_blk));
1739 1 : if (!vhost_user_blk) {
1740 0 : return NULL;
1741 : }
1742 :
1743 1 : ret = vhost_user_init();
1744 1 : if (ret != 0) {
1745 0 : free(vhost_user_blk);
1746 0 : return NULL;
1747 : }
1748 :
1749 1 : return vhost_user_blk;
1750 1 : }
1751 :
1752 : static int
1753 1 : vhost_user_blk_destroy(struct spdk_virtio_blk_transport *transport,
1754 : spdk_vhost_fini_cb cb_fn)
1755 : {
1756 1 : vhost_user_fini(cb_fn);
1757 1 : free(transport);
1758 1 : return 0;
1759 : }
1760 :
1761 : struct rpc_vhost_blk {
1762 : bool readonly;
1763 : bool packed_ring;
1764 : };
1765 :
1766 : static const struct spdk_json_object_decoder rpc_construct_vhost_blk[] = {
1767 : {"readonly", offsetof(struct rpc_vhost_blk, readonly), spdk_json_decode_bool, true},
1768 : {"packed_ring", offsetof(struct rpc_vhost_blk, packed_ring), spdk_json_decode_bool, true},
1769 : };
1770 :
1771 : static int
1772 1 : vhost_user_blk_create_ctrlr(struct spdk_vhost_dev *vdev, struct spdk_cpuset *cpumask,
1773 : const char *address, const struct spdk_json_val *params, void *custom_opts)
1774 : {
1775 1 : struct rpc_vhost_blk req = {0};
1776 1 : struct spdk_vhost_blk_dev *bvdev = to_blk_dev(vdev);
1777 :
1778 1 : assert(bvdev != NULL);
1779 :
1780 1 : if (spdk_json_decode_object_relaxed(params, rpc_construct_vhost_blk,
1781 : SPDK_COUNTOF(rpc_construct_vhost_blk),
1782 : &req)) {
1783 0 : SPDK_DEBUGLOG(vhost_blk, "spdk_json_decode_object failed\n");
1784 0 : return -EINVAL;
1785 : }
1786 :
1787 1 : if (req.packed_ring) {
1788 0 : vdev->virtio_features |= (uint64_t)req.packed_ring << VIRTIO_F_RING_PACKED;
1789 0 : }
1790 1 : if (req.readonly) {
1791 0 : vdev->virtio_features |= (1ULL << VIRTIO_BLK_F_RO);
1792 0 : bvdev->readonly = req.readonly;
1793 0 : }
1794 :
1795 1 : return vhost_user_dev_create(vdev, address, cpumask, custom_opts, false);
1796 1 : }
1797 :
/*
 * Transport `destroy_ctrlr` op: unregisters the vhost-user device and returns
 * the result of vhost_user_dev_unregister().
 */
static int
vhost_user_blk_destroy_ctrlr(struct spdk_vhost_dev *vdev)
{
	int rc = vhost_user_dev_unregister(vdev);

	return rc;
}
1803 :
1804 : static void
1805 0 : vhost_user_blk_dump_opts(struct spdk_virtio_blk_transport *transport, struct spdk_json_write_ctx *w)
1806 : {
1807 0 : assert(w != NULL);
1808 :
1809 0 : spdk_json_write_named_string(w, "name", transport->ops->name);
1810 0 : }
1811 :
1812 : static const struct spdk_virtio_blk_transport_ops vhost_user_blk = {
1813 : .name = "vhost_user_blk",
1814 :
1815 : .dump_opts = vhost_user_blk_dump_opts,
1816 :
1817 : .create = vhost_user_blk_create,
1818 : .destroy = vhost_user_blk_destroy,
1819 :
1820 : .create_ctrlr = vhost_user_blk_create_ctrlr,
1821 : .destroy_ctrlr = vhost_user_blk_destroy_ctrlr,
1822 :
1823 : .bdev_event = vhost_user_bdev_event_cb,
1824 : .set_coalescing = vhost_user_set_coalescing,
1825 : .get_coalescing = vhost_user_get_coalescing,
1826 : };
1827 :
1828 1 : SPDK_VIRTIO_BLK_TRANSPORT_REGISTER(vhost_user_blk, &vhost_user_blk);
1829 :
1830 1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk)
1831 1 : SPDK_LOG_REGISTER_COMPONENT(vhost_blk_data)
|