Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2016 Intel Corporation. All rights reserved.
3 : * Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/bdev.h"
10 :
11 : #include "spdk/accel.h"
12 : #include "spdk/config.h"
13 : #include "spdk/env.h"
14 : #include "spdk/thread.h"
15 : #include "spdk/likely.h"
16 : #include "spdk/queue.h"
17 : #include "spdk/nvme_spec.h"
18 : #include "spdk/scsi_spec.h"
19 : #include "spdk/notify.h"
20 : #include "spdk/util.h"
21 : #include "spdk/trace.h"
22 : #include "spdk/dma.h"
23 :
24 : #include "spdk/bdev_module.h"
25 : #include "spdk/log.h"
26 : #include "spdk/string.h"
27 :
28 : #include "bdev_internal.h"
29 : #include "spdk_internal/trace_defs.h"
30 : #include "spdk_internal/assert.h"
31 :
32 : #ifdef SPDK_CONFIG_VTUNE
33 : #include "ittnotify.h"
34 : #include "ittnotify_types.h"
35 : int __itt_init_ittlib(const char *, __itt_group_id);
36 : #endif
37 :
38 : #define SPDK_BDEV_IO_POOL_SIZE (64 * 1024 - 1)
39 : #define SPDK_BDEV_IO_CACHE_SIZE 256
40 : #define SPDK_BDEV_AUTO_EXAMINE true
41 : #define BUF_SMALL_CACHE_SIZE 128
42 : #define BUF_LARGE_CACHE_SIZE 16
43 : #define NOMEM_THRESHOLD_COUNT 8
44 :
45 : #define SPDK_BDEV_QOS_TIMESLICE_IN_USEC 1000
46 : #define SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE 1
47 : #define SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE 512
48 : #define SPDK_BDEV_QOS_MIN_IOS_PER_SEC 1000
49 : #define SPDK_BDEV_QOS_MIN_BYTES_PER_SEC (1024 * 1024)
50 : #define SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC (UINT64_MAX / (1024 * 1024))
51 : #define SPDK_BDEV_QOS_LIMIT_NOT_DEFINED UINT64_MAX
52 : #define SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC 1000
53 :
54              : /* The maximum number of child requests outstanding at a time when splitting
55              :  * a UNMAP or WRITE ZEROES command into child requests.
56              :  */
57 : #define SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS (8)
58 : #define BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD 1000000
59 :
60              : /* The maximum number of child requests outstanding at a time when splitting
61              :  * a COPY command into child requests.
62              :  */
63 : #define SPDK_BDEV_MAX_CHILDREN_COPY_REQS (8)
64 :
65 : #define LOG_ALREADY_CLAIMED_ERROR(detail, bdev) \
66 : log_already_claimed(SPDK_LOG_ERROR, __LINE__, __func__, detail, bdev)
67 : #ifdef DEBUG
68 : #define LOG_ALREADY_CLAIMED_DEBUG(detail, bdev) \
69 : log_already_claimed(SPDK_LOG_DEBUG, __LINE__, __func__, detail, bdev)
70 : #else
71 : #define LOG_ALREADY_CLAIMED_DEBUG(detail, bdev) do {} while(0)
72 : #endif
73 :
74 : static void log_already_claimed(enum spdk_log_level level, const int line, const char *func,
75 : const char *detail, struct spdk_bdev *bdev);
76 :
77 : static const char *qos_rpc_type[] = {"rw_ios_per_sec",
78 : "rw_mbytes_per_sec", "r_mbytes_per_sec", "w_mbytes_per_sec"
79 : };
80 :
81 : TAILQ_HEAD(spdk_bdev_list, spdk_bdev);
82 :
83 : RB_HEAD(bdev_name_tree, spdk_bdev_name);
84 :
85 : static int
86 575 : bdev_name_cmp(struct spdk_bdev_name *name1, struct spdk_bdev_name *name2)
87 : {
88 575 : return strcmp(name1->name, name2->name);
89 : }
90 :
91 2111 : RB_GENERATE_STATIC(bdev_name_tree, spdk_bdev_name, node, bdev_name_cmp);
92 :
93 : struct spdk_bdev_mgr {
94 : struct spdk_mempool *bdev_io_pool;
95 :
96 : void *zero_buffer;
97 :
98 : TAILQ_HEAD(bdev_module_list, spdk_bdev_module) bdev_modules;
99 :
100 : struct spdk_bdev_list bdevs;
101 : struct bdev_name_tree bdev_names;
102 :
103 : bool init_complete;
104 : bool module_init_complete;
105 :
106 : struct spdk_spinlock spinlock;
107 :
108 : TAILQ_HEAD(, spdk_bdev_open_async_ctx) async_bdev_opens;
109 :
110 : #ifdef SPDK_CONFIG_VTUNE
111 : __itt_domain *domain;
112 : #endif
113 : };
114 :
115 : static struct spdk_bdev_mgr g_bdev_mgr = {
116 : .bdev_modules = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdev_modules),
117 : .bdevs = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.bdevs),
118 : .bdev_names = RB_INITIALIZER(g_bdev_mgr.bdev_names),
119 : .init_complete = false,
120 : .module_init_complete = false,
121 : .async_bdev_opens = TAILQ_HEAD_INITIALIZER(g_bdev_mgr.async_bdev_opens),
122 : };
123 :
124 : static void
125 : __attribute__((constructor))
126 3 : _bdev_init(void)
127 : {
128 3 : spdk_spin_init(&g_bdev_mgr.spinlock);
129 3 : }
130 :
131 : typedef void (*lock_range_cb)(struct lba_range *range, void *ctx, int status);
132 :
133 : typedef void (*bdev_copy_bounce_buffer_cpl)(void *ctx, int rc);
134 :
135 : struct lba_range {
136 : struct spdk_bdev *bdev;
137 : uint64_t offset;
138 : uint64_t length;
139 : bool quiesce;
140 : void *locked_ctx;
141 : struct spdk_thread *owner_thread;
142 : struct spdk_bdev_channel *owner_ch;
143 : TAILQ_ENTRY(lba_range) tailq;
144 : TAILQ_ENTRY(lba_range) tailq_module;
145 : };
146 :
147 : static struct spdk_bdev_opts g_bdev_opts = {
148 : .bdev_io_pool_size = SPDK_BDEV_IO_POOL_SIZE,
149 : .bdev_io_cache_size = SPDK_BDEV_IO_CACHE_SIZE,
150 : .bdev_auto_examine = SPDK_BDEV_AUTO_EXAMINE,
151 : .iobuf_small_cache_size = BUF_SMALL_CACHE_SIZE,
152 : .iobuf_large_cache_size = BUF_LARGE_CACHE_SIZE,
153 : };
154 :
155 : static spdk_bdev_init_cb g_init_cb_fn = NULL;
156 : static void *g_init_cb_arg = NULL;
157 :
158 : static spdk_bdev_fini_cb g_fini_cb_fn = NULL;
159 : static void *g_fini_cb_arg = NULL;
160 : static struct spdk_thread *g_fini_thread = NULL;
161 :
162 : struct spdk_bdev_qos_limit {
163 : /** IOs or bytes allowed per second (i.e., 1s). */
164 : uint64_t limit;
165 :
166             : 	/** Remaining IOs or bytes allowed in the current timeslice (e.g., 1ms).
167             : 	 * For bytes, this may go negative if an I/O is submitted while some bytes
168             : 	 * remain but the I/O is larger than that amount. The excess is deducted
169             : 	 * from the next timeslice.
170             : 	 */
171 : int64_t remaining_this_timeslice;
172 :
173 : /** Minimum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
174 : uint32_t min_per_timeslice;
175 :
176 : /** Maximum allowed IOs or bytes to be issued in one timeslice (e.g., 1ms). */
177 : uint32_t max_per_timeslice;
178 :
179 : /** Function to check whether to queue the IO.
180             : 	 * If the IO is allowed to pass, the quota is reduced accordingly.
181 : */
182 : bool (*queue_io)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
183 :
184             : 	/** Function to rewind the quota if the IO was allowed to pass this limit
185             : 	 * but was queued due to one of the subsequent limits.
186             : 	 */
187 : void (*rewind_quota)(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io);
188 : };
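Illustrative sketch only (not part of bdev.c): one plausible byte-based queue_io/rewind_quota pair matching the semantics described above, where remaining_this_timeslice may go negative and rewind_quota returns the quota when a later limit ends up queueing the IO. The function names and the helper example_io_size_in_bytes() are assumptions.

static uint64_t
example_io_size_in_bytes(struct spdk_bdev_io *io)
{
	/* Hypothetical helper: payload size of the IO in bytes. */
	return io->u.bdev.num_blocks * io->bdev->blocklen;
}

static bool
example_bps_queue_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	if (limit->remaining_this_timeslice <= 0) {
		return true;	/* quota exhausted: queue the IO */
	}
	/* May drive the counter negative; the excess is repaid in the next timeslice. */
	limit->remaining_this_timeslice -= example_io_size_in_bytes(io);
	return false;		/* the IO is allowed to pass */
}

static void
example_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
{
	limit->remaining_this_timeslice += example_io_size_in_bytes(io);
}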
189 :
190 : struct spdk_bdev_qos {
191             : 	/** Rate limits, one entry per rate limit type. */
192 : struct spdk_bdev_qos_limit rate_limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
193 :
194 : /** The channel that all I/O are funneled through. */
195 : struct spdk_bdev_channel *ch;
196 :
197 : /** The thread on which the poller is running. */
198 : struct spdk_thread *thread;
199 :
200 : /** Size of a timeslice in tsc ticks. */
201 : uint64_t timeslice_size;
202 :
203 : /** Timestamp of start of last timeslice. */
204 : uint64_t last_timeslice;
205 :
206 : /** Poller that processes queued I/O commands each time slice. */
207 : struct spdk_poller *poller;
208 : };
209 :
210 : struct spdk_bdev_mgmt_channel {
211 : /*
212 : * Each thread keeps a cache of bdev_io - this allows
213 : * bdev threads which are *not* DPDK threads to still
214 : * benefit from a per-thread bdev_io cache. Without
215 : * this, non-DPDK threads fetching from the mempool
216 : * incur a cmpxchg on get and put.
217 : */
218 : bdev_io_stailq_t per_thread_cache;
219 : uint32_t per_thread_cache_count;
220 : uint32_t bdev_io_cache_size;
221 :
222 : struct spdk_iobuf_channel iobuf;
223 :
224 : TAILQ_HEAD(, spdk_bdev_shared_resource) shared_resources;
225 : TAILQ_HEAD(, spdk_bdev_io_wait_entry) io_wait_queue;
226 : };
227 :
228 : /*
229             :  * Per-module (or per-io_device) data. Multiple bdevs built on the same io_device
230             :  * queue their IO awaiting retry here. This makes it possible to retry sending
231             :  * IO to one bdev after IO from another bdev completes.
232 : */
233 : struct spdk_bdev_shared_resource {
234 : /* The bdev management channel */
235 : struct spdk_bdev_mgmt_channel *mgmt_ch;
236 :
237 : /*
238 : * Count of I/O submitted to bdev module and waiting for completion.
239 : * Incremented before submit_request() is called on an spdk_bdev_io.
240 : */
241 : uint64_t io_outstanding;
242 :
243 : /*
244 : * Queue of IO awaiting retry because of a previous NOMEM status returned
245 : * on this channel.
246 : */
247 : bdev_io_tailq_t nomem_io;
248 :
249 : /*
250 : * Threshold which io_outstanding must drop to before retrying nomem_io.
251 : */
252 : uint64_t nomem_threshold;
253 :
254 : /* I/O channel allocated by a bdev module */
255 : struct spdk_io_channel *shared_ch;
256 :
257 : struct spdk_poller *nomem_poller;
258 :
259 : /* Refcount of bdev channels using this resource */
260 : uint32_t ref;
261 :
262 : TAILQ_ENTRY(spdk_bdev_shared_resource) link;
263 : };
264 :
265 : #define BDEV_CH_RESET_IN_PROGRESS (1 << 0)
266 : #define BDEV_CH_QOS_ENABLED (1 << 1)
267 :
268 : struct spdk_bdev_channel {
269 : struct spdk_bdev *bdev;
270 :
271 : /* The channel for the underlying device */
272 : struct spdk_io_channel *channel;
273 :
274 : /* Accel channel */
275 : struct spdk_io_channel *accel_channel;
276 :
277 : /* Per io_device per thread data */
278 : struct spdk_bdev_shared_resource *shared_resource;
279 :
280 : struct spdk_bdev_io_stat *stat;
281 :
282 : /*
283 : * Count of I/O submitted to the underlying dev module through this channel
284 : * and waiting for completion.
285 : */
286 : uint64_t io_outstanding;
287 :
288 : /*
289 : * List of all submitted I/Os including I/O that are generated via splitting.
290 : */
291 : bdev_io_tailq_t io_submitted;
292 :
293 : /*
294 : * List of spdk_bdev_io that are currently queued because they write to a locked
295 : * LBA range.
296 : */
297 : bdev_io_tailq_t io_locked;
298 :
299 : /* List of I/Os with accel sequence being currently executed */
300 : bdev_io_tailq_t io_accel_exec;
301 :
302 : /* List of I/Os doing memory domain pull/push */
303 : bdev_io_tailq_t io_memory_domain;
304 :
305 : uint32_t flags;
306 :
307 : /* Counts number of bdev_io in the io_submitted TAILQ */
308 : uint16_t queue_depth;
309 :
310 : uint16_t trace_id;
311 :
312 : struct spdk_histogram_data *histogram;
313 :
314 : #ifdef SPDK_CONFIG_VTUNE
315 : uint64_t start_tsc;
316 : uint64_t interval_tsc;
317 : __itt_string_handle *handle;
318 : struct spdk_bdev_io_stat *prev_stat;
319 : #endif
320 :
321 : lba_range_tailq_t locked_ranges;
322 :
323 : /** List of I/Os queued by QoS. */
324 : bdev_io_tailq_t qos_queued_io;
325 : };
326 :
327 : struct media_event_entry {
328 : struct spdk_bdev_media_event event;
329 : TAILQ_ENTRY(media_event_entry) tailq;
330 : };
331 :
332 : #define MEDIA_EVENT_POOL_SIZE 64
333 :
334 : struct spdk_bdev_desc {
335 : struct spdk_bdev *bdev;
336 : bool write;
337 : bool memory_domains_supported;
338 : bool accel_sequence_supported[SPDK_BDEV_NUM_IO_TYPES];
339 : struct spdk_bdev_open_opts opts;
340 : struct spdk_thread *thread;
341 : struct {
342 : spdk_bdev_event_cb_t event_fn;
343 : void *ctx;
344 : } callback;
345 : bool closed;
346 : struct spdk_spinlock spinlock;
347 : uint32_t refs;
348 : TAILQ_HEAD(, media_event_entry) pending_media_events;
349 : TAILQ_HEAD(, media_event_entry) free_media_events;
350 : struct media_event_entry *media_events_buffer;
351 : TAILQ_ENTRY(spdk_bdev_desc) link;
352 :
353 : uint64_t timeout_in_sec;
354 : spdk_bdev_io_timeout_cb cb_fn;
355 : void *cb_arg;
356 : struct spdk_poller *io_timeout_poller;
357 : struct spdk_bdev_module_claim *claim;
358 : };
359 :
360 : struct spdk_bdev_iostat_ctx {
361 : struct spdk_bdev_io_stat *stat;
362 : enum spdk_bdev_reset_stat_mode reset_mode;
363 : spdk_bdev_get_device_stat_cb cb;
364 : void *cb_arg;
365 : };
366 :
367 : struct set_qos_limit_ctx {
368 : void (*cb_fn)(void *cb_arg, int status);
369 : void *cb_arg;
370 : struct spdk_bdev *bdev;
371 : };
372 :
373 : struct spdk_bdev_channel_iter {
374 : spdk_bdev_for_each_channel_msg fn;
375 : spdk_bdev_for_each_channel_done cpl;
376 : struct spdk_io_channel_iter *i;
377 : void *ctx;
378 : };
379 :
380 : struct spdk_bdev_io_error_stat {
381 : uint32_t error_status[-SPDK_MIN_BDEV_IO_STATUS];
382 : };
383 :
384 : enum bdev_io_retry_state {
385 : BDEV_IO_RETRY_STATE_INVALID,
386 : BDEV_IO_RETRY_STATE_PULL,
387 : BDEV_IO_RETRY_STATE_PULL_MD,
388 : BDEV_IO_RETRY_STATE_SUBMIT,
389 : BDEV_IO_RETRY_STATE_PUSH,
390 : BDEV_IO_RETRY_STATE_PUSH_MD,
391 : };
392 :
393 : #define __bdev_to_io_dev(bdev) (((char *)bdev) + 1)
394 : #define __bdev_from_io_dev(io_dev) ((struct spdk_bdev *)(((char *)io_dev) - 1))
395 : #define __io_ch_to_bdev_ch(io_ch) ((struct spdk_bdev_channel *)spdk_io_channel_get_ctx(io_ch))
396 : #define __io_ch_to_bdev_mgmt_ch(io_ch) ((struct spdk_bdev_mgmt_channel *)spdk_io_channel_get_ctx(io_ch))
397 :
398 : static inline void bdev_io_complete(void *ctx);
399 : static inline void bdev_io_complete_unsubmitted(struct spdk_bdev_io *bdev_io);
400 : static void bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io);
401 : static void bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io);
402 :
403 : static void bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
404 : static int bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io);
405 :
406 : static void bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
407 : struct spdk_io_channel *ch, void *_ctx);
408 : static void bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status);
409 :
410 : static int bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
411 : struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
412 : uint64_t num_blocks,
413 : struct spdk_memory_domain *domain, void *domain_ctx,
414 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
415 : spdk_bdev_io_completion_cb cb, void *cb_arg);
416 : static int bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
417 : struct iovec *iov, int iovcnt, void *md_buf,
418 : uint64_t offset_blocks, uint64_t num_blocks,
419 : struct spdk_memory_domain *domain, void *domain_ctx,
420 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
421 : uint32_t nvme_cdw12_raw, uint32_t nvme_cdw13_raw,
422 : spdk_bdev_io_completion_cb cb, void *cb_arg);
423 :
424 : static int bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
425 : uint64_t offset, uint64_t length,
426 : lock_range_cb cb_fn, void *cb_arg);
427 :
428 : static int bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
429 : uint64_t offset, uint64_t length,
430 : lock_range_cb cb_fn, void *cb_arg);
431 :
432 : static bool bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_io *bio_to_abort);
433 : static bool bdev_abort_buf_io(struct spdk_bdev_mgmt_channel *ch, struct spdk_bdev_io *bio_to_abort);
434 :
435 : static bool claim_type_is_v2(enum spdk_bdev_claim_type type);
436 : static void bdev_desc_release_claims(struct spdk_bdev_desc *desc);
437 : static void claim_reset(struct spdk_bdev *bdev);
438 :
439 : static void bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch);
440 :
441 : static bool bdev_io_should_split(struct spdk_bdev_io *bdev_io);
442 :
443 : #define bdev_get_ext_io_opt(opts, field, defval) \
444 : ((opts) != NULL ? SPDK_GET_FIELD(opts, field, defval) : (defval))
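Illustrative only: the macro above reads a field from an optional, size-versioned ext-IO opts structure, falling back to a default when the opts pointer is NULL (or, via SPDK_GET_FIELD, when the caller's struct is too small to contain the field). A hypothetical use, assuming opts points to a caller-provided struct spdk_bdev_ext_io_opts:

	struct spdk_memory_domain *domain;

	domain = bdev_get_ext_io_opt(opts, memory_domain, NULL);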
445 :
446 : static inline void
447 671 : bdev_ch_add_to_io_submitted(struct spdk_bdev_io *bdev_io)
448 : {
449 671 : TAILQ_INSERT_TAIL(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
450 671 : bdev_io->internal.ch->queue_depth++;
451 671 : }
452 :
453 : static inline void
454 671 : bdev_ch_remove_from_io_submitted(struct spdk_bdev_io *bdev_io)
455 : {
456 671 : TAILQ_REMOVE(&bdev_io->internal.ch->io_submitted, bdev_io, internal.ch_link);
457 671 : bdev_io->internal.ch->queue_depth--;
458 671 : }
459 :
460 : void
461 16 : spdk_bdev_get_opts(struct spdk_bdev_opts *opts, size_t opts_size)
462 : {
463 16 : if (!opts) {
464 0 : SPDK_ERRLOG("opts should not be NULL\n");
465 0 : return;
466 : }
467 :
468 16 : if (!opts_size) {
469 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
470 0 : return;
471 : }
472 :
473 16 : opts->opts_size = opts_size;
474 :
475 : #define SET_FIELD(field) \
476 : if (offsetof(struct spdk_bdev_opts, field) + sizeof(opts->field) <= opts_size) { \
477 : opts->field = g_bdev_opts.field; \
478 : } \
479 :
480 16 : SET_FIELD(bdev_io_pool_size);
481 16 : SET_FIELD(bdev_io_cache_size);
482 16 : SET_FIELD(bdev_auto_examine);
483 16 : SET_FIELD(iobuf_small_cache_size);
484 16 : SET_FIELD(iobuf_large_cache_size);
485 :
486             : 	/* Do not remove this statement. Always update it when adding a new field,
487             : 	 * and do not forget to add the corresponding SET_FIELD statement for the new field. */
488 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_opts) == 32, "Incorrect size");
489 :
490 : #undef SET_FIELD
491 16 : }
492 :
493 : int
494 17 : spdk_bdev_set_opts(struct spdk_bdev_opts *opts)
495 : {
496 : uint32_t min_pool_size;
497 :
498 17 : if (!opts) {
499 0 : SPDK_ERRLOG("opts cannot be NULL\n");
500 0 : return -1;
501 : }
502 :
503 17 : if (!opts->opts_size) {
504 1 : SPDK_ERRLOG("opts_size inside opts cannot be zero value\n");
505 1 : return -1;
506 : }
507 :
508 : /*
509 : * Add 1 to the thread count to account for the extra mgmt_ch that gets created during subsystem
510 : * initialization. A second mgmt_ch will be created on the same thread when the application starts
511 : * but before the deferred put_io_channel event is executed for the first mgmt_ch.
512 : */
513 16 : min_pool_size = opts->bdev_io_cache_size * (spdk_thread_get_count() + 1);
514 16 : if (opts->bdev_io_pool_size < min_pool_size) {
515 0 : SPDK_ERRLOG("bdev_io_pool_size %" PRIu32 " is not compatible with bdev_io_cache_size %" PRIu32
516 : " and %" PRIu32 " threads\n", opts->bdev_io_pool_size, opts->bdev_io_cache_size,
517 : spdk_thread_get_count());
518 0 : SPDK_ERRLOG("bdev_io_pool_size must be at least %" PRIu32 "\n", min_pool_size);
519 0 : return -1;
520 : }
521 :
522 : #define SET_FIELD(field) \
523 : if (offsetof(struct spdk_bdev_opts, field) + sizeof(opts->field) <= opts->opts_size) { \
524 : g_bdev_opts.field = opts->field; \
525 : } \
526 :
527 16 : SET_FIELD(bdev_io_pool_size);
528 16 : SET_FIELD(bdev_io_cache_size);
529 16 : SET_FIELD(bdev_auto_examine);
530 16 : SET_FIELD(iobuf_small_cache_size);
531 16 : SET_FIELD(iobuf_large_cache_size);
532 :
533 16 : g_bdev_opts.opts_size = opts->opts_size;
534 :
535 : #undef SET_FIELD
536 :
537 16 : return 0;
538 17 : }
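Illustrative only (not part of bdev.c): a minimal sketch of how an application could use the two functions above, relying on the opts_size/SET_FIELD pattern so that callers built against an older struct layout remain compatible. The values shown are arbitrary.

	struct spdk_bdev_opts opts = {};

	spdk_bdev_get_opts(&opts, sizeof(opts));
	opts.bdev_io_pool_size = 32 * 1024;	/* arbitrary example value */
	opts.bdev_auto_examine = false;
	if (spdk_bdev_set_opts(&opts) != 0) {
		SPDK_ERRLOG("Failed to apply bdev options\n");
	}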
539 :
540 : static struct spdk_bdev *
541 155 : bdev_get_by_name(const char *bdev_name)
542 : {
543 : struct spdk_bdev_name find;
544 : struct spdk_bdev_name *res;
545 :
546 155 : find.name = (char *)bdev_name;
547 155 : res = RB_FIND(bdev_name_tree, &g_bdev_mgr.bdev_names, &find);
548 155 : if (res != NULL) {
549 148 : return res->bdev;
550 : }
551 :
552 7 : return NULL;
553 155 : }
554 :
555 : struct spdk_bdev *
556 19 : spdk_bdev_get_by_name(const char *bdev_name)
557 : {
558 : struct spdk_bdev *bdev;
559 :
560 19 : spdk_spin_lock(&g_bdev_mgr.spinlock);
561 19 : bdev = bdev_get_by_name(bdev_name);
562 19 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
563 :
564 19 : return bdev;
565 : }
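Illustrative only: looking up a bdev by name (aliases resolve through the same bdev_names tree). spdk_bdev_get_name() and spdk_bdev_get_num_blocks() are public SPDK accessors; the bdev name used here is hypothetical.

	struct spdk_bdev *bdev = spdk_bdev_get_by_name("Malloc0");	/* hypothetical name */

	if (bdev != NULL) {
		printf("%s: %" PRIu64 " blocks\n",
		       spdk_bdev_get_name(bdev), spdk_bdev_get_num_blocks(bdev));
	}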
566 :
567 : struct bdev_io_status_string {
568 : enum spdk_bdev_io_status status;
569 : const char *str;
570 : };
571 :
572 : static const struct bdev_io_status_string bdev_io_status_strings[] = {
573 : { SPDK_BDEV_IO_STATUS_AIO_ERROR, "aio_error" },
574 : { SPDK_BDEV_IO_STATUS_ABORTED, "aborted" },
575 : { SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED, "first_fused_failed" },
576 : { SPDK_BDEV_IO_STATUS_MISCOMPARE, "miscompare" },
577 : { SPDK_BDEV_IO_STATUS_NOMEM, "nomem" },
578 : { SPDK_BDEV_IO_STATUS_SCSI_ERROR, "scsi_error" },
579 : { SPDK_BDEV_IO_STATUS_NVME_ERROR, "nvme_error" },
580 : { SPDK_BDEV_IO_STATUS_FAILED, "failed" },
581 : { SPDK_BDEV_IO_STATUS_PENDING, "pending" },
582 : { SPDK_BDEV_IO_STATUS_SUCCESS, "success" },
583 : };
584 :
585 : static const char *
586 0 : bdev_io_status_get_string(enum spdk_bdev_io_status status)
587 : {
588 : uint32_t i;
589 :
590 0 : for (i = 0; i < SPDK_COUNTOF(bdev_io_status_strings); i++) {
591 0 : if (bdev_io_status_strings[i].status == status) {
592 0 : return bdev_io_status_strings[i].str;
593 : }
594 0 : }
595 :
596 0 : return "reserved";
597 0 : }
598 :
599 : struct spdk_bdev_wait_for_examine_ctx {
600 : struct spdk_poller *poller;
601 : spdk_bdev_wait_for_examine_cb cb_fn;
602 : void *cb_arg;
603 : };
604 :
605 : static bool bdev_module_all_actions_completed(void);
606 :
607 : static int
608 202 : bdev_wait_for_examine_cb(void *arg)
609 : {
610 202 : struct spdk_bdev_wait_for_examine_ctx *ctx = arg;
611 :
612 202 : if (!bdev_module_all_actions_completed()) {
613 0 : return SPDK_POLLER_IDLE;
614 : }
615 :
616 202 : spdk_poller_unregister(&ctx->poller);
617 202 : ctx->cb_fn(ctx->cb_arg);
618 202 : free(ctx);
619 :
620 202 : return SPDK_POLLER_BUSY;
621 202 : }
622 :
623 : int
624 202 : spdk_bdev_wait_for_examine(spdk_bdev_wait_for_examine_cb cb_fn, void *cb_arg)
625 : {
626 : struct spdk_bdev_wait_for_examine_ctx *ctx;
627 :
628 202 : ctx = calloc(1, sizeof(*ctx));
629 202 : if (ctx == NULL) {
630 0 : return -ENOMEM;
631 : }
632 202 : ctx->cb_fn = cb_fn;
633 202 : ctx->cb_arg = cb_arg;
634 202 : ctx->poller = SPDK_POLLER_REGISTER(bdev_wait_for_examine_cb, ctx, 0);
635 :
636 202 : return 0;
637 202 : }
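Illustrative only: a caller typically registers a completion callback and proceeds once every module's outstanding examine actions have finished. The callback receives only cb_arg, matching the ctx->cb_fn(ctx->cb_arg) invocation above; examine_done() is an assumed name.

	static void
	examine_done(void *cb_arg)
	{
		SPDK_NOTICELOG("all bdev examine actions completed\n");
	}

	/* e.g. during application startup */
	if (spdk_bdev_wait_for_examine(examine_done, NULL) != 0) {
		/* only fails with -ENOMEM when the poller context cannot be allocated */
	}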
638 :
639 : struct spdk_bdev_examine_item {
640 : char *name;
641 : TAILQ_ENTRY(spdk_bdev_examine_item) link;
642 : };
643 :
644 : TAILQ_HEAD(spdk_bdev_examine_allowlist, spdk_bdev_examine_item);
645 :
646 : struct spdk_bdev_examine_allowlist g_bdev_examine_allowlist = TAILQ_HEAD_INITIALIZER(
647 : g_bdev_examine_allowlist);
648 :
649 : static inline bool
650 22 : bdev_examine_allowlist_check(const char *name)
651 : {
652 : struct spdk_bdev_examine_item *item;
653 22 : TAILQ_FOREACH(item, &g_bdev_examine_allowlist, link) {
654 3 : if (strcmp(name, item->name) == 0) {
655 3 : return true;
656 : }
657 0 : }
658 19 : return false;
659 22 : }
660 :
661 : static inline void
662 256 : bdev_examine_allowlist_remove(const char *name)
663 : {
664 : struct spdk_bdev_examine_item *item;
665 256 : TAILQ_FOREACH(item, &g_bdev_examine_allowlist, link) {
666 3 : if (strcmp(name, item->name) == 0) {
667 3 : TAILQ_REMOVE(&g_bdev_examine_allowlist, item, link);
668 3 : free(item->name);
669 3 : free(item);
670 3 : break;
671 : }
672 0 : }
673 256 : }
674 :
675 : static inline void
676 68 : bdev_examine_allowlist_free(void)
677 : {
678 : struct spdk_bdev_examine_item *item;
679 68 : while (!TAILQ_EMPTY(&g_bdev_examine_allowlist)) {
680 0 : item = TAILQ_FIRST(&g_bdev_examine_allowlist);
681 0 : TAILQ_REMOVE(&g_bdev_examine_allowlist, item, link);
682 0 : free(item->name);
683 0 : free(item);
684 : }
685 68 : }
686 :
687 : static inline bool
688 11 : bdev_in_examine_allowlist(struct spdk_bdev *bdev)
689 : {
690 : struct spdk_bdev_alias *tmp;
691 11 : if (bdev_examine_allowlist_check(bdev->name)) {
692 3 : return true;
693 : }
694 16 : TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
695 8 : if (bdev_examine_allowlist_check(tmp->alias.name)) {
696 0 : return true;
697 : }
698 8 : }
699 8 : return false;
700 11 : }
701 :
702 : static inline bool
703 132 : bdev_ok_to_examine(struct spdk_bdev *bdev)
704 : {
705 : /* Some bdevs may not support the READ command.
706 : * Do not try to examine them.
707 : */
708 132 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_READ)) {
709 0 : return false;
710 : }
711 :
712 132 : if (g_bdev_opts.bdev_auto_examine) {
713 121 : return true;
714 : } else {
715 11 : return bdev_in_examine_allowlist(bdev);
716 : }
717 132 : }
718 :
719 : static void
720 132 : bdev_examine(struct spdk_bdev *bdev)
721 : {
722 : struct spdk_bdev_module *module;
723 : struct spdk_bdev_module_claim *claim, *tmpclaim;
724 : uint32_t action;
725 :
726 132 : if (!bdev_ok_to_examine(bdev)) {
727 8 : return;
728 : }
729 :
730 506 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
731 382 : if (module->examine_config) {
732 258 : spdk_spin_lock(&module->internal.spinlock);
733 258 : action = module->internal.action_in_progress;
734 258 : module->internal.action_in_progress++;
735 258 : spdk_spin_unlock(&module->internal.spinlock);
736 258 : module->examine_config(bdev);
737 258 : if (action != module->internal.action_in_progress) {
738 0 : SPDK_ERRLOG("examine_config for module %s did not call "
739 : "spdk_bdev_module_examine_done()\n", module->name);
740 0 : }
741 258 : }
742 382 : }
743 :
744 124 : spdk_spin_lock(&bdev->internal.spinlock);
745 :
746 124 : switch (bdev->internal.claim_type) {
747 : case SPDK_BDEV_CLAIM_NONE:
748 : /* Examine by all bdev modules */
749 466 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
750 350 : if (module->examine_disk) {
751 225 : spdk_spin_lock(&module->internal.spinlock);
752 225 : module->internal.action_in_progress++;
753 225 : spdk_spin_unlock(&module->internal.spinlock);
754 225 : spdk_spin_unlock(&bdev->internal.spinlock);
755 225 : module->examine_disk(bdev);
756 225 : spdk_spin_lock(&bdev->internal.spinlock);
757 225 : }
758 350 : }
759 116 : break;
760 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
761 : /* Examine by the one bdev module with a v1 claim */
762 1 : module = bdev->internal.claim.v1.module;
763 1 : if (module->examine_disk) {
764 1 : spdk_spin_lock(&module->internal.spinlock);
765 1 : module->internal.action_in_progress++;
766 1 : spdk_spin_unlock(&module->internal.spinlock);
767 1 : spdk_spin_unlock(&bdev->internal.spinlock);
768 1 : module->examine_disk(bdev);
769 1 : return;
770 : }
771 0 : break;
772 : default:
773 : /* Examine by all bdev modules with a v2 claim */
774 7 : assert(claim_type_is_v2(bdev->internal.claim_type));
775 : /*
776 : * Removal of tailq nodes while iterating can cause the iteration to jump out of the
777 : * list, perhaps accessing freed memory. Without protection, this could happen
778 : * while the lock is dropped during the examine callback.
779 : */
780 7 : bdev->internal.examine_in_progress++;
781 :
782 16 : TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) {
783 9 : module = claim->module;
784 :
785 9 : if (module == NULL) {
786 : /* This is a vestigial claim, held by examine_count */
787 0 : continue;
788 : }
789 :
790 9 : if (module->examine_disk == NULL) {
791 0 : continue;
792 : }
793 :
794 9 : spdk_spin_lock(&module->internal.spinlock);
795 9 : module->internal.action_in_progress++;
796 9 : spdk_spin_unlock(&module->internal.spinlock);
797 :
798 : /* Call examine_disk without holding internal.spinlock. */
799 9 : spdk_spin_unlock(&bdev->internal.spinlock);
800 9 : module->examine_disk(bdev);
801 9 : spdk_spin_lock(&bdev->internal.spinlock);
802 9 : }
803 :
804 7 : assert(bdev->internal.examine_in_progress > 0);
805 7 : bdev->internal.examine_in_progress--;
806 7 : if (bdev->internal.examine_in_progress == 0) {
807 : /* Remove any claims that were released during examine_disk */
808 16 : TAILQ_FOREACH_SAFE(claim, &bdev->internal.claim.v2.claims, link, tmpclaim) {
809 9 : if (claim->desc != NULL) {
810 9 : continue;
811 : }
812 :
813 0 : TAILQ_REMOVE(&bdev->internal.claim.v2.claims, claim, link);
814 0 : free(claim);
815 0 : }
816 7 : if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
817 0 : claim_reset(bdev);
818 0 : }
819 7 : }
820 7 : }
821 :
822 123 : spdk_spin_unlock(&bdev->internal.spinlock);
823 132 : }
824 :
825 : int
826 4 : spdk_bdev_examine(const char *name)
827 : {
828 : struct spdk_bdev *bdev;
829 : struct spdk_bdev_examine_item *item;
830 4 : struct spdk_thread *thread = spdk_get_thread();
831 :
832 4 : if (spdk_unlikely(!spdk_thread_is_app_thread(thread))) {
833 1 : SPDK_ERRLOG("Cannot examine bdev %s on thread %p (%s)\n", name, thread,
834 : thread ? spdk_thread_get_name(thread) : "null");
835 1 : return -EINVAL;
836 : }
837 :
838 3 : if (g_bdev_opts.bdev_auto_examine) {
839 0 : SPDK_ERRLOG("Manual examine is not allowed if auto examine is enabled\n");
840 0 : return -EINVAL;
841 : }
842 :
843 3 : if (bdev_examine_allowlist_check(name)) {
844 0 : SPDK_ERRLOG("Duplicate bdev name for manual examine: %s\n", name);
845 0 : return -EEXIST;
846 : }
847 :
848 3 : item = calloc(1, sizeof(*item));
849 3 : if (!item) {
850 0 : return -ENOMEM;
851 : }
852 3 : item->name = strdup(name);
853 3 : if (!item->name) {
854 0 : free(item);
855 0 : return -ENOMEM;
856 : }
857 3 : TAILQ_INSERT_TAIL(&g_bdev_examine_allowlist, item, link);
858 :
859 3 : bdev = spdk_bdev_get_by_name(name);
860 3 : if (bdev) {
861 3 : bdev_examine(bdev);
862 3 : }
863 3 : return 0;
864 4 : }
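Illustrative only: manual examine applies when bdev_auto_examine is disabled; it must be issued from the application thread, and the possible return values mirror the checks above. The bdev name is hypothetical.

	int rc = spdk_bdev_examine("Nvme0n1");	/* hypothetical bdev name */

	switch (rc) {
	case 0:
		break;		/* queued (and examined immediately if the bdev already exists) */
	case -EINVAL:
		break;		/* wrong thread, or auto examine is enabled */
	case -EEXIST:
		break;		/* name already submitted for manual examine */
	default:
		break;		/* -ENOMEM */
	}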
865 :
866 : static inline void
867 0 : bdev_examine_allowlist_config_json(struct spdk_json_write_ctx *w)
868 : {
869 : struct spdk_bdev_examine_item *item;
870 0 : TAILQ_FOREACH(item, &g_bdev_examine_allowlist, link) {
871 0 : spdk_json_write_object_begin(w);
872 0 : spdk_json_write_named_string(w, "method", "bdev_examine");
873 0 : spdk_json_write_named_object_begin(w, "params");
874 0 : spdk_json_write_named_string(w, "name", item->name);
875 0 : spdk_json_write_object_end(w);
876 0 : spdk_json_write_object_end(w);
877 0 : }
878 0 : }
879 :
880 : struct spdk_bdev *
881 1 : spdk_bdev_first(void)
882 : {
883 : struct spdk_bdev *bdev;
884 :
885 1 : bdev = TAILQ_FIRST(&g_bdev_mgr.bdevs);
886 1 : if (bdev) {
887 1 : SPDK_DEBUGLOG(bdev, "Starting bdev iteration at %s\n", bdev->name);
888 1 : }
889 :
890 1 : return bdev;
891 : }
892 :
893 : struct spdk_bdev *
894 8 : spdk_bdev_next(struct spdk_bdev *prev)
895 : {
896 : struct spdk_bdev *bdev;
897 :
898 8 : bdev = TAILQ_NEXT(prev, internal.link);
899 8 : if (bdev) {
900 7 : SPDK_DEBUGLOG(bdev, "Continuing bdev iteration at %s\n", bdev->name);
901 7 : }
902 :
903 8 : return bdev;
904 : }
905 :
906 : static struct spdk_bdev *
907 6 : _bdev_next_leaf(struct spdk_bdev *bdev)
908 : {
909 9 : while (bdev != NULL) {
910 8 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
911 5 : return bdev;
912 : } else {
913 3 : bdev = TAILQ_NEXT(bdev, internal.link);
914 : }
915 : }
916 :
917 1 : return bdev;
918 6 : }
919 :
920 : struct spdk_bdev *
921 1 : spdk_bdev_first_leaf(void)
922 : {
923 : struct spdk_bdev *bdev;
924 :
925 1 : bdev = _bdev_next_leaf(TAILQ_FIRST(&g_bdev_mgr.bdevs));
926 :
927 1 : if (bdev) {
928 1 : SPDK_DEBUGLOG(bdev, "Starting bdev iteration at %s\n", bdev->name);
929 1 : }
930 :
931 1 : return bdev;
932 : }
933 :
934 : struct spdk_bdev *
935 5 : spdk_bdev_next_leaf(struct spdk_bdev *prev)
936 : {
937 : struct spdk_bdev *bdev;
938 :
939 5 : bdev = _bdev_next_leaf(TAILQ_NEXT(prev, internal.link));
940 :
941 5 : if (bdev) {
942 4 : SPDK_DEBUGLOG(bdev, "Continuing bdev iteration at %s\n", bdev->name);
943 4 : }
944 :
945 5 : return bdev;
946 : }
947 :
948 : static inline bool
949 816 : bdev_io_use_memory_domain(struct spdk_bdev_io *bdev_io)
950 : {
951 816 : return bdev_io->internal.f.has_memory_domain;
952 : }
953 :
954 : static inline bool
955 1555 : bdev_io_use_accel_sequence(struct spdk_bdev_io *bdev_io)
956 : {
957 1555 : return bdev_io->internal.f.has_accel_sequence;
958 : }
959 :
960 : static inline uint32_t
961 373 : bdev_desc_get_block_size(struct spdk_bdev_desc *desc)
962 : {
963 373 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
964 :
965 373 : if (spdk_unlikely(desc->opts.hide_metadata)) {
966 0 : return bdev->blocklen - bdev->md_len;
967 : } else {
968 373 : return bdev->blocklen;
969 : }
970 373 : }
971 :
972 : static inline uint32_t
973 110 : bdev_io_get_block_size(struct spdk_bdev_io *bdev_io)
974 : {
975 110 : return bdev_desc_get_block_size(bdev_io->internal.desc);
976 : }
977 :
978 : static inline void
979 7 : bdev_queue_nomem_io_head(struct spdk_bdev_shared_resource *shared_resource,
980 : struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
981 : {
982 : /* Wait for some of the outstanding I/O to complete before we retry any of the nomem_io.
983 : * Normally we will wait for NOMEM_THRESHOLD_COUNT I/O to complete but for low queue depth
984 : * channels we will instead wait for half to complete.
985 : */
986 7 : shared_resource->nomem_threshold = spdk_max((int64_t)shared_resource->io_outstanding / 2,
987 : (int64_t)shared_resource->io_outstanding - NOMEM_THRESHOLD_COUNT);
988 :
989 7 : assert(state != BDEV_IO_RETRY_STATE_INVALID);
990 7 : bdev_io->internal.retry_state = state;
991 7 : TAILQ_INSERT_HEAD(&shared_resource->nomem_io, bdev_io, internal.link);
992 7 : }
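Worked example of the threshold above, with NOMEM_THRESHOLD_COUNT = 8: if io_outstanding is 100 when a NOMEM occurs, nomem_threshold becomes max(50, 92) = 92, so retries begin once 8 I/O have completed; if io_outstanding is only 6, the threshold becomes max(3, -2) = 3, i.e. retries begin after half of the outstanding I/O completes.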
993 :
994 : static inline void
995 43 : bdev_queue_nomem_io_tail(struct spdk_bdev_shared_resource *shared_resource,
996 : struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
997 : {
998 : /* We only queue IOs at the end of the nomem_io queue if they're submitted by the user while
999 : * the queue isn't empty, so we don't need to update the nomem_threshold here */
1000 43 : assert(!TAILQ_EMPTY(&shared_resource->nomem_io));
1001 :
1002 43 : assert(state != BDEV_IO_RETRY_STATE_INVALID);
1003 43 : bdev_io->internal.retry_state = state;
1004 43 : TAILQ_INSERT_TAIL(&shared_resource->nomem_io, bdev_io, internal.link);
1005 43 : }
1006 :
1007 : void
1008 16 : spdk_bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len)
1009 : {
1010 : struct iovec *iovs;
1011 :
1012 16 : if (bdev_io->u.bdev.iovs == NULL) {
1013 3 : bdev_io->u.bdev.iovs = &bdev_io->iov;
1014 3 : bdev_io->u.bdev.iovcnt = 1;
1015 3 : }
1016 :
1017 16 : iovs = bdev_io->u.bdev.iovs;
1018 :
1019 16 : assert(iovs != NULL);
1020 16 : assert(bdev_io->u.bdev.iovcnt >= 1);
1021 :
1022 16 : iovs[0].iov_base = buf;
1023 16 : iovs[0].iov_len = len;
1024 16 : }
1025 :
1026 : void
1027 3 : spdk_bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
1028 : {
1029 3 : assert((len / spdk_bdev_get_md_size(bdev_io->bdev)) >= bdev_io->u.bdev.num_blocks);
1030 3 : bdev_io->u.bdev.md_buf = md_buf;
1031 3 : }
1032 :
1033 : static bool
1034 167 : _is_buf_allocated(const struct iovec *iovs)
1035 : {
1036 167 : if (iovs == NULL) {
1037 6 : return false;
1038 : }
1039 :
1040 161 : return iovs[0].iov_base != NULL;
1041 167 : }
1042 :
1043 : static bool
1044 50 : _are_iovs_aligned(struct iovec *iovs, int iovcnt, uint32_t alignment)
1045 : {
1046 : int i;
1047 : uintptr_t iov_base;
1048 :
1049 50 : if (spdk_likely(alignment == 1)) {
1050 21 : return true;
1051 : }
1052 :
1053 36 : for (i = 0; i < iovcnt; i++) {
1054 29 : iov_base = (uintptr_t)iovs[i].iov_base;
1055 29 : if ((iov_base & (alignment - 1)) != 0) {
1056 22 : return false;
1057 : }
1058 7 : }
1059 :
1060 7 : return true;
1061 50 : }
1062 :
1063 : static inline bool
1064 856 : bdev_io_needs_sequence_exec(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
1065 : {
1066 856 : if (!bdev_io_use_accel_sequence(bdev_io)) {
1067 856 : return false;
1068 : }
1069 :
1070            : 	/* For now, we don't allow splitting IOs with an accel sequence and will treat them as if
1071            : 	 * the bdev module didn't support accel sequences */
1072 0 : return !desc->accel_sequence_supported[bdev_io->type] || bdev_io->internal.f.split;
1073 856 : }
1074 :
1075 : static inline void
1076 592 : bdev_io_increment_outstanding(struct spdk_bdev_channel *bdev_ch,
1077 : struct spdk_bdev_shared_resource *shared_resource)
1078 : {
1079 592 : bdev_ch->io_outstanding++;
1080 592 : shared_resource->io_outstanding++;
1081 592 : }
1082 :
1083 : static inline void
1084 592 : bdev_io_decrement_outstanding(struct spdk_bdev_channel *bdev_ch,
1085 : struct spdk_bdev_shared_resource *shared_resource)
1086 : {
1087 592 : assert(bdev_ch->io_outstanding > 0);
1088 592 : assert(shared_resource->io_outstanding > 0);
1089 592 : bdev_ch->io_outstanding--;
1090 592 : shared_resource->io_outstanding--;
1091 592 : }
1092 :
1093 : static void
1094 0 : bdev_io_submit_sequence_cb(void *ctx, int status)
1095 : {
1096 0 : struct spdk_bdev_io *bdev_io = ctx;
1097 :
1098 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1099 :
1100 0 : bdev_io->u.bdev.accel_sequence = NULL;
1101 0 : bdev_io->internal.f.has_accel_sequence = false;
1102 :
1103 0 : if (spdk_unlikely(status != 0)) {
1104 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
1105 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1106 0 : bdev_io_complete_unsubmitted(bdev_io);
1107 0 : return;
1108 : }
1109 :
1110 0 : bdev_io_submit(bdev_io);
1111 0 : }
1112 :
1113 : static void
1114 0 : bdev_io_exec_sequence_cb(void *ctx, int status)
1115 : {
1116 0 : struct spdk_bdev_io *bdev_io = ctx;
1117 0 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1118 :
1119 0 : TAILQ_REMOVE(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
1120 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1121 :
1122 0 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1123 0 : bdev_ch_retry_io(ch);
1124 0 : }
1125 :
1126 0 : bdev_io->internal.data_transfer_cpl(bdev_io, status);
1127 0 : }
1128 :
1129 : static void
1130 0 : bdev_io_exec_sequence(struct spdk_bdev_io *bdev_io, void (*cb_fn)(void *ctx, int status))
1131 : {
1132 0 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1133 :
1134 0 : assert(bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io));
1135 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE || bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1136 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1137 :
1138 : /* Since the operations are appended during submission, they're in the opposite order than
1139 : * how we want to execute them for reads (i.e. we need to execute the most recently added
1140 : * operation first), so reverse the sequence before executing it.
1141 : */
1142 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1143 0 : spdk_accel_sequence_reverse(bdev_io->internal.accel_sequence);
1144 0 : }
1145 :
1146 0 : TAILQ_INSERT_TAIL(&bdev_io->internal.ch->io_accel_exec, bdev_io, internal.link);
1147 0 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1148 0 : bdev_io->internal.data_transfer_cpl = cb_fn;
1149 :
1150 0 : spdk_accel_sequence_finish(bdev_io->internal.accel_sequence,
1151 0 : bdev_io_exec_sequence_cb, bdev_io);
1152 0 : }
1153 :
1154 : static void
1155 42 : bdev_io_get_buf_complete(struct spdk_bdev_io *bdev_io, bool status)
1156 : {
1157 42 : struct spdk_io_channel *ch = spdk_bdev_io_get_io_channel(bdev_io);
1158 : void *buf;
1159 :
1160 42 : if (spdk_unlikely(bdev_io->internal.get_aux_buf_cb != NULL)) {
1161 0 : buf = bdev_io->internal.buf.ptr;
1162 0 : bdev_io->internal.buf.ptr = NULL;
1163 0 : bdev_io->internal.f.has_buf = false;
1164 0 : bdev_io->internal.get_aux_buf_cb(ch, bdev_io, buf);
1165 0 : bdev_io->internal.get_aux_buf_cb = NULL;
1166 0 : } else {
1167 42 : assert(bdev_io->internal.get_buf_cb != NULL);
1168 42 : bdev_io->internal.get_buf_cb(ch, bdev_io, status);
1169 42 : bdev_io->internal.get_buf_cb = NULL;
1170 : }
1171 42 : }
1172 :
1173 : static void
1174 4 : _bdev_io_pull_buffer_cpl(void *ctx, int rc)
1175 : {
1176 4 : struct spdk_bdev_io *bdev_io = ctx;
1177 :
1178 4 : if (rc) {
1179 0 : SPDK_ERRLOG("Set bounce buffer failed with rc %d\n", rc);
1180 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1181 0 : }
1182 4 : bdev_io_get_buf_complete(bdev_io, !rc);
1183 4 : }
1184 :
1185 : static void
1186 2 : bdev_io_pull_md_buf_done(void *ctx, int status)
1187 : {
1188 2 : struct spdk_bdev_io *bdev_io = ctx;
1189 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1190 :
1191 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1192 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1193 :
1194 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1195 0 : bdev_ch_retry_io(ch);
1196 0 : }
1197 :
1198 2 : assert(bdev_io->internal.data_transfer_cpl);
1199 2 : bdev_io->internal.data_transfer_cpl(bdev_io, status);
1200 2 : }
1201 :
1202 : static void
1203 4 : bdev_io_pull_md_buf(struct spdk_bdev_io *bdev_io)
1204 : {
1205 4 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1206 4 : int rc = 0;
1207 :
1208 4 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1209 2 : assert(bdev_io->internal.f.has_bounce_buf);
1210 2 : if (bdev_io_use_memory_domain(bdev_io)) {
1211 2 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1212 2 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1213 4 : rc = spdk_memory_domain_pull_data(bdev_io->internal.memory_domain,
1214 2 : bdev_io->internal.memory_domain_ctx,
1215 2 : &bdev_io->internal.bounce_buf.orig_md_iov, 1,
1216 2 : &bdev_io->internal.bounce_buf.md_iov, 1,
1217 2 : bdev_io_pull_md_buf_done, bdev_io);
1218 2 : if (rc == 0) {
1219 : /* Continue to submit IO in completion callback */
1220 2 : return;
1221 : }
1222 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1223 0 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1224 0 : if (rc != -ENOMEM) {
1225 0 : SPDK_ERRLOG("Failed to pull data from memory domain %s, rc %d\n",
1226 : spdk_memory_domain_get_dma_device_id(
1227 : bdev_io->internal.memory_domain), rc);
1228 0 : }
1229 0 : } else {
1230 0 : memcpy(bdev_io->internal.bounce_buf.md_iov.iov_base,
1231 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base,
1232 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len);
1233 : }
1234 0 : }
1235 :
1236 2 : if (spdk_unlikely(rc == -ENOMEM)) {
1237 0 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL_MD);
1238 0 : } else {
1239 2 : assert(bdev_io->internal.data_transfer_cpl);
1240 2 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1241 : }
1242 4 : }
1243 :
1244 : static void
1245 4 : _bdev_io_pull_bounce_md_buf(struct spdk_bdev_io *bdev_io, void *md_buf, size_t len)
1246 : {
1247 4 : assert(bdev_io->internal.f.has_bounce_buf);
1248 :
1249 : /* save original md_buf */
1250 4 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base = bdev_io->u.bdev.md_buf;
1251 4 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len = len;
1252 4 : bdev_io->internal.bounce_buf.md_iov.iov_base = md_buf;
1253 4 : bdev_io->internal.bounce_buf.md_iov.iov_len = len;
1254 : /* set bounce md_buf */
1255 4 : bdev_io->u.bdev.md_buf = md_buf;
1256 :
1257 4 : bdev_io_pull_md_buf(bdev_io);
1258 4 : }
1259 :
1260 : static void
1261 42 : _bdev_io_set_md_buf(struct spdk_bdev_io *bdev_io)
1262 : {
1263 42 : struct spdk_bdev *bdev = bdev_io->bdev;
1264 : uint64_t md_len;
1265 : void *buf;
1266 :
1267 42 : if (spdk_bdev_is_md_separate(bdev)) {
1268 7 : assert(!bdev_io_use_accel_sequence(bdev_io));
1269 :
1270 7 : buf = (char *)bdev_io->u.bdev.iovs[0].iov_base + bdev_io->u.bdev.iovs[0].iov_len;
1271 7 : md_len = bdev_io->u.bdev.num_blocks * bdev->md_len;
1272 :
1273 7 : assert(((uintptr_t)buf & (spdk_bdev_get_buf_align(bdev) - 1)) == 0);
1274 :
1275 7 : if (bdev_io->u.bdev.md_buf != NULL) {
1276 4 : _bdev_io_pull_bounce_md_buf(bdev_io, buf, md_len);
1277 4 : return;
1278 : } else {
1279 3 : spdk_bdev_io_set_md_buf(bdev_io, buf, md_len);
1280 : }
1281 3 : }
1282 :
1283 38 : bdev_io_get_buf_complete(bdev_io, true);
1284 42 : }
1285 :
1286 : static inline void
1287 26 : bdev_io_pull_data_done(struct spdk_bdev_io *bdev_io, int rc)
1288 : {
1289 26 : if (rc) {
1290 0 : SPDK_ERRLOG("Failed to get data buffer\n");
1291 0 : assert(bdev_io->internal.data_transfer_cpl);
1292 0 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1293 0 : return;
1294 : }
1295 :
1296 26 : _bdev_io_set_md_buf(bdev_io);
1297 26 : }
1298 :
1299 : static void
1300 2 : bdev_io_pull_data_done_and_track(void *ctx, int status)
1301 : {
1302 2 : struct spdk_bdev_io *bdev_io = ctx;
1303 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1304 :
1305 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1306 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1307 :
1308 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1309 0 : bdev_ch_retry_io(ch);
1310 0 : }
1311 :
1312 2 : bdev_io_pull_data_done(bdev_io, status);
1313 2 : }
1314 :
1315 : static void
1316 27 : bdev_io_pull_data(struct spdk_bdev_io *bdev_io)
1317 : {
1318 27 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1319 27 : int rc = 0;
1320 :
1321            : 	/* If we need to exec an accel sequence or the IO uses a memory domain buffer and has a
1322            : 	 * sequence, append a copy operation so that accel updates the src/dst buffers of the
1323            : 	 * previous operation */
1324 27 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io) ||
1325 27 : (bdev_io_use_accel_sequence(bdev_io) && bdev_io_use_memory_domain(bdev_io))) {
1326 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1327 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1328 0 : assert(bdev_io->internal.f.has_bounce_buf);
1329 0 : rc = spdk_accel_append_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
1330 0 : bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
1331 : NULL, NULL,
1332 0 : bdev_io->internal.bounce_buf.orig_iovs,
1333 0 : bdev_io->internal.bounce_buf.orig_iovcnt,
1334 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
1335 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
1336 : NULL, NULL);
1337 0 : } else {
1338 : /* We need to reverse the src/dst for reads */
1339 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1340 0 : assert(bdev_io_use_accel_sequence(bdev_io));
1341 0 : assert(bdev_io->internal.f.has_bounce_buf);
1342 0 : rc = spdk_accel_append_copy(&bdev_io->internal.accel_sequence, ch->accel_channel,
1343 0 : bdev_io->internal.bounce_buf.orig_iovs,
1344 0 : bdev_io->internal.bounce_buf.orig_iovcnt,
1345 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
1346 0 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
1347 0 : bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt,
1348 : NULL, NULL, NULL, NULL);
1349 : }
1350 :
1351 0 : if (spdk_unlikely(rc != 0 && rc != -ENOMEM)) {
1352 0 : SPDK_ERRLOG("Failed to append copy to accel sequence: %p\n",
1353 : bdev_io->internal.accel_sequence);
1354 0 : }
1355 27 : } else if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1356 : /* if this is write path, copy data from original buffer to bounce buffer */
1357 17 : if (bdev_io_use_memory_domain(bdev_io)) {
1358 3 : assert(bdev_io->internal.f.has_bounce_buf);
1359 3 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1360 3 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1361 6 : rc = spdk_memory_domain_pull_data(bdev_io->internal.memory_domain,
1362 3 : bdev_io->internal.memory_domain_ctx,
1363 3 : bdev_io->internal.bounce_buf.orig_iovs,
1364 3 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1365 3 : bdev_io->u.bdev.iovs, 1,
1366 : bdev_io_pull_data_done_and_track,
1367 3 : bdev_io);
1368 3 : if (rc == 0) {
1369 : /* Continue to submit IO in completion callback */
1370 2 : return;
1371 : }
1372 1 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1373 1 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1374 1 : if (rc != -ENOMEM) {
1375 0 : SPDK_ERRLOG("Failed to pull data from memory domain %s\n",
1376 : spdk_memory_domain_get_dma_device_id(
1377 : bdev_io->internal.memory_domain));
1378 0 : }
1379 1 : } else {
1380 14 : assert(bdev_io->u.bdev.iovcnt == 1);
1381 14 : assert(bdev_io->internal.f.has_bounce_buf);
1382 28 : spdk_copy_iovs_to_buf(bdev_io->u.bdev.iovs[0].iov_base,
1383 14 : bdev_io->u.bdev.iovs[0].iov_len,
1384 14 : bdev_io->internal.bounce_buf.orig_iovs,
1385 14 : bdev_io->internal.bounce_buf.orig_iovcnt);
1386 : }
1387 15 : }
1388 :
1389 25 : if (spdk_unlikely(rc == -ENOMEM)) {
1390 1 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
1391 1 : } else {
1392 24 : bdev_io_pull_data_done(bdev_io, rc);
1393 : }
1394 27 : }
1395 :
1396 : static void
1397 26 : _bdev_io_pull_bounce_data_buf(struct spdk_bdev_io *bdev_io, void *buf, size_t len,
1398 : bdev_copy_bounce_buffer_cpl cpl_cb)
1399 : {
1400 26 : struct spdk_bdev_shared_resource *shared_resource = bdev_io->internal.ch->shared_resource;
1401 :
1402 26 : assert(bdev_io->internal.f.has_bounce_buf == false);
1403 :
1404 26 : bdev_io->internal.data_transfer_cpl = cpl_cb;
1405 26 : bdev_io->internal.f.has_bounce_buf = true;
1406 : /* save original iovec */
1407 26 : bdev_io->internal.bounce_buf.orig_iovs = bdev_io->u.bdev.iovs;
1408 26 : bdev_io->internal.bounce_buf.orig_iovcnt = bdev_io->u.bdev.iovcnt;
1409 : /* zero the other data members */
1410 26 : bdev_io->internal.bounce_buf.iov.iov_base = NULL;
1411 26 : bdev_io->internal.bounce_buf.md_iov.iov_base = NULL;
1412 26 : bdev_io->internal.bounce_buf.orig_md_iov.iov_base = NULL;
1413 : /* set bounce iov */
1414 26 : bdev_io->u.bdev.iovs = &bdev_io->internal.bounce_buf.iov;
1415 26 : bdev_io->u.bdev.iovcnt = 1;
1416 : /* set bounce buffer for this operation */
1417 26 : bdev_io->u.bdev.iovs[0].iov_base = buf;
1418 26 : bdev_io->u.bdev.iovs[0].iov_len = len;
1419 : /* Now we use 1 iov, the split condition could have been changed */
1420 26 : bdev_io->internal.f.split = bdev_io_should_split(bdev_io);
1421 :
1422 26 : if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
1423 0 : bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PULL);
1424 0 : } else {
1425 26 : bdev_io_pull_data(bdev_io);
1426 : }
1427 26 : }
1428 :
1429 : static void
1430 42 : _bdev_io_set_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t len)
1431 : {
1432 42 : struct spdk_bdev *bdev = bdev_io->bdev;
1433 : bool buf_allocated;
1434 : uint64_t alignment;
1435 : void *aligned_buf;
1436 :
1437 42 : bdev_io->internal.buf.ptr = buf;
1438 42 : bdev_io->internal.f.has_buf = true;
1439 :
1440 42 : if (spdk_unlikely(bdev_io->internal.get_aux_buf_cb != NULL)) {
1441 0 : bdev_io_get_buf_complete(bdev_io, true);
1442 0 : return;
1443 : }
1444 :
1445 42 : alignment = spdk_bdev_get_buf_align(bdev);
1446 42 : buf_allocated = _is_buf_allocated(bdev_io->u.bdev.iovs);
1447 42 : aligned_buf = (void *)(((uintptr_t)buf + (alignment - 1)) & ~(alignment - 1));
1448 :
1449 42 : if (buf_allocated) {
1450 26 : _bdev_io_pull_bounce_data_buf(bdev_io, aligned_buf, len, _bdev_io_pull_buffer_cpl);
1451 : /* Continue in completion callback */
1452 26 : return;
1453 : } else {
1454 16 : spdk_bdev_io_set_buf(bdev_io, aligned_buf, len);
1455 : }
1456 :
1457 16 : _bdev_io_set_md_buf(bdev_io);
1458 42 : }
1459 :
1460 : static inline uint64_t
1461 84 : bdev_io_get_max_buf_len(struct spdk_bdev_io *bdev_io, uint64_t len)
1462 : {
1463 84 : struct spdk_bdev *bdev = bdev_io->bdev;
1464 : uint64_t md_len, alignment;
1465 :
1466 84 : md_len = spdk_bdev_is_md_separate(bdev) ? bdev_io->u.bdev.num_blocks * bdev->md_len : 0;
1467 :
1468 : /* 1 byte alignment needs 0 byte of extra space, 64 bytes alignment needs 63 bytes of extra space, etc. */
1469 84 : alignment = spdk_bdev_get_buf_align(bdev) - 1;
1470 :
1471 84 : return len + alignment + md_len;
1472 : }
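Worked example (illustrative numbers): an 8-block, 4096-byte read on a bdev with 64-byte buffer alignment and 8 bytes of separate metadata per block needs 4096 + 63 + 64 = 4223 bytes from the iobuf pool, so the data, its alignment padding, and the separate metadata fit in a single allocation.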
1473 :
1474 : static void
1475 42 : _bdev_io_put_buf(struct spdk_bdev_io *bdev_io, void *buf, uint64_t buf_len)
1476 : {
1477 : struct spdk_bdev_mgmt_channel *ch;
1478 :
1479 42 : ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
1480 42 : spdk_iobuf_put(&ch->iobuf, buf, bdev_io_get_max_buf_len(bdev_io, buf_len));
1481 42 : }
1482 :
1483 : static void
1484 42 : bdev_io_put_buf(struct spdk_bdev_io *bdev_io)
1485 : {
1486 42 : assert(bdev_io->internal.f.has_buf);
1487 42 : _bdev_io_put_buf(bdev_io, bdev_io->internal.buf.ptr, bdev_io->internal.buf.len);
1488 42 : bdev_io->internal.buf.ptr = NULL;
1489 42 : bdev_io->internal.f.has_buf = false;
1490 42 : }
1491 :
1492 3 : SPDK_LOG_DEPRECATION_REGISTER(spdk_bdev_io_put_aux_buf,
1493 : "spdk_bdev_io_put_aux_buf is deprecated", "v25.01", 0);
1494 :
1495 : void
1496 0 : spdk_bdev_io_put_aux_buf(struct spdk_bdev_io *bdev_io, void *buf)
1497 : {
1498 0 : uint64_t len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
1499 :
1500 0 : SPDK_LOG_DEPRECATED(spdk_bdev_io_put_aux_buf);
1501 :
1502 0 : assert(buf != NULL);
1503 0 : _bdev_io_put_buf(bdev_io, buf, len);
1504 0 : }
1505 :
1506 : static inline void
1507 549 : bdev_submit_request(struct spdk_bdev *bdev, struct spdk_io_channel *ioch,
1508 : struct spdk_bdev_io *bdev_io)
1509 : {
1510 : /* After a request is submitted to a bdev module, the ownership of an accel sequence
1511 : * associated with that bdev_io is transferred to the bdev module. So, clear the internal
1512 : * sequence pointer to make sure we won't touch it anymore. */
1513 1016 : if ((bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE ||
1514 549 : bdev_io->type == SPDK_BDEV_IO_TYPE_READ) && bdev_io->u.bdev.accel_sequence != NULL) {
1515 0 : assert(!bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io));
1516 0 : bdev_io->internal.f.has_accel_sequence = false;
1517 0 : }
1518 :
1519 549 : bdev->fn_table->submit_request(ioch, bdev_io);
1520 549 : }
1521 :
1522 : static inline void
1523 10 : bdev_ch_resubmit_io(struct spdk_bdev_shared_resource *shared_resource, struct spdk_bdev_io *bdev_io)
1524 : {
1525 10 : struct spdk_bdev *bdev = bdev_io->bdev;
1526 :
1527 10 : bdev_io_increment_outstanding(bdev_io->internal.ch, shared_resource);
1528 10 : bdev_io->internal.error.nvme.cdw0 = 0;
1529 10 : bdev_io->num_retries++;
1530 10 : bdev_submit_request(bdev, spdk_bdev_io_get_io_channel(bdev_io), bdev_io);
1531 10 : }
1532 :
1533 : static void
1534 63 : bdev_shared_ch_retry_io(struct spdk_bdev_shared_resource *shared_resource)
1535 : {
1536 : struct spdk_bdev_io *bdev_io;
1537 :
1538 63 : if (shared_resource->io_outstanding > shared_resource->nomem_threshold) {
1539 : /*
1540 : * Allow some more I/O to complete before retrying the nomem_io queue.
1541 : * Some drivers (such as nvme) cannot immediately take a new I/O in
1542 : * the context of a completion, because the resources for the I/O are
1543 : * not released until control returns to the bdev poller. Also, we
1544 : * may require several small I/O to complete before a larger I/O
1545 : * (that requires splitting) can be submitted.
1546 : */
1547 58 : return;
1548 : }
1549 :
1550 16 : while (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
1551 12 : bdev_io = TAILQ_FIRST(&shared_resource->nomem_io);
1552 12 : TAILQ_REMOVE(&shared_resource->nomem_io, bdev_io, internal.link);
1553 :
1554 12 : switch (bdev_io->internal.retry_state) {
1555 : case BDEV_IO_RETRY_STATE_SUBMIT:
1556 10 : bdev_ch_resubmit_io(shared_resource, bdev_io);
1557 10 : break;
1558 : case BDEV_IO_RETRY_STATE_PULL:
1559 1 : bdev_io_pull_data(bdev_io);
1560 1 : break;
1561 : case BDEV_IO_RETRY_STATE_PULL_MD:
1562 0 : bdev_io_pull_md_buf(bdev_io);
1563 0 : break;
1564 : case BDEV_IO_RETRY_STATE_PUSH:
1565 1 : bdev_io_push_bounce_data(bdev_io);
1566 1 : break;
1567 : case BDEV_IO_RETRY_STATE_PUSH_MD:
1568 0 : bdev_io_push_bounce_md_buf(bdev_io);
1569 0 : break;
1570 : default:
1571 0 : assert(0 && "invalid retry state");
1572 : break;
1573 : }
1574 :
1575 12 : if (bdev_io == TAILQ_FIRST(&shared_resource->nomem_io)) {
1576 : /* This IO completed again with NOMEM status, so break the loop and
1577 : * don't try anymore. Note that a bdev_io that fails with NOMEM
1578 : * always gets requeued at the front of the list, to maintain
1579 : * ordering.
1580 : */
1581 1 : break;
1582 : }
1583 : }
1584 63 : }
1585 :
1586 : static void
1587 63 : bdev_ch_retry_io(struct spdk_bdev_channel *bdev_ch)
1588 : {
1589 63 : bdev_shared_ch_retry_io(bdev_ch->shared_resource);
1590 63 : }
1591 :
1592 : static int
1593 0 : bdev_no_mem_poller(void *ctx)
1594 : {
1595 0 : struct spdk_bdev_shared_resource *shared_resource = ctx;
1596 :
1597 0 : spdk_poller_unregister(&shared_resource->nomem_poller);
1598 :
1599 0 : if (!TAILQ_EMPTY(&shared_resource->nomem_io)) {
1600 0 : bdev_shared_ch_retry_io(shared_resource);
1601 0 : }
1602 : /* the retry cb may re-register the poller so double check */
1603 0 : if (!TAILQ_EMPTY(&shared_resource->nomem_io) &&
1604 0 : shared_resource->io_outstanding == 0 && shared_resource->nomem_poller == NULL) {
1605 : /* No IOs were submitted, try again */
1606 0 : shared_resource->nomem_poller = SPDK_POLLER_REGISTER(bdev_no_mem_poller, shared_resource,
1607 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * 10);
1608 0 : }
1609 :
1610 0 : return SPDK_POLLER_BUSY;
1611 : }
1612 :
1613 : static inline bool
1614 556 : _bdev_io_handle_no_mem(struct spdk_bdev_io *bdev_io, enum bdev_io_retry_state state)
1615 : {
1616 556 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
1617 556 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
1618 :
1619 556 : if (spdk_unlikely(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM)) {
1620 5 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
1621 5 : bdev_queue_nomem_io_head(shared_resource, bdev_io, state);
1622 :
1623 5 : if (shared_resource->io_outstanding == 0 && !shared_resource->nomem_poller) {
1624 : /* Special case: we have nomem IOs queued but no outstanding IOs whose
1625 : * completions could trigger a retry of the queued IOs.
1626 : * Normally the completion of any submitted IO triggers a retry; this poller
1627 : * covers the case when no new IOs are submitted at all, e.g. qd==1 */
1628 0 : shared_resource->nomem_poller = SPDK_POLLER_REGISTER(bdev_no_mem_poller, shared_resource,
1629 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * 10);
1630 0 : }
1631 : /* If a bdev module completed an I/O that has an accel sequence with NOMEM status, the
1632 : * ownership of that sequence is transferred back to the bdev layer, so we need to
1633 : * restore internal.accel_sequence to make sure that the sequence is handled
1634 : * correctly in case the I/O is later aborted. */
1635 5 : if ((bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
1636 5 : bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) && bdev_io->u.bdev.accel_sequence) {
1637 0 : assert(!bdev_io_use_accel_sequence(bdev_io));
1638 0 : bdev_io->internal.f.has_accel_sequence = true;
1639 0 : bdev_io->internal.accel_sequence = bdev_io->u.bdev.accel_sequence;
1640 0 : }
1641 :
1642 5 : return true;
1643 : }
1644 :
1645 551 : if (spdk_unlikely(!TAILQ_EMPTY(&shared_resource->nomem_io))) {
1646 63 : bdev_ch_retry_io(bdev_ch);
1647 63 : }
1648 :
1649 551 : return false;
1650 556 : }
1651 :
1652 : static void
1653 26 : _bdev_io_complete_push_bounce_done(void *ctx, int rc)
1654 : {
1655 26 : struct spdk_bdev_io *bdev_io = ctx;
1656 26 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1657 :
1658 26 : if (rc) {
1659 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1660 0 : }
1661 : /* We want to free the bounce buffer here since we know we're done with it (as opposed
1662 : * to waiting for the conditional free of internal.buf.ptr in spdk_bdev_free_io()).
1663 : */
1664 26 : bdev_io_put_buf(bdev_io);
1665 :
1666 26 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1667 0 : bdev_ch_retry_io(ch);
1668 0 : }
1669 :
1670 : /* Continue with IO completion flow */
1671 26 : bdev_io_complete(bdev_io);
1672 26 : }
1673 :
1674 : static void
1675 2 : bdev_io_push_bounce_md_buf_done(void *ctx, int rc)
1676 : {
1677 2 : struct spdk_bdev_io *bdev_io = ctx;
1678 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1679 :
1680 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1681 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1682 2 : bdev_io->internal.f.has_bounce_buf = false;
1683 :
1684 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1685 0 : bdev_ch_retry_io(ch);
1686 0 : }
1687 :
1688 2 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1689 2 : }
1690 :
1691 : static inline void
1692 26 : bdev_io_push_bounce_md_buf(struct spdk_bdev_io *bdev_io)
1693 : {
1694 26 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1695 26 : int rc = 0;
1696 :
1697 26 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
1698 26 : assert(bdev_io->internal.f.has_bounce_buf);
1699 :
1700 : /* Now handle the metadata buffer the same way the data bounce buffer was just handled */
1701 26 : if (spdk_unlikely(bdev_io->internal.bounce_buf.orig_md_iov.iov_base != NULL)) {
1702 4 : assert(spdk_bdev_is_md_separate(bdev_io->bdev));
1703 :
1704 4 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1705 2 : if (bdev_io_use_memory_domain(bdev_io)) {
1706 2 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1707 2 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1708 : /* If memory domain is used then we need to call async push function */
1709 4 : rc = spdk_memory_domain_push_data(bdev_io->internal.memory_domain,
1710 2 : bdev_io->internal.memory_domain_ctx,
1711 2 : &bdev_io->internal.bounce_buf.orig_md_iov,
1712 2 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1713 2 : &bdev_io->internal.bounce_buf.md_iov, 1,
1714 : bdev_io_push_bounce_md_buf_done,
1715 2 : bdev_io);
1716 2 : if (rc == 0) {
1717 : /* Continue IO completion in async callback */
1718 2 : return;
1719 : }
1720 0 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1721 0 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1722 0 : if (rc != -ENOMEM) {
1723 0 : SPDK_ERRLOG("Failed to push md to memory domain %s\n",
1724 : spdk_memory_domain_get_dma_device_id(
1725 : bdev_io->internal.memory_domain));
1726 0 : }
1727 0 : } else {
1728 0 : memcpy(bdev_io->internal.bounce_buf.orig_md_iov.iov_base, bdev_io->u.bdev.md_buf,
1729 0 : bdev_io->internal.bounce_buf.orig_md_iov.iov_len);
1730 : }
1731 0 : }
1732 2 : }
1733 :
1734 24 : if (spdk_unlikely(rc == -ENOMEM)) {
1735 0 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PUSH_MD);
1736 0 : } else {
1737 24 : assert(bdev_io->internal.data_transfer_cpl);
1738 24 : bdev_io->internal.f.has_bounce_buf = false;
1739 24 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1740 : }
1741 26 : }
1742 :
1743 : static inline void
1744 26 : bdev_io_push_bounce_data_done(struct spdk_bdev_io *bdev_io, int rc)
1745 : {
1746 26 : assert(bdev_io->internal.data_transfer_cpl);
1747 26 : if (rc) {
1748 0 : bdev_io->internal.data_transfer_cpl(bdev_io, rc);
1749 0 : return;
1750 : }
1751 :
1752 : /* set original buffer for this io */
1753 26 : bdev_io->u.bdev.iovcnt = bdev_io->internal.bounce_buf.orig_iovcnt;
1754 26 : bdev_io->u.bdev.iovs = bdev_io->internal.bounce_buf.orig_iovs;
1755 :
1756 : /* We don't set bdev_io->internal.f.has_bounce_buf to false here because
1757 : * we still need to clear the md buf */
1758 :
1759 26 : bdev_io_push_bounce_md_buf(bdev_io);
1760 26 : }
1761 :
1762 : static void
1763 2 : bdev_io_push_bounce_data_done_and_track(void *ctx, int status)
1764 : {
1765 2 : struct spdk_bdev_io *bdev_io = ctx;
1766 2 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1767 :
1768 2 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1769 2 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1770 :
1771 2 : if (spdk_unlikely(!TAILQ_EMPTY(&ch->shared_resource->nomem_io))) {
1772 0 : bdev_ch_retry_io(ch);
1773 0 : }
1774 :
1775 2 : bdev_io_push_bounce_data_done(bdev_io, status);
1776 2 : }
1777 :
1778 : static inline void
1779 27 : bdev_io_push_bounce_data(struct spdk_bdev_io *bdev_io)
1780 : {
1781 27 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
1782 27 : int rc = 0;
1783 :
1784 27 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
1785 27 : assert(!bdev_io_use_accel_sequence(bdev_io));
1786 27 : assert(bdev_io->internal.f.has_bounce_buf);
1787 :
1788 : /* if this is the read path, copy data from the bounce buffer back to the original buffer */
1789 27 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ) {
1790 11 : if (bdev_io_use_memory_domain(bdev_io)) {
1791 3 : TAILQ_INSERT_TAIL(&ch->io_memory_domain, bdev_io, internal.link);
1792 3 : bdev_io_increment_outstanding(ch, ch->shared_resource);
1793 : /* If memory domain is used then we need to call async push function */
1794 6 : rc = spdk_memory_domain_push_data(bdev_io->internal.memory_domain,
1795 3 : bdev_io->internal.memory_domain_ctx,
1796 3 : bdev_io->internal.bounce_buf.orig_iovs,
1797 3 : (uint32_t)bdev_io->internal.bounce_buf.orig_iovcnt,
1798 3 : &bdev_io->internal.bounce_buf.iov, 1,
1799 : bdev_io_push_bounce_data_done_and_track,
1800 3 : bdev_io);
1801 3 : if (rc == 0) {
1802 : /* Continue IO completion in async callback */
1803 2 : return;
1804 : }
1805 :
1806 1 : TAILQ_REMOVE(&ch->io_memory_domain, bdev_io, internal.link);
1807 1 : bdev_io_decrement_outstanding(ch, ch->shared_resource);
1808 1 : if (rc != -ENOMEM) {
1809 0 : SPDK_ERRLOG("Failed to push data to memory domain %s\n",
1810 : spdk_memory_domain_get_dma_device_id(
1811 : bdev_io->internal.memory_domain));
1812 0 : }
1813 1 : } else {
1814 16 : spdk_copy_buf_to_iovs(bdev_io->internal.bounce_buf.orig_iovs,
1815 8 : bdev_io->internal.bounce_buf.orig_iovcnt,
1816 8 : bdev_io->internal.bounce_buf.iov.iov_base,
1817 8 : bdev_io->internal.bounce_buf.iov.iov_len);
1818 : }
1819 9 : }
1820 :
1821 25 : if (spdk_unlikely(rc == -ENOMEM)) {
1822 1 : bdev_queue_nomem_io_head(ch->shared_resource, bdev_io, BDEV_IO_RETRY_STATE_PUSH);
1823 1 : } else {
1824 24 : bdev_io_push_bounce_data_done(bdev_io, rc);
1825 : }
1826 27 : }
1827 :
1828 : static inline void
1829 26 : _bdev_io_push_bounce_data_buffer(struct spdk_bdev_io *bdev_io, bdev_copy_bounce_buffer_cpl cpl_cb)
1830 : {
1831 26 : bdev_io->internal.data_transfer_cpl = cpl_cb;
1832 26 : bdev_io_push_bounce_data(bdev_io);
1833 26 : }
1834 :
1835 : static void
1836 0 : bdev_io_get_iobuf_cb(struct spdk_iobuf_entry *iobuf, void *buf)
1837 : {
1838 : struct spdk_bdev_io *bdev_io;
1839 :
1840 0 : bdev_io = SPDK_CONTAINEROF(iobuf, struct spdk_bdev_io, internal.iobuf);
1841 0 : _bdev_io_set_buf(bdev_io, buf, bdev_io->internal.buf.len);
1842 0 : }
1843 :
1844 : static void
1845 42 : bdev_io_get_buf(struct spdk_bdev_io *bdev_io, uint64_t len)
1846 : {
1847 : struct spdk_bdev_mgmt_channel *mgmt_ch;
1848 : uint64_t max_len;
1849 : void *buf;
1850 :
1851 42 : assert(spdk_bdev_io_get_thread(bdev_io) == spdk_get_thread());
1852 42 : mgmt_ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
1853 42 : max_len = bdev_io_get_max_buf_len(bdev_io, len);
1854 :
1855 42 : if (spdk_unlikely(max_len > mgmt_ch->iobuf.cache[0].large.bufsize)) {
1856 0 : SPDK_ERRLOG("Length %" PRIu64 " is larger than allowed\n", max_len);
1857 0 : bdev_io_get_buf_complete(bdev_io, false);
1858 0 : return;
1859 : }
1860 :
1861 42 : bdev_io->internal.buf.len = len;
1862 42 : buf = spdk_iobuf_get(&mgmt_ch->iobuf, max_len, &bdev_io->internal.iobuf,
1863 : bdev_io_get_iobuf_cb);
1864 42 : if (buf != NULL) {
1865 42 : _bdev_io_set_buf(bdev_io, buf, len);
1866 42 : }
1867 42 : }
1868 :
1869 : void
1870 56 : spdk_bdev_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb, uint64_t len)
1871 : {
1872 56 : struct spdk_bdev *bdev = bdev_io->bdev;
1873 : uint64_t alignment;
1874 :
1875 56 : assert(cb != NULL);
1876 56 : bdev_io->internal.get_buf_cb = cb;
1877 :
1878 56 : alignment = spdk_bdev_get_buf_align(bdev);
1879 :
1880 56 : if (_is_buf_allocated(bdev_io->u.bdev.iovs) &&
1881 40 : _are_iovs_aligned(bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, alignment)) {
1882 : /* Buffer already present and aligned */
1883 18 : cb(spdk_bdev_io_get_io_channel(bdev_io), bdev_io, true);
1884 18 : return;
1885 : }
1886 :
1887 38 : bdev_io_get_buf(bdev_io, len);
1888 56 : }
1889 :
1890 : static void
1891 4 : _bdev_memory_domain_get_io_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
1892 : bool success)
1893 : {
1894 4 : if (!success) {
1895 0 : SPDK_ERRLOG("Failed to get data buffer, completing IO\n");
1896 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
1897 0 : bdev_io_complete_unsubmitted(bdev_io);
1898 0 : return;
1899 : }
1900 :
1901 4 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io)) {
1902 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
1903 0 : bdev_io_exec_sequence(bdev_io, bdev_io_submit_sequence_cb);
1904 0 : return;
1905 : }
1906 : /* For reads we'll execute the sequence after the data is read, so, for now, only
1907 : * clear out accel_sequence pointer and submit the IO */
1908 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
1909 0 : bdev_io->u.bdev.accel_sequence = NULL;
1910 0 : }
1911 :
1912 4 : bdev_io_submit(bdev_io);
1913 4 : }
1914 :
1915 : static void
1916 4 : _bdev_memory_domain_io_get_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_buf_cb cb,
1917 : uint64_t len)
1918 : {
1919 4 : assert(cb != NULL);
1920 4 : bdev_io->internal.get_buf_cb = cb;
1921 :
1922 4 : bdev_io_get_buf(bdev_io, len);
1923 4 : }
1924 :
1925 :
1926 3 : SPDK_LOG_DEPRECATION_REGISTER(spdk_bdev_io_get_aux_buf,
1927 : "spdk_bdev_io_get_aux_buf is deprecated", "v25.01", 0);
1928 :
1929 : void
1930 0 : spdk_bdev_io_get_aux_buf(struct spdk_bdev_io *bdev_io, spdk_bdev_io_get_aux_buf_cb cb)
1931 : {
1932 0 : uint64_t len = bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen;
1933 :
1934 0 : SPDK_LOG_DEPRECATED(spdk_bdev_io_get_aux_buf);
1935 :
1936 0 : assert(cb != NULL);
1937 0 : assert(bdev_io->internal.get_aux_buf_cb == NULL);
1938 0 : bdev_io->internal.get_aux_buf_cb = cb;
1939 0 : bdev_io_get_buf(bdev_io, len);
1940 0 : }
1941 :
1942 : static int
1943 68 : bdev_module_get_max_ctx_size(void)
1944 : {
1945 : struct spdk_bdev_module *bdev_module;
1946 68 : int max_bdev_module_size = 0;
1947 :
1948 266 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
1949 198 : if (bdev_module->get_ctx_size && bdev_module->get_ctx_size() > max_bdev_module_size) {
1950 67 : max_bdev_module_size = bdev_module->get_ctx_size();
1951 67 : }
1952 198 : }
1953 :
1954 68 : return max_bdev_module_size;
1955 : }
1956 :
1957 : static void
1958 0 : bdev_enable_histogram_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
1959 : {
1960 0 : if (!bdev->internal.histogram_enabled) {
1961 0 : return;
1962 : }
1963 :
1964 0 : spdk_json_write_object_begin(w);
1965 0 : spdk_json_write_named_string(w, "method", "bdev_enable_histogram");
1966 :
1967 0 : spdk_json_write_named_object_begin(w, "params");
1968 0 : spdk_json_write_named_string(w, "name", bdev->name);
1969 :
1970 0 : spdk_json_write_named_bool(w, "enable", bdev->internal.histogram_enabled);
1971 :
1972 0 : if (bdev->internal.histogram_io_type) {
1973 0 : spdk_json_write_named_string(w, "opc",
1974 0 : spdk_bdev_get_io_type_name(bdev->internal.histogram_io_type));
1975 0 : }
1976 :
1977 0 : spdk_json_write_object_end(w);
1978 :
1979 0 : spdk_json_write_object_end(w);
1980 0 : }
1981 :
1982 : static void
1983 0 : bdev_qos_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
1984 : {
1985 : int i;
1986 0 : struct spdk_bdev_qos *qos = bdev->internal.qos;
1987 : uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES];
1988 :
1989 0 : if (!qos) {
1990 0 : return;
1991 : }
1992 :
1993 0 : spdk_bdev_get_qos_rate_limits(bdev, limits);
1994 :
1995 0 : spdk_json_write_object_begin(w);
1996 0 : spdk_json_write_named_string(w, "method", "bdev_set_qos_limit");
1997 :
1998 0 : spdk_json_write_named_object_begin(w, "params");
1999 0 : spdk_json_write_named_string(w, "name", bdev->name);
2000 0 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
2001 0 : if (limits[i] > 0) {
2002 0 : spdk_json_write_named_uint64(w, qos_rpc_type[i], limits[i]);
2003 0 : }
2004 0 : }
2005 0 : spdk_json_write_object_end(w);
2006 :
2007 0 : spdk_json_write_object_end(w);
2008 0 : }
2009 :
2010 : void
2011 0 : spdk_bdev_subsystem_config_json(struct spdk_json_write_ctx *w)
2012 : {
2013 : struct spdk_bdev_module *bdev_module;
2014 : struct spdk_bdev *bdev;
2015 :
2016 0 : assert(w != NULL);
2017 :
2018 0 : spdk_json_write_array_begin(w);
2019 :
2020 0 : spdk_json_write_object_begin(w);
2021 0 : spdk_json_write_named_string(w, "method", "bdev_set_options");
2022 0 : spdk_json_write_named_object_begin(w, "params");
2023 0 : spdk_json_write_named_uint32(w, "bdev_io_pool_size", g_bdev_opts.bdev_io_pool_size);
2024 0 : spdk_json_write_named_uint32(w, "bdev_io_cache_size", g_bdev_opts.bdev_io_cache_size);
2025 0 : spdk_json_write_named_bool(w, "bdev_auto_examine", g_bdev_opts.bdev_auto_examine);
2026 0 : spdk_json_write_named_uint32(w, "iobuf_small_cache_size", g_bdev_opts.iobuf_small_cache_size);
2027 0 : spdk_json_write_named_uint32(w, "iobuf_large_cache_size", g_bdev_opts.iobuf_large_cache_size);
2028 0 : spdk_json_write_object_end(w);
2029 0 : spdk_json_write_object_end(w);
2030 :
2031 0 : bdev_examine_allowlist_config_json(w);
2032 :
2033 0 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
2034 0 : if (bdev_module->config_json) {
2035 0 : bdev_module->config_json(w);
2036 0 : }
2037 0 : }
2038 :
2039 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
2040 :
2041 0 : TAILQ_FOREACH(bdev, &g_bdev_mgr.bdevs, internal.link) {
2042 0 : if (bdev->fn_table->write_config_json) {
2043 0 : bdev->fn_table->write_config_json(bdev, w);
2044 0 : }
2045 :
2046 0 : bdev_qos_config_json(bdev, w);
2047 0 : bdev_enable_histogram_config_json(bdev, w);
2048 0 : }
2049 :
2050 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
2051 :
2052 : /* This has to be the last RPC in the array, to make sure all bdevs have finished examination */
2053 0 : spdk_json_write_object_begin(w);
2054 0 : spdk_json_write_named_string(w, "method", "bdev_wait_for_examine");
2055 0 : spdk_json_write_object_end(w);
2056 :
2057 0 : spdk_json_write_array_end(w);
2058 0 : }
2059 :
2060 : static void
2061 72 : bdev_mgmt_channel_destroy(void *io_device, void *ctx_buf)
2062 : {
2063 72 : struct spdk_bdev_mgmt_channel *ch = ctx_buf;
2064 : struct spdk_bdev_io *bdev_io;
2065 :
2066 72 : spdk_iobuf_channel_fini(&ch->iobuf);
2067 :
2068 10226 : while (!STAILQ_EMPTY(&ch->per_thread_cache)) {
2069 10154 : bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
2070 10154 : STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
2071 10154 : ch->per_thread_cache_count--;
2072 10154 : spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
2073 : }
2074 :
2075 72 : assert(ch->per_thread_cache_count == 0);
2076 72 : }
2077 :
2078 : static int
2079 72 : bdev_mgmt_channel_create(void *io_device, void *ctx_buf)
2080 : {
2081 72 : struct spdk_bdev_mgmt_channel *ch = ctx_buf;
2082 : struct spdk_bdev_io *bdev_io;
2083 : uint32_t i;
2084 : int rc;
2085 :
2086 144 : rc = spdk_iobuf_channel_init(&ch->iobuf, "bdev",
2087 72 : g_bdev_opts.iobuf_small_cache_size,
2088 72 : g_bdev_opts.iobuf_large_cache_size);
2089 72 : if (rc != 0) {
2090 0 : SPDK_ERRLOG("Failed to create iobuf channel: %s\n", spdk_strerror(-rc));
2091 0 : return -1;
2092 : }
2093 :
2094 72 : STAILQ_INIT(&ch->per_thread_cache);
2095 72 : ch->bdev_io_cache_size = g_bdev_opts.bdev_io_cache_size;
2096 :
2097 : /* Pre-populate bdev_io cache to ensure this thread cannot be starved. */
2098 72 : ch->per_thread_cache_count = 0;
2099 10226 : for (i = 0; i < ch->bdev_io_cache_size; i++) {
2100 10154 : bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
2101 10154 : if (bdev_io == NULL) {
2102 0 : SPDK_ERRLOG("You need to increase bdev_io_pool_size using bdev_set_options RPC.\n");
2103 0 : assert(false);
2104 : bdev_mgmt_channel_destroy(io_device, ctx_buf);
2105 : return -1;
2106 : }
2107 10154 : ch->per_thread_cache_count++;
2108 10154 : STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
2109 10154 : }
2110 :
2111 72 : TAILQ_INIT(&ch->shared_resources);
2112 72 : TAILQ_INIT(&ch->io_wait_queue);
2113 :
2114 72 : return 0;
2115 72 : }
2116 :
2117 : static void
2118 68 : bdev_init_complete(int rc)
2119 : {
2120 68 : spdk_bdev_init_cb cb_fn = g_init_cb_fn;
2121 68 : void *cb_arg = g_init_cb_arg;
2122 : struct spdk_bdev_module *m;
2123 :
2124 68 : g_bdev_mgr.init_complete = true;
2125 68 : g_init_cb_fn = NULL;
2126 68 : g_init_cb_arg = NULL;
2127 :
2128 : /*
2129 : * For modules that need to know when subsystem init is complete,
2130 : * inform them now.
2131 : */
2132 68 : if (rc == 0) {
2133 266 : TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
2134 198 : if (m->init_complete) {
2135 24 : m->init_complete();
2136 24 : }
2137 198 : }
2138 68 : }
2139 :
2140 68 : cb_fn(cb_arg, rc);
2141 68 : }
2142 :
2143 : static bool
2144 270 : bdev_module_all_actions_completed(void)
2145 : {
2146 : struct spdk_bdev_module *m;
2147 :
2148 1073 : TAILQ_FOREACH(m, &g_bdev_mgr.bdev_modules, internal.tailq) {
2149 803 : if (m->internal.action_in_progress > 0) {
2150 0 : return false;
2151 : }
2152 803 : }
2153 270 : return true;
2154 270 : }
2155 :
2156 : static void
2157 629 : bdev_module_action_complete(void)
2158 : {
2159 : /*
2160 : * Don't finish bdev subsystem initialization if
2161 : * module pre-initialization is still in progress, or
2162 : * the subsystem has already been initialized.
2163 : */
2164 629 : if (!g_bdev_mgr.module_init_complete || g_bdev_mgr.init_complete) {
2165 561 : return;
2166 : }
2167 :
2168 : /*
2169 : * Check all bdev modules for inits/examinations in progress. If any
2170 : * exist, return immediately since we cannot finish bdev subsystem
2171 : * initialization until all are completed.
2172 : */
2173 68 : if (!bdev_module_all_actions_completed()) {
2174 0 : return;
2175 : }
2176 :
2177 : /*
2178 : * Modules already finished initialization - now that all
2179 : * the bdev modules have finished their asynchronous I/O
2180 : * processing, the entire bdev layer can be marked as complete.
2181 : */
2182 68 : bdev_init_complete(0);
2183 629 : }
2184 :
2185 : static void
2186 561 : bdev_module_action_done(struct spdk_bdev_module *module)
2187 : {
2188 561 : spdk_spin_lock(&module->internal.spinlock);
2189 561 : assert(module->internal.action_in_progress > 0);
2190 561 : module->internal.action_in_progress--;
2191 561 : spdk_spin_unlock(&module->internal.spinlock);
2192 561 : bdev_module_action_complete();
2193 561 : }
2194 :
2195 : void
2196 68 : spdk_bdev_module_init_done(struct spdk_bdev_module *module)
2197 : {
2198 68 : assert(module->async_init);
2199 68 : bdev_module_action_done(module);
2200 68 : }
2201 :
2202 : void
2203 493 : spdk_bdev_module_examine_done(struct spdk_bdev_module *module)
2204 : {
2205 493 : bdev_module_action_done(module);
2206 493 : }
2207 :
2208 : /** The last initialized bdev module */
2209 : static struct spdk_bdev_module *g_resume_bdev_module = NULL;
2210 :
2211 : static void
2212 0 : bdev_init_failed(void *cb_arg)
2213 : {
2214 0 : struct spdk_bdev_module *module = cb_arg;
2215 :
2216 0 : spdk_spin_lock(&module->internal.spinlock);
2217 0 : assert(module->internal.action_in_progress > 0);
2218 0 : module->internal.action_in_progress--;
2219 0 : spdk_spin_unlock(&module->internal.spinlock);
2220 0 : bdev_init_complete(-1);
2221 0 : }
2222 :
2223 : static int
2224 68 : bdev_modules_init(void)
2225 : {
2226 : struct spdk_bdev_module *module;
2227 68 : int rc = 0;
2228 :
2229 266 : TAILQ_FOREACH(module, &g_bdev_mgr.bdev_modules, internal.tailq) {
2230 198 : g_resume_bdev_module = module;
2231 198 : if (module->async_init) {
2232 68 : spdk_spin_lock(&module->internal.spinlock);
2233 68 : module->internal.action_in_progress = 1;
2234 68 : spdk_spin_unlock(&module->internal.spinlock);
2235 68 : }
2236 198 : rc = module->module_init();
2237 198 : if (rc != 0) {
2238 : /* Bump action_in_progress to prevent other modules from completing modules_init.
2239 : * Send a message to defer application shutdown until resources are cleaned up */
2240 0 : spdk_spin_lock(&module->internal.spinlock);
2241 0 : module->internal.action_in_progress = 1;
2242 0 : spdk_spin_unlock(&module->internal.spinlock);
2243 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_init_failed, module);
2244 0 : return rc;
2245 : }
2246 198 : }
2247 :
2248 68 : g_resume_bdev_module = NULL;
2249 68 : return 0;
2250 68 : }
2251 :
2252 : void
2253 68 : spdk_bdev_initialize(spdk_bdev_init_cb cb_fn, void *cb_arg)
2254 : {
2255 68 : int rc = 0;
2256 : char mempool_name[32];
2257 :
2258 68 : assert(cb_fn != NULL);
2259 :
2260 68 : g_init_cb_fn = cb_fn;
2261 68 : g_init_cb_arg = cb_arg;
2262 :
2263 68 : spdk_notify_type_register("bdev_register");
2264 68 : spdk_notify_type_register("bdev_unregister");
2265 :
2266 68 : snprintf(mempool_name, sizeof(mempool_name), "bdev_io_%d", getpid());
2267 :
2268 68 : rc = spdk_iobuf_register_module("bdev");
2269 68 : if (rc != 0) {
2270 0 : SPDK_ERRLOG("could not register bdev iobuf module: %s\n", spdk_strerror(-rc));
2271 0 : bdev_init_complete(-1);
2272 0 : return;
2273 : }
2274 :
2275 136 : g_bdev_mgr.bdev_io_pool = spdk_mempool_create(mempool_name,
2276 68 : g_bdev_opts.bdev_io_pool_size,
2277 68 : sizeof(struct spdk_bdev_io) +
2278 68 : bdev_module_get_max_ctx_size(),
2279 : 0,
2280 : SPDK_ENV_NUMA_ID_ANY);
2281 :
2282 68 : if (g_bdev_mgr.bdev_io_pool == NULL) {
2283 0 : SPDK_ERRLOG("could not allocate spdk_bdev_io pool\n");
2284 0 : bdev_init_complete(-1);
2285 0 : return;
2286 : }
2287 :
2288 68 : g_bdev_mgr.zero_buffer = spdk_zmalloc(ZERO_BUFFER_SIZE, ZERO_BUFFER_SIZE,
2289 : NULL, SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
2290 68 : if (!g_bdev_mgr.zero_buffer) {
2291 0 : SPDK_ERRLOG("create bdev zero buffer failed\n");
2292 0 : bdev_init_complete(-1);
2293 0 : return;
2294 : }
2295 :
2296 : #ifdef SPDK_CONFIG_VTUNE
2297 : g_bdev_mgr.domain = __itt_domain_create("spdk_bdev");
2298 : #endif
2299 :
2300 68 : spdk_io_device_register(&g_bdev_mgr, bdev_mgmt_channel_create,
2301 : bdev_mgmt_channel_destroy,
2302 : sizeof(struct spdk_bdev_mgmt_channel),
2303 : "bdev_mgr");
2304 :
2305 68 : rc = bdev_modules_init();
2306 68 : g_bdev_mgr.module_init_complete = true;
2307 68 : if (rc != 0) {
2308 0 : SPDK_ERRLOG("bdev modules init failed\n");
2309 0 : return;
2310 : }
2311 :
2312 68 : bdev_module_action_complete();
2313 68 : }
2314 :
2315 : static void
2316 68 : bdev_mgr_unregister_cb(void *io_device)
2317 : {
2318 68 : spdk_bdev_fini_cb cb_fn = g_fini_cb_fn;
2319 :
2320 68 : if (g_bdev_mgr.bdev_io_pool) {
2321 68 : if (spdk_mempool_count(g_bdev_mgr.bdev_io_pool) != g_bdev_opts.bdev_io_pool_size) {
2322 0 : SPDK_ERRLOG("bdev IO pool count is %zu but should be %u\n",
2323 : spdk_mempool_count(g_bdev_mgr.bdev_io_pool),
2324 : g_bdev_opts.bdev_io_pool_size);
2325 0 : }
2326 :
2327 68 : spdk_mempool_free(g_bdev_mgr.bdev_io_pool);
2328 68 : }
2329 :
2330 68 : spdk_free(g_bdev_mgr.zero_buffer);
2331 :
2332 68 : bdev_examine_allowlist_free();
2333 :
2334 68 : cb_fn(g_fini_cb_arg);
2335 68 : g_fini_cb_fn = NULL;
2336 68 : g_fini_cb_arg = NULL;
2337 68 : g_bdev_mgr.init_complete = false;
2338 68 : g_bdev_mgr.module_init_complete = false;
2339 68 : }
2340 :
2341 : static void
2342 68 : bdev_module_fini_iter(void *arg)
2343 : {
2344 : struct spdk_bdev_module *bdev_module;
2345 :
2346 : /* FIXME: Handling initialization failures is broken now,
2347 : * so we won't even try cleaning up after successfully
2348 : * initialized modules. If module_init_complete is false,
2349 : * just call bdev_mgr_unregister_cb.
2350 : */
2351 68 : if (!g_bdev_mgr.module_init_complete) {
2352 0 : bdev_mgr_unregister_cb(NULL);
2353 0 : return;
2354 : }
2355 :
2356 : /* Start iterating from the last touched module */
2357 68 : if (!g_resume_bdev_module) {
2358 68 : bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
2359 68 : } else {
2360 0 : bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list,
2361 : internal.tailq);
2362 : }
2363 :
2364 266 : while (bdev_module) {
2365 198 : if (bdev_module->async_fini) {
2366 : /* Save our place so we can resume later. We must
2367 : * save the variable here, before calling module_fini()
2368 : * below, because in some cases the module may immediately
2369 : * call spdk_bdev_module_fini_done() and re-enter
2370 : * this function to continue iterating. */
2371 0 : g_resume_bdev_module = bdev_module;
2372 0 : }
2373 :
2374 198 : if (bdev_module->module_fini) {
2375 198 : bdev_module->module_fini();
2376 198 : }
2377 :
2378 198 : if (bdev_module->async_fini) {
2379 0 : return;
2380 : }
2381 :
2382 198 : bdev_module = TAILQ_PREV(bdev_module, bdev_module_list,
2383 : internal.tailq);
2384 : }
2385 :
2386 68 : g_resume_bdev_module = NULL;
2387 68 : spdk_io_device_unregister(&g_bdev_mgr, bdev_mgr_unregister_cb);
2388 68 : }
2389 :
2390 : void
2391 0 : spdk_bdev_module_fini_done(void)
2392 : {
2393 0 : if (spdk_get_thread() != g_fini_thread) {
2394 0 : spdk_thread_send_msg(g_fini_thread, bdev_module_fini_iter, NULL);
2395 0 : } else {
2396 0 : bdev_module_fini_iter(NULL);
2397 : }
2398 0 : }
2399 :
2400 : static void
2401 68 : bdev_finish_unregister_bdevs_iter(void *cb_arg, int bdeverrno)
2402 : {
2403 68 : struct spdk_bdev *bdev = cb_arg;
2404 :
2405 68 : if (bdeverrno && bdev) {
2406 0 : SPDK_WARNLOG("Unable to unregister bdev '%s' during spdk_bdev_finish()\n",
2407 : bdev->name);
2408 :
2409 : /*
2410 : * Since the call to spdk_bdev_unregister() failed, we have no way to free this
2411 : * bdev; try to continue by manually removing this bdev from the list and moving
2412 : * on to the next bdev in the list.
2413 : */
2414 0 : TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
2415 0 : }
2416 :
2417 68 : if (TAILQ_EMPTY(&g_bdev_mgr.bdevs)) {
2418 68 : SPDK_DEBUGLOG(bdev, "Done unregistering bdevs\n");
2419 : /*
2420 : * Bdev module finish needs to be deferred, as we might be in the middle of some context
2421 : * (like bdev part free) that will use this bdev (or private bdev driver ctx data)
2422 : * after returning.
2423 : */
2424 68 : spdk_thread_send_msg(spdk_get_thread(), bdev_module_fini_iter, NULL);
2425 68 : return;
2426 : }
2427 :
2428 : /*
2429 : * Unregister the last unclaimed bdev in the list, to ensure that bdev subsystem
2430 : * shutdown proceeds top-down. The goal is to give virtual bdevs an opportunity
2431 : * to detect clean shutdown as opposed to run-time hot removal of the underlying
2432 : * base bdevs.
2433 : *
2434 : * Also, walk the list in the reverse order.
2435 : */
2436 0 : for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
2437 0 : bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
2438 0 : spdk_spin_lock(&bdev->internal.spinlock);
2439 0 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
2440 0 : LOG_ALREADY_CLAIMED_DEBUG("claimed, skipping", bdev);
2441 0 : spdk_spin_unlock(&bdev->internal.spinlock);
2442 0 : continue;
2443 : }
2444 0 : spdk_spin_unlock(&bdev->internal.spinlock);
2445 :
2446 0 : SPDK_DEBUGLOG(bdev, "Unregistering bdev '%s'\n", bdev->name);
2447 0 : spdk_bdev_unregister(bdev, bdev_finish_unregister_bdevs_iter, bdev);
2448 0 : return;
2449 : }
2450 :
2451 : /*
2452 : * If any bdev fails to release its claim on an underlying bdev properly, we may
2453 : * end up with a bdev list consisting only of claimed bdevs (if claims are managed
2454 : * correctly, this would mean there's a loop in the claims graph, which is
2455 : * clearly impossible). In that case, warn and unregister the last bdev on the list.
2456 : */
2457 0 : for (bdev = TAILQ_LAST(&g_bdev_mgr.bdevs, spdk_bdev_list);
2458 0 : bdev; bdev = TAILQ_PREV(bdev, spdk_bdev_list, internal.link)) {
2459 0 : SPDK_WARNLOG("Unregistering claimed bdev '%s'!\n", bdev->name);
2460 0 : spdk_bdev_unregister(bdev, bdev_finish_unregister_bdevs_iter, bdev);
2461 0 : return;
2462 : }
2463 68 : }
2464 :
2465 : static void
2466 68 : bdev_module_fini_start_iter(void *arg)
2467 : {
2468 : struct spdk_bdev_module *bdev_module;
2469 :
2470 68 : if (!g_resume_bdev_module) {
2471 68 : bdev_module = TAILQ_LAST(&g_bdev_mgr.bdev_modules, bdev_module_list);
2472 68 : } else {
2473 0 : bdev_module = TAILQ_PREV(g_resume_bdev_module, bdev_module_list, internal.tailq);
2474 : }
2475 :
2476 266 : while (bdev_module) {
2477 198 : if (bdev_module->async_fini_start) {
2478 : /* Save our place so we can resume later. We must
2479 : * save the variable here, before calling fini_start()
2480 : * below, because in some cases the module may immediately
2481 : * call spdk_bdev_module_fini_start_done() and re-enter
2482 : * this function to continue iterating. */
2483 0 : g_resume_bdev_module = bdev_module;
2484 0 : }
2485 :
2486 198 : if (bdev_module->fini_start) {
2487 24 : bdev_module->fini_start();
2488 24 : }
2489 :
2490 198 : if (bdev_module->async_fini_start) {
2491 0 : return;
2492 : }
2493 :
2494 198 : bdev_module = TAILQ_PREV(bdev_module, bdev_module_list, internal.tailq);
2495 : }
2496 :
2497 68 : g_resume_bdev_module = NULL;
2498 :
2499 68 : bdev_finish_unregister_bdevs_iter(NULL, 0);
2500 68 : }
2501 :
2502 : void
2503 0 : spdk_bdev_module_fini_start_done(void)
2504 : {
2505 0 : if (spdk_get_thread() != g_fini_thread) {
2506 0 : spdk_thread_send_msg(g_fini_thread, bdev_module_fini_start_iter, NULL);
2507 0 : } else {
2508 0 : bdev_module_fini_start_iter(NULL);
2509 : }
2510 0 : }
2511 :
2512 : static void
2513 68 : bdev_finish_wait_for_examine_done(void *cb_arg)
2514 : {
2515 68 : bdev_module_fini_start_iter(NULL);
2516 68 : }
2517 :
2518 : static void bdev_open_async_fini(void);
2519 :
2520 : void
2521 68 : spdk_bdev_finish(spdk_bdev_fini_cb cb_fn, void *cb_arg)
2522 : {
2523 : int rc;
2524 :
2525 68 : assert(cb_fn != NULL);
2526 :
2527 68 : g_fini_thread = spdk_get_thread();
2528 :
2529 68 : g_fini_cb_fn = cb_fn;
2530 68 : g_fini_cb_arg = cb_arg;
2531 :
2532 68 : bdev_open_async_fini();
2533 :
2534 68 : rc = spdk_bdev_wait_for_examine(bdev_finish_wait_for_examine_done, NULL);
2535 68 : if (rc != 0) {
2536 0 : SPDK_ERRLOG("wait_for_examine failed: %s\n", spdk_strerror(-rc));
2537 0 : bdev_finish_wait_for_examine_done(NULL);
2538 0 : }
2539 68 : }
2540 :
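 : /* Allocate a bdev_io, preferring the per-thread cache; fall back to the global
 :  * mempool only when the cache is empty and no other callers are already waiting
 :  * on the io_wait_queue, so waiters are not bypassed. */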
2541 : struct spdk_bdev_io *
2542 699 : bdev_channel_get_io(struct spdk_bdev_channel *channel)
2543 : {
2544 699 : struct spdk_bdev_mgmt_channel *ch = channel->shared_resource->mgmt_ch;
2545 : struct spdk_bdev_io *bdev_io;
2546 :
2547 699 : if (ch->per_thread_cache_count > 0) {
2548 639 : bdev_io = STAILQ_FIRST(&ch->per_thread_cache);
2549 639 : STAILQ_REMOVE_HEAD(&ch->per_thread_cache, internal.buf_link);
2550 639 : ch->per_thread_cache_count--;
2551 699 : } else if (spdk_unlikely(!TAILQ_EMPTY(&ch->io_wait_queue))) {
2552 : /*
2553 : * Don't try to look for bdev_ios in the global pool if there are
2554 : * waiters on bdev_ios - we don't want this caller to jump the line.
2555 : */
2556 0 : bdev_io = NULL;
2557 0 : } else {
2558 60 : bdev_io = spdk_mempool_get(g_bdev_mgr.bdev_io_pool);
2559 : }
2560 :
2561 699 : return bdev_io;
2562 : }
2563 :
2564 : void
2565 693 : spdk_bdev_free_io(struct spdk_bdev_io *bdev_io)
2566 : {
2567 : struct spdk_bdev_mgmt_channel *ch;
2568 :
2569 693 : assert(bdev_io != NULL);
2570 693 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING);
2571 :
2572 693 : ch = bdev_io->internal.ch->shared_resource->mgmt_ch;
2573 :
2574 693 : if (bdev_io->internal.f.has_buf) {
2575 16 : bdev_io_put_buf(bdev_io);
2576 16 : }
2577 :
2578 693 : if (ch->per_thread_cache_count < ch->bdev_io_cache_size) {
2579 639 : ch->per_thread_cache_count++;
2580 639 : STAILQ_INSERT_HEAD(&ch->per_thread_cache, bdev_io, internal.buf_link);
2581 643 : while (ch->per_thread_cache_count > 0 && !TAILQ_EMPTY(&ch->io_wait_queue)) {
2582 : struct spdk_bdev_io_wait_entry *entry;
2583 :
2584 4 : entry = TAILQ_FIRST(&ch->io_wait_queue);
2585 4 : TAILQ_REMOVE(&ch->io_wait_queue, entry, link);
2586 4 : entry->cb_fn(entry->cb_arg);
2587 : }
2588 639 : } else {
2589 : /* We should never have a full cache with entries on the io wait queue. */
2590 54 : assert(TAILQ_EMPTY(&ch->io_wait_queue));
2591 54 : spdk_mempool_put(g_bdev_mgr.bdev_io_pool, (void *)bdev_io);
2592 : }
2593 693 : }
2594 :
2595 : static bool
2596 72 : bdev_qos_is_iops_rate_limit(enum spdk_bdev_qos_rate_limit_type limit)
2597 : {
2598 72 : assert(limit != SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
2599 :
2600 72 : switch (limit) {
2601 : case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
2602 18 : return true;
2603 : case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
2604 : case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
2605 : case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
2606 54 : return false;
2607 0 : case SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES:
2608 : default:
2609 0 : return false;
2610 : }
2611 72 : }
2612 :
2613 : static bool
2614 25 : bdev_qos_io_to_limit(struct spdk_bdev_io *bdev_io)
2615 : {
2616 25 : switch (bdev_io->type) {
2617 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2618 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2619 : case SPDK_BDEV_IO_TYPE_READ:
2620 : case SPDK_BDEV_IO_TYPE_WRITE:
2621 23 : return true;
2622 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2623 0 : if (bdev_io->u.bdev.zcopy.start) {
2624 0 : return true;
2625 : } else {
2626 0 : return false;
2627 : }
2628 : default:
2629 2 : return false;
2630 : }
2631 25 : }
2632 :
2633 : static bool
2634 33 : bdev_is_read_io(struct spdk_bdev_io *bdev_io)
2635 : {
2636 33 : switch (bdev_io->type) {
2637 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2638 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2639 : /* Bit 1 (0x2) set for read operation */
2640 0 : if (bdev_io->u.nvme_passthru.cmd.opc & SPDK_NVME_OPC_READ) {
2641 0 : return true;
2642 : } else {
2643 0 : return false;
2644 : }
2645 : case SPDK_BDEV_IO_TYPE_READ:
2646 30 : return true;
2647 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2648 : /* Populate to read from disk */
2649 0 : if (bdev_io->u.bdev.zcopy.populate) {
2650 0 : return true;
2651 : } else {
2652 0 : return false;
2653 : }
2654 : default:
2655 3 : return false;
2656 : }
2657 33 : }
2658 :
2659 : static uint64_t
2660 43 : bdev_get_io_size_in_byte(struct spdk_bdev_io *bdev_io)
2661 : {
2662 43 : uint32_t blocklen = bdev_io_get_block_size(bdev_io);
2663 :
2664 43 : switch (bdev_io->type) {
2665 : case SPDK_BDEV_IO_TYPE_NVME_IO:
2666 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
2667 0 : return bdev_io->u.nvme_passthru.nbytes;
2668 : case SPDK_BDEV_IO_TYPE_READ:
2669 : case SPDK_BDEV_IO_TYPE_WRITE:
2670 43 : return bdev_io->u.bdev.num_blocks * blocklen;
2671 : case SPDK_BDEV_IO_TYPE_ZCOPY:
2672 : /* Track the data in the start phase only */
2673 0 : if (bdev_io->u.bdev.zcopy.start) {
2674 0 : return bdev_io->u.bdev.num_blocks * blocklen;
2675 : } else {
2676 0 : return 0;
2677 : }
2678 : default:
2679 0 : return 0;
2680 : }
2681 43 : }
2682 :
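 : /* Charge `delta` (one IO, or a byte count) against the current timeslice quota.
 :  * For example, a 4096-byte write under a bytes-per-second limit calls this with
 :  * delta = 4096: if remaining_this_timeslice was 1024 before the subtraction, the IO
 :  * is still admitted and the 3072-byte overrun carries into the next timeslice;
 :  * only if the quota was already exhausted (<= 0) is the delta rewound and the IO queued. */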
2683 : static inline bool
2684 64 : bdev_qos_rw_queue_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
2685 : {
2686 : int64_t remaining_this_timeslice;
2687 :
2688 64 : if (!limit->max_per_timeslice) {
2689 : /* The QoS is disabled */
2690 0 : return false;
2691 : }
2692 :
2693 64 : remaining_this_timeslice = __atomic_sub_fetch(&limit->remaining_this_timeslice, delta,
2694 : __ATOMIC_RELAXED);
2695 64 : if (remaining_this_timeslice + (int64_t)delta > 0) {
2696 : /* There was still a quota for this delta -> the IO shouldn't be queued
2697 : *
2698 : * We allow a slight quota overrun here so an IO bigger than the per-timeslice
2699 : * quota can be allowed once in a while. Such an overrun is then taken into account in
2700 : * the QoS poller, where the next timeslice quota is calculated.
2701 : */
2702 59 : return false;
2703 : }
2704 :
2705 : /* There was no quota for this delta -> the IO should be queued
2706 : * The remaining_this_timeslice must be rewound so that it reflects the actual
2707 : * number of IOs or bytes allowed.
2708 : */
2709 5 : __atomic_add_fetch(
2710 5 : &limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
2711 5 : return true;
2712 64 : }
2713 :
2714 : static inline void
2715 5 : bdev_qos_rw_rewind_io(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io, uint64_t delta)
2716 : {
2717 5 : __atomic_add_fetch(&limit->remaining_this_timeslice, delta, __ATOMIC_RELAXED);
2718 5 : }
2719 :
2720 : static bool
2721 23 : bdev_qos_rw_iops_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2722 : {
2723 23 : return bdev_qos_rw_queue_io(limit, io, 1);
2724 : }
2725 :
2726 : static void
2727 3 : bdev_qos_rw_iops_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2728 : {
2729 3 : bdev_qos_rw_rewind_io(limit, io, 1);
2730 3 : }
2731 :
2732 : static bool
2733 41 : bdev_qos_rw_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2734 : {
2735 41 : return bdev_qos_rw_queue_io(limit, io, bdev_get_io_size_in_byte(io));
2736 : }
2737 :
2738 : static void
2739 2 : bdev_qos_rw_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2740 : {
2741 2 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2742 2 : }
2743 :
2744 : static bool
2745 19 : bdev_qos_r_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2746 : {
2747 19 : if (bdev_is_read_io(io) == false) {
2748 1 : return false;
2749 : }
2750 :
2751 18 : return bdev_qos_rw_bps_queue(limit, io);
2752 19 : }
2753 :
2754 : static void
2755 0 : bdev_qos_r_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2756 : {
2757 0 : if (bdev_is_read_io(io) != false) {
2758 0 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2759 0 : }
2760 0 : }
2761 :
2762 : static bool
2763 14 : bdev_qos_w_bps_queue(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2764 : {
2765 14 : if (bdev_is_read_io(io) == true) {
2766 12 : return false;
2767 : }
2768 :
2769 2 : return bdev_qos_rw_bps_queue(limit, io);
2770 14 : }
2771 :
2772 : static void
2773 0 : bdev_qos_w_bps_rewind_quota(struct spdk_bdev_qos_limit *limit, struct spdk_bdev_io *io)
2774 : {
2775 0 : if (bdev_is_read_io(io) != true) {
2776 0 : bdev_qos_rw_rewind_io(limit, io, bdev_get_io_size_in_byte(io));
2777 0 : }
2778 0 : }
2779 :
2780 : static void
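 : /* Select the queue_io/rewind_quota callbacks for each configured rate limit:
 :  * the RW_IOPS limit counts IOs, the RW_BPS limit counts bytes for all IOs, and the
 :  * R_BPS/W_BPS limits only charge reads or writes respectively. Limits left at
 :  * SPDK_BDEV_QOS_LIMIT_NOT_DEFINED are disabled by clearing queue_io. */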
2781 10 : bdev_qos_set_ops(struct spdk_bdev_qos *qos)
2782 : {
2783 : int i;
2784 :
2785 50 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
2786 40 : if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
2787 15 : qos->rate_limits[i].queue_io = NULL;
2788 15 : continue;
2789 : }
2790 :
2791 25 : switch (i) {
2792 : case SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT:
2793 9 : qos->rate_limits[i].queue_io = bdev_qos_rw_iops_queue;
2794 9 : qos->rate_limits[i].rewind_quota = bdev_qos_rw_iops_rewind_quota;
2795 9 : break;
2796 : case SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT:
2797 7 : qos->rate_limits[i].queue_io = bdev_qos_rw_bps_queue;
2798 7 : qos->rate_limits[i].rewind_quota = bdev_qos_rw_bps_rewind_quota;
2799 7 : break;
2800 : case SPDK_BDEV_QOS_R_BPS_RATE_LIMIT:
2801 5 : qos->rate_limits[i].queue_io = bdev_qos_r_bps_queue;
2802 5 : qos->rate_limits[i].rewind_quota = bdev_qos_r_bps_rewind_quota;
2803 5 : break;
2804 : case SPDK_BDEV_QOS_W_BPS_RATE_LIMIT:
2805 4 : qos->rate_limits[i].queue_io = bdev_qos_w_bps_queue;
2806 4 : qos->rate_limits[i].rewind_quota = bdev_qos_w_bps_rewind_quota;
2807 4 : break;
2808 : default:
2809 0 : break;
2810 : }
2811 25 : }
2812 10 : }
2813 :
2814 : static void
2815 6 : _bdev_io_complete_in_submit(struct spdk_bdev_channel *bdev_ch,
2816 : struct spdk_bdev_io *bdev_io,
2817 : enum spdk_bdev_io_status status)
2818 : {
2819 6 : bdev_io->internal.f.in_submit_request = true;
2820 6 : bdev_io_increment_outstanding(bdev_ch, bdev_ch->shared_resource);
2821 6 : spdk_bdev_io_complete(bdev_io, status);
2822 6 : bdev_io->internal.f.in_submit_request = false;
2823 6 : }
2824 :
2825 : static inline void
2826 574 : bdev_io_do_submit(struct spdk_bdev_channel *bdev_ch, struct spdk_bdev_io *bdev_io)
2827 : {
2828 574 : struct spdk_bdev *bdev = bdev_io->bdev;
2829 574 : struct spdk_io_channel *ch = bdev_ch->channel;
2830 574 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
2831 :
2832 574 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT)) {
2833 16 : struct spdk_bdev_mgmt_channel *mgmt_channel = shared_resource->mgmt_ch;
2834 16 : struct spdk_bdev_io *bio_to_abort = bdev_io->u.abort.bio_to_abort;
2835 :
2836 16 : if (bdev_abort_queued_io(&shared_resource->nomem_io, bio_to_abort) ||
2837 16 : bdev_abort_buf_io(mgmt_channel, bio_to_abort)) {
2838 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io,
2839 : SPDK_BDEV_IO_STATUS_SUCCESS);
2840 0 : return;
2841 : }
2842 16 : }
2843 :
2844 574 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE &&
2845 : bdev_io->bdev->split_on_write_unit &&
2846 : bdev_io->u.bdev.num_blocks < bdev_io->bdev->write_unit_size)) {
2847 4 : SPDK_ERRLOG("IO num_blocks %lu does not match the write_unit_size %u\n",
2848 : bdev_io->u.bdev.num_blocks, bdev_io->bdev->write_unit_size);
2849 4 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
2850 4 : return;
2851 : }
2852 :
2853 570 : if (spdk_likely(TAILQ_EMPTY(&shared_resource->nomem_io))) {
2854 527 : bdev_io_increment_outstanding(bdev_ch, shared_resource);
2855 527 : bdev_io->internal.f.in_submit_request = true;
2856 527 : bdev_submit_request(bdev, ch, bdev_io);
2857 527 : bdev_io->internal.f.in_submit_request = false;
2858 527 : } else {
2859 43 : bdev_queue_nomem_io_tail(shared_resource, bdev_io, BDEV_IO_RETRY_STATE_SUBMIT);
2860 43 : if (shared_resource->nomem_threshold == 0 && shared_resource->io_outstanding == 0) {
2861 : /* Special case: we have nomem IOs queued but no outstanding IOs whose
2862 : * completions could trigger a retry of the queued IOs */
2863 0 : bdev_shared_ch_retry_io(shared_resource);
2864 0 : }
2865 : }
2866 574 : }
2867 :
2868 : static bool
2869 25 : bdev_qos_queue_io(struct spdk_bdev_qos *qos, struct spdk_bdev_io *bdev_io)
2870 : {
2871 : int i;
2872 :
2873 25 : if (bdev_qos_io_to_limit(bdev_io) == true) {
2874 100 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
2875 82 : if (!qos->rate_limits[i].queue_io) {
2876 5 : continue;
2877 : }
2878 :
2879 231 : if (qos->rate_limits[i].queue_io(&qos->rate_limits[i],
2880 154 : bdev_io) == true) {
2881 10 : for (i -= 1; i >= 0 ; i--) {
2882 5 : if (!qos->rate_limits[i].queue_io) {
2883 0 : continue;
2884 : }
2885 :
2886 5 : qos->rate_limits[i].rewind_quota(&qos->rate_limits[i], bdev_io);
2887 5 : }
2888 5 : return true;
2889 : }
2890 72 : }
2891 18 : }
2892 :
2893 20 : return false;
2894 25 : }
2895 :
2896 : static int
2897 27 : bdev_qos_io_submit(struct spdk_bdev_channel *ch, struct spdk_bdev_qos *qos)
2898 : {
2899 27 : struct spdk_bdev_io *bdev_io = NULL, *tmp = NULL;
2900 27 : int submitted_ios = 0;
2901 :
2902 52 : TAILQ_FOREACH_SAFE(bdev_io, &ch->qos_queued_io, internal.link, tmp) {
2903 25 : if (!bdev_qos_queue_io(qos, bdev_io)) {
2904 20 : TAILQ_REMOVE(&ch->qos_queued_io, bdev_io, internal.link);
2905 20 : bdev_io_do_submit(ch, bdev_io);
2906 :
2907 20 : submitted_ios++;
2908 20 : }
2909 25 : }
2910 :
2911 27 : return submitted_ios;
2912 : }
2913 :
2914 : static void
2915 2 : bdev_queue_io_wait_with_cb(struct spdk_bdev_io *bdev_io, spdk_bdev_io_wait_cb cb_fn)
2916 : {
2917 : int rc;
2918 :
2919 2 : bdev_io->internal.waitq_entry.bdev = bdev_io->bdev;
2920 2 : bdev_io->internal.waitq_entry.cb_fn = cb_fn;
2921 2 : bdev_io->internal.waitq_entry.cb_arg = bdev_io;
2922 4 : rc = spdk_bdev_queue_io_wait(bdev_io->bdev, spdk_io_channel_from_ctx(bdev_io->internal.ch),
2923 2 : &bdev_io->internal.waitq_entry);
2924 2 : if (rc != 0) {
2925 0 : SPDK_ERRLOG("Queue IO failed, rc=%d\n", rc);
2926 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
2927 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
2928 0 : }
2929 2 : }
2930 :
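 : /* A read/write needs splitting when it crosses an IO boundary (write_unit_size for
 :  * writes with split_on_write_unit, otherwise optimal_io_boundary), has more iovecs
 :  * than max_num_segments, contains an iovec longer than max_segment_size, or spans
 :  * more blocks than max_rw_size. */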
2931 : static bool
2932 621 : bdev_rw_should_split(struct spdk_bdev_io *bdev_io)
2933 : {
2934 : uint32_t io_boundary;
2935 621 : struct spdk_bdev *bdev = bdev_io->bdev;
2936 621 : uint32_t max_segment_size = bdev->max_segment_size;
2937 621 : uint32_t max_size = bdev->max_rw_size;
2938 621 : int max_segs = bdev->max_num_segments;
2939 :
2940 621 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
2941 24 : io_boundary = bdev->write_unit_size;
2942 621 : } else if (bdev->split_on_optimal_io_boundary) {
2943 168 : io_boundary = bdev->optimal_io_boundary;
2944 168 : } else {
2945 429 : io_boundary = 0;
2946 : }
2947 :
2948 621 : if (spdk_likely(!io_boundary && !max_segs && !max_segment_size && !max_size)) {
2949 243 : return false;
2950 : }
2951 :
2952 378 : if (io_boundary) {
2953 : uint64_t start_stripe, end_stripe;
2954 :
2955 192 : start_stripe = bdev_io->u.bdev.offset_blocks;
2956 192 : end_stripe = start_stripe + bdev_io->u.bdev.num_blocks - 1;
2957 : /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
2958 192 : if (spdk_likely(spdk_u32_is_pow2(io_boundary))) {
2959 192 : start_stripe >>= spdk_u32log2(io_boundary);
2960 192 : end_stripe >>= spdk_u32log2(io_boundary);
2961 192 : } else {
2962 0 : start_stripe /= io_boundary;
2963 0 : end_stripe /= io_boundary;
2964 : }
2965 :
2966 192 : if (start_stripe != end_stripe) {
2967 75 : return true;
2968 : }
2969 117 : }
2970 :
2971 303 : if (max_segs) {
2972 150 : if (bdev_io->u.bdev.iovcnt > max_segs) {
2973 15 : return true;
2974 : }
2975 135 : }
2976 :
2977 288 : if (max_segment_size) {
2978 470 : for (int i = 0; i < bdev_io->u.bdev.iovcnt; i++) {
2979 346 : if (bdev_io->u.bdev.iovs[i].iov_len > max_segment_size) {
2980 12 : return true;
2981 : }
2982 334 : }
2983 124 : }
2984 :
2985 276 : if (max_size) {
2986 52 : if (bdev_io->u.bdev.num_blocks > max_size) {
2987 7 : return true;
2988 : }
2989 45 : }
2990 :
2991 269 : return false;
2992 621 : }
2993 :
2994 : static bool
2995 24 : bdev_unmap_should_split(struct spdk_bdev_io *bdev_io)
2996 : {
2997 : uint32_t num_unmap_segments;
2998 :
2999 24 : if (!bdev_io->bdev->max_unmap || !bdev_io->bdev->max_unmap_segments) {
3000 3 : return false;
3001 : }
3002 21 : num_unmap_segments = spdk_divide_round_up(bdev_io->u.bdev.num_blocks, bdev_io->bdev->max_unmap);
3003 21 : if (num_unmap_segments > bdev_io->bdev->max_unmap_segments) {
3004 4 : return true;
3005 : }
3006 :
3007 17 : return false;
3008 24 : }
3009 :
3010 : static bool
3011 37 : bdev_write_zeroes_should_split(struct spdk_bdev_io *bdev_io)
3012 : {
3013 37 : if (!bdev_io->bdev->max_write_zeroes) {
3014 4 : return false;
3015 : }
3016 :
3017 33 : if (bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_write_zeroes) {
3018 10 : return true;
3019 : }
3020 :
3021 23 : return false;
3022 37 : }
3023 :
3024 : static bool
3025 30 : bdev_copy_should_split(struct spdk_bdev_io *bdev_io)
3026 : {
3027 30 : if (bdev_io->bdev->max_copy != 0 &&
3028 25 : bdev_io->u.bdev.num_blocks > bdev_io->bdev->max_copy) {
3029 6 : return true;
3030 : }
3031 :
3032 24 : return false;
3033 30 : }
3034 :
3035 : static bool
3036 794 : bdev_io_should_split(struct spdk_bdev_io *bdev_io)
3037 : {
3038 794 : switch (bdev_io->type) {
3039 : case SPDK_BDEV_IO_TYPE_READ:
3040 : case SPDK_BDEV_IO_TYPE_WRITE:
3041 621 : return bdev_rw_should_split(bdev_io);
3042 : case SPDK_BDEV_IO_TYPE_UNMAP:
3043 24 : return bdev_unmap_should_split(bdev_io);
3044 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3045 37 : return bdev_write_zeroes_should_split(bdev_io);
3046 : case SPDK_BDEV_IO_TYPE_COPY:
3047 30 : return bdev_copy_should_split(bdev_io);
3048 : default:
3049 82 : return false;
3050 : }
3051 794 : }
3052 :
3053 : static uint32_t
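 : /* Number of blocks from `offset` up to the next multiple of `boundary`.
 :  * For example, offset = 70 with boundary = 32 gives 32 - (70 % 32) = 26,
 :  * so the child IO may cover at most 26 blocks before hitting block 96. */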
3054 249 : _to_next_boundary(uint64_t offset, uint32_t boundary)
3055 : {
3056 249 : return (boundary - (offset % boundary));
3057 : }
3058 :
3059 : static void bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg);
3060 :
3061 : static void _bdev_rw_split(void *_bdev_io);
3062 :
3063 : static void bdev_unmap_split(struct spdk_bdev_io *bdev_io);
3064 :
3065 : static void
3066 0 : _bdev_unmap_split(void *_bdev_io)
3067 : {
3068 0 : return bdev_unmap_split((struct spdk_bdev_io *)_bdev_io);
3069 : }
3070 :
3071 : static void bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io);
3072 :
3073 : static void
3074 0 : _bdev_write_zeroes_split(void *_bdev_io)
3075 : {
3076 0 : return bdev_write_zeroes_split((struct spdk_bdev_io *)_bdev_io);
3077 : }
3078 :
3079 : static void bdev_copy_split(struct spdk_bdev_io *bdev_io);
3080 :
3081 : static void
3082 0 : _bdev_copy_split(void *_bdev_io)
3083 : {
3084 0 : return bdev_copy_split((struct spdk_bdev_io *)_bdev_io);
3085 : }
3086 :
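 : /* Submit one child IO of `num_blocks` starting at `*offset`, using the call that
 :  * matches the parent IO type, and bump split.outstanding. On success advance
 :  * *offset/*remaining; on -ENOMEM with no children in flight, queue an io_wait
 :  * entry so the split resumes once a bdev_io becomes available again. */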
3087 : static int
3088 305 : bdev_io_split_submit(struct spdk_bdev_io *bdev_io, struct iovec *iov, int iovcnt, void *md_buf,
3089 : uint64_t num_blocks, uint64_t *offset, uint64_t *remaining)
3090 : {
3091 : int rc;
3092 : uint64_t current_offset, current_remaining, current_src_offset;
3093 : spdk_bdev_io_wait_cb io_wait_fn;
3094 :
3095 305 : current_offset = *offset;
3096 305 : current_remaining = *remaining;
3097 :
3098 305 : assert(bdev_io->internal.f.split);
3099 :
3100 305 : bdev_io->internal.split.outstanding++;
3101 :
3102 305 : io_wait_fn = _bdev_rw_split;
3103 305 : switch (bdev_io->type) {
3104 : case SPDK_BDEV_IO_TYPE_READ:
3105 196 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3106 392 : rc = bdev_readv_blocks_with_md(bdev_io->internal.desc,
3107 196 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3108 196 : iov, iovcnt, md_buf, current_offset,
3109 196 : num_blocks,
3110 196 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
3111 196 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
3112 : NULL,
3113 196 : bdev_io->u.bdev.dif_check_flags,
3114 196 : bdev_io_split_done, bdev_io);
3115 196 : break;
3116 : case SPDK_BDEV_IO_TYPE_WRITE:
3117 50 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3118 100 : rc = bdev_writev_blocks_with_md(bdev_io->internal.desc,
3119 50 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3120 50 : iov, iovcnt, md_buf, current_offset,
3121 50 : num_blocks,
3122 50 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain : NULL,
3123 50 : bdev_io_use_memory_domain(bdev_io) ? bdev_io->internal.memory_domain_ctx : NULL,
3124 : NULL,
3125 50 : bdev_io->u.bdev.dif_check_flags,
3126 50 : bdev_io->u.bdev.nvme_cdw12.raw,
3127 50 : bdev_io->u.bdev.nvme_cdw13.raw,
3128 50 : bdev_io_split_done, bdev_io);
3129 50 : break;
3130 : case SPDK_BDEV_IO_TYPE_UNMAP:
3131 17 : io_wait_fn = _bdev_unmap_split;
3132 34 : rc = spdk_bdev_unmap_blocks(bdev_io->internal.desc,
3133 17 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3134 17 : current_offset, num_blocks,
3135 17 : bdev_io_split_done, bdev_io);
3136 17 : break;
3137 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3138 23 : io_wait_fn = _bdev_write_zeroes_split;
3139 46 : rc = spdk_bdev_write_zeroes_blocks(bdev_io->internal.desc,
3140 23 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3141 23 : current_offset, num_blocks,
3142 23 : bdev_io_split_done, bdev_io);
3143 23 : break;
3144 : case SPDK_BDEV_IO_TYPE_COPY:
3145 19 : io_wait_fn = _bdev_copy_split;
3146 38 : current_src_offset = bdev_io->u.bdev.copy.src_offset_blocks +
3147 19 : (current_offset - bdev_io->u.bdev.offset_blocks);
3148 38 : rc = spdk_bdev_copy_blocks(bdev_io->internal.desc,
3149 19 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
3150 19 : current_offset, current_src_offset, num_blocks,
3151 19 : bdev_io_split_done, bdev_io);
3152 19 : break;
3153 : default:
3154 0 : assert(false);
3155 : rc = -EINVAL;
3156 : break;
3157 : }
3158 :
3159 305 : if (rc == 0) {
3160 301 : current_offset += num_blocks;
3161 301 : current_remaining -= num_blocks;
3162 301 : bdev_io->internal.split.current_offset_blocks = current_offset;
3163 301 : bdev_io->internal.split.remaining_num_blocks = current_remaining;
3164 301 : *offset = current_offset;
3165 301 : *remaining = current_remaining;
3166 301 : } else {
3167 4 : bdev_io->internal.split.outstanding--;
3168 4 : if (rc == -ENOMEM) {
3169 4 : if (bdev_io->internal.split.outstanding == 0) {
3170 : /* No I/O is outstanding. Hence we should wait here. */
3171 1 : bdev_queue_io_wait_with_cb(bdev_io, io_wait_fn);
3172 1 : }
3173 4 : } else {
3174 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3175 0 : if (bdev_io->internal.split.outstanding == 0) {
3176 0 : bdev_ch_remove_from_io_submitted(bdev_io);
3177 0 : spdk_trace_record(TRACE_BDEV_IO_DONE, bdev_io->internal.ch->trace_id,
3178 : 0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx,
3179 : bdev_io->internal.ch->queue_depth);
3180 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
3181 0 : }
3182 : }
3183 : }
3184 :
3185 305 : return rc;
3186 : }
3187 :
3188 : static void
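 : /* Carve the parent read/write into child IOs: walk the parent iovecs starting at the
 :  * current split offset, packing each child with up to child_iovsize entries while
 :  * honoring max_segment_size, max_rw_size and the IO boundary, and trim a trailing
 :  * partial block so every child IO stays block-aligned. */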
3189 67 : _bdev_rw_split(void *_bdev_io)
3190 : {
3191 : struct iovec *parent_iov, *iov;
3192 67 : struct spdk_bdev_io *bdev_io = _bdev_io;
3193 67 : struct spdk_bdev *bdev = bdev_io->bdev;
3194 : uint64_t parent_offset, current_offset, remaining;
3195 : uint32_t parent_iov_offset, parent_iovcnt, parent_iovpos, child_iovcnt;
3196 : uint32_t to_next_boundary, to_next_boundary_bytes, to_last_block_bytes;
3197 : uint32_t iovcnt, iov_len, child_iovsize;
3198 : uint32_t blocklen;
3199 : uint32_t io_boundary;
3200 67 : uint32_t max_segment_size = bdev->max_segment_size;
3201 67 : uint32_t max_child_iovcnt = bdev->max_num_segments;
3202 67 : uint32_t max_size = bdev->max_rw_size;
3203 67 : void *md_buf = NULL;
3204 : int rc;
3205 :
3206 67 : blocklen = bdev_io_get_block_size(bdev_io);
3207 :
3208 67 : max_size = max_size ? max_size : UINT32_MAX;
3209 67 : max_segment_size = max_segment_size ? max_segment_size : UINT32_MAX;
3210 67 : max_child_iovcnt = max_child_iovcnt ? spdk_min(max_child_iovcnt, SPDK_BDEV_IO_NUM_CHILD_IOV) :
3211 : SPDK_BDEV_IO_NUM_CHILD_IOV;
3212 :
3213 67 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE && bdev->split_on_write_unit) {
3214 5 : io_boundary = bdev->write_unit_size;
3215 67 : } else if (bdev->split_on_optimal_io_boundary) {
3216 40 : io_boundary = bdev->optimal_io_boundary;
3217 40 : } else {
3218 22 : io_boundary = UINT32_MAX;
3219 : }
3220 :
3221 67 : assert(bdev_io->internal.f.split);
3222 :
3223 67 : remaining = bdev_io->internal.split.remaining_num_blocks;
3224 67 : current_offset = bdev_io->internal.split.current_offset_blocks;
3225 67 : parent_offset = bdev_io->u.bdev.offset_blocks;
3226 67 : parent_iov_offset = (current_offset - parent_offset) * blocklen;
3227 67 : parent_iovcnt = bdev_io->u.bdev.iovcnt;
3228 :
3229 420 : for (parent_iovpos = 0; parent_iovpos < parent_iovcnt; parent_iovpos++) {
3230 420 : parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
3231 420 : if (parent_iov_offset < parent_iov->iov_len) {
3232 67 : break;
3233 : }
3234 353 : parent_iov_offset -= parent_iov->iov_len;
3235 353 : }
3236 :
3237 67 : child_iovcnt = 0;
3238 573 : while (remaining > 0 && parent_iovpos < parent_iovcnt &&
3239 264 : child_iovcnt < SPDK_BDEV_IO_NUM_CHILD_IOV) {
3240 249 : to_next_boundary = _to_next_boundary(current_offset, io_boundary);
3241 249 : to_next_boundary = spdk_min(remaining, to_next_boundary);
3242 249 : to_next_boundary = spdk_min(max_size, to_next_boundary);
3243 249 : to_next_boundary_bytes = to_next_boundary * blocklen;
3244 :
3245 249 : iov = &bdev_io->child_iov[child_iovcnt];
3246 249 : iovcnt = 0;
3247 :
3248 249 : if (bdev_io->u.bdev.md_buf) {
3249 48 : md_buf = (char *)bdev_io->u.bdev.md_buf +
3250 24 : (current_offset - parent_offset) * spdk_bdev_get_md_size(bdev);
3251 24 : }
3252 :
3253 249 : child_iovsize = spdk_min(SPDK_BDEV_IO_NUM_CHILD_IOV - child_iovcnt, max_child_iovcnt);
3254 1810 : while (to_next_boundary_bytes > 0 && parent_iovpos < parent_iovcnt &&
3255 836 : iovcnt < child_iovsize) {
3256 725 : parent_iov = &bdev_io->u.bdev.iovs[parent_iovpos];
3257 725 : iov_len = parent_iov->iov_len - parent_iov_offset;
3258 :
3259 725 : iov_len = spdk_min(iov_len, max_segment_size);
3260 725 : iov_len = spdk_min(iov_len, to_next_boundary_bytes);
3261 725 : to_next_boundary_bytes -= iov_len;
3262 :
3263 725 : bdev_io->child_iov[child_iovcnt].iov_base = parent_iov->iov_base + parent_iov_offset;
3264 725 : bdev_io->child_iov[child_iovcnt].iov_len = iov_len;
3265 :
3266 725 : if (iov_len < parent_iov->iov_len - parent_iov_offset) {
3267 183 : parent_iov_offset += iov_len;
3268 183 : } else {
3269 542 : parent_iovpos++;
3270 542 : parent_iov_offset = 0;
3271 : }
3272 725 : child_iovcnt++;
3273 725 : iovcnt++;
3274 : }
3275 :
3276 249 : if (to_next_boundary_bytes > 0) {
3277 : /* We had to stop this child I/O early because we ran out of
3278 : * child_iov space or were limited by max_num_segments.
3279 : * Ensure the iovs are aligned to the block size and
3280 : * then adjust to_next_boundary before starting the
3281 : * child I/O.
3282 : */
3283 111 : assert(child_iovcnt == SPDK_BDEV_IO_NUM_CHILD_IOV ||
3284 : iovcnt == child_iovsize);
3285 111 : to_last_block_bytes = to_next_boundary_bytes % blocklen;
3286 111 : if (to_last_block_bytes != 0) {
3287 24 : uint32_t child_iovpos = child_iovcnt - 1;
3288 : /* don't decrease child_iovcnt when it equals SPDK_BDEV_IO_NUM_CHILD_IOV
3289 : * so the loop will naturally end
3290 : */
3291 :
3292 24 : to_last_block_bytes = blocklen - to_last_block_bytes;
3293 24 : to_next_boundary_bytes += to_last_block_bytes;
3294 53 : while (to_last_block_bytes > 0 && iovcnt > 0) {
3295 32 : iov_len = spdk_min(to_last_block_bytes,
3296 : bdev_io->child_iov[child_iovpos].iov_len);
3297 32 : bdev_io->child_iov[child_iovpos].iov_len -= iov_len;
3298 32 : if (bdev_io->child_iov[child_iovpos].iov_len == 0) {
3299 15 : child_iovpos--;
3300 15 : if (--iovcnt == 0) {
3301 : /* If the child IO is less than a block size, just return.
3302 : * If the first child IO of any split round is less than
3303 : * a block size, exit with an error.
3304 : */
3305 3 : if (bdev_io->internal.split.outstanding == 0) {
3306 1 : SPDK_ERRLOG("The first child io was less than a block size\n");
3307 1 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3308 1 : bdev_ch_remove_from_io_submitted(bdev_io);
3309 1 : spdk_trace_record(TRACE_BDEV_IO_DONE, bdev_io->internal.ch->trace_id,
3310 : 0, (uintptr_t)bdev_io, bdev_io->internal.caller_ctx,
3311 : bdev_io->internal.ch->queue_depth);
3312 1 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
3313 1 : }
3314 :
3315 3 : return;
3316 : }
3317 12 : }
3318 :
3319 29 : to_last_block_bytes -= iov_len;
3320 :
3321 29 : if (parent_iov_offset == 0) {
3322 14 : parent_iovpos--;
3323 14 : parent_iov_offset = bdev_io->u.bdev.iovs[parent_iovpos].iov_len;
3324 14 : }
3325 29 : parent_iov_offset -= iov_len;
3326 : }
3327 :
3328 21 : assert(to_last_block_bytes == 0);
3329 21 : }
3330 108 : to_next_boundary -= to_next_boundary_bytes / blocklen;
3331 108 : }
3332 :
3333 246 : rc = bdev_io_split_submit(bdev_io, iov, iovcnt, md_buf, to_next_boundary,
3334 : &current_offset, &remaining);
3335 246 : if (spdk_unlikely(rc)) {
3336 4 : return;
3337 : }
3338 : }
3339 67 : }
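/*
 * Illustrative sketch (an assumption, not part of this file's instrumented source): how the
 * split loop above sizes each child I/O. _to_next_boundary() is defined elsewhere in this file;
 * the hypothetical example_to_next_boundary() below only assumes the usual "blocks until the
 * next multiple of the boundary" semantics, to which _bdev_rw_split() then applies the
 * remaining-blocks and max_size clamps.
 */
static inline uint64_t
example_to_next_boundary(uint64_t offset, uint32_t boundary)
{
	/* Assumed semantics: distance, in blocks, from offset to the next boundary multiple. */
	return boundary - (offset % boundary);
}

/*
 * Worked example: current_offset = 10, io_boundary = 8, remaining = 100, max_size = 16:
 *   example_to_next_boundary(10, 8) = 6, min(6, 100) = 6, min(6, 16) = 6
 * so this child I/O covers 6 blocks and the next child starts at offset 16, aligned to the
 * optimal I/O boundary.
 */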
3340 :
3341 : static void
3342 3 : bdev_unmap_split(struct spdk_bdev_io *bdev_io)
3343 : {
3344 : uint64_t offset, unmap_blocks, remaining, max_unmap_blocks;
3345 3 : uint32_t num_children_reqs = 0;
3346 : int rc;
3347 :
3348 3 : assert(bdev_io->internal.f.split);
3349 :
3350 3 : offset = bdev_io->internal.split.current_offset_blocks;
3351 3 : remaining = bdev_io->internal.split.remaining_num_blocks;
3352 3 : max_unmap_blocks = bdev_io->bdev->max_unmap * bdev_io->bdev->max_unmap_segments;
3353 :
3354 20 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
3355 17 : unmap_blocks = spdk_min(remaining, max_unmap_blocks);
3356 :
3357 17 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, unmap_blocks,
3358 : &offset, &remaining);
3359 17 : if (spdk_likely(rc == 0)) {
3360 17 : num_children_reqs++;
3361 17 : } else {
3362 0 : return;
3363 : }
3364 : }
3365 3 : }
3366 :
3367 : static void
3368 6 : bdev_write_zeroes_split(struct spdk_bdev_io *bdev_io)
3369 : {
3370 : uint64_t offset, write_zeroes_blocks, remaining;
3371 6 : uint32_t num_children_reqs = 0;
3372 : int rc;
3373 :
3374 6 : assert(bdev_io->internal.f.split);
3375 :
3376 6 : offset = bdev_io->internal.split.current_offset_blocks;
3377 6 : remaining = bdev_io->internal.split.remaining_num_blocks;
3378 :
3379 29 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_UNMAP_WRITE_ZEROES_REQS)) {
3380 23 : write_zeroes_blocks = spdk_min(remaining, bdev_io->bdev->max_write_zeroes);
3381 :
3382 23 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, write_zeroes_blocks,
3383 : &offset, &remaining);
3384 23 : if (spdk_likely(rc == 0)) {
3385 23 : num_children_reqs++;
3386 23 : } else {
3387 0 : return;
3388 : }
3389 : }
3390 6 : }
3391 :
3392 : static void
3393 4 : bdev_copy_split(struct spdk_bdev_io *bdev_io)
3394 : {
3395 : uint64_t offset, copy_blocks, remaining;
3396 4 : uint32_t num_children_reqs = 0;
3397 : int rc;
3398 :
3399 4 : assert(bdev_io->internal.f.split);
3400 :
3401 4 : offset = bdev_io->internal.split.current_offset_blocks;
3402 4 : remaining = bdev_io->internal.split.remaining_num_blocks;
3403 :
3404 4 : assert(bdev_io->bdev->max_copy != 0);
3405 23 : while (remaining && (num_children_reqs < SPDK_BDEV_MAX_CHILDREN_COPY_REQS)) {
3406 19 : copy_blocks = spdk_min(remaining, bdev_io->bdev->max_copy);
3407 :
3408 19 : rc = bdev_io_split_submit(bdev_io, NULL, 0, NULL, copy_blocks,
3409 : &offset, &remaining);
3410 19 : if (spdk_likely(rc == 0)) {
3411 19 : num_children_reqs++;
3412 19 : } else {
3413 0 : return;
3414 : }
3415 : }
3416 4 : }
3417 :
3418 : static void
3419 58 : parent_bdev_io_complete(void *ctx, int rc)
3420 : {
3421 58 : struct spdk_bdev_io *parent_io = ctx;
3422 :
3423 58 : if (rc) {
3424 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3425 0 : }
3426 :
3427 116 : parent_io->internal.cb(parent_io, parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
3428 58 : parent_io->internal.caller_ctx);
3429 58 : }
3430 :
3431 : static void
3432 0 : bdev_io_complete_parent_sequence_cb(void *ctx, int status)
3433 : {
3434 0 : struct spdk_bdev_io *bdev_io = ctx;
3435 :
3436 : /* u.bdev.accel_sequence should have already been cleared at this point */
3437 0 : assert(bdev_io->u.bdev.accel_sequence == NULL);
3438 0 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
3439 0 : bdev_io->internal.f.has_accel_sequence = false;
3440 :
3441 0 : if (spdk_unlikely(status != 0)) {
3442 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
3443 0 : }
3444 :
3445 0 : parent_bdev_io_complete(bdev_io, status);
3446 0 : }
3447 :
3448 : static void
3449 301 : bdev_io_split_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
3450 : {
3451 301 : struct spdk_bdev_io *parent_io = cb_arg;
3452 :
3453 301 : spdk_bdev_free_io(bdev_io);
3454 :
3455 301 : assert(parent_io->internal.f.split);
3456 :
3457 301 : if (!success) {
3458 21 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
3459 : /* If any child I/O failed, stop further splitting. */
3460 21 : parent_io->internal.split.current_offset_blocks += parent_io->internal.split.remaining_num_blocks;
3461 21 : parent_io->internal.split.remaining_num_blocks = 0;
3462 21 : }
3463 301 : parent_io->internal.split.outstanding--;
3464 301 : if (parent_io->internal.split.outstanding != 0) {
3465 223 : return;
3466 : }
3467 :
3468 : /*
3469 : * Parent I/O finishes when all blocks are consumed.
3470 : */
3471 78 : if (parent_io->internal.split.remaining_num_blocks == 0) {
3472 58 : assert(parent_io->internal.cb != bdev_io_split_done);
3473 58 : bdev_ch_remove_from_io_submitted(parent_io);
3474 58 : spdk_trace_record(TRACE_BDEV_IO_DONE, parent_io->internal.ch->trace_id,
3475 : 0, (uintptr_t)parent_io, bdev_io->internal.caller_ctx,
3476 : parent_io->internal.ch->queue_depth);
3477 :
3478 58 : if (spdk_likely(parent_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
3479 48 : if (bdev_io_needs_sequence_exec(parent_io->internal.desc, parent_io)) {
3480 0 : bdev_io_exec_sequence(parent_io, bdev_io_complete_parent_sequence_cb);
3481 0 : return;
3482 48 : } else if (parent_io->internal.f.has_bounce_buf &&
3483 0 : !bdev_io_use_accel_sequence(bdev_io)) {
3484 : /* bdev IO will be completed in the callback */
3485 0 : _bdev_io_push_bounce_data_buffer(parent_io, parent_bdev_io_complete);
3486 0 : return;
3487 : }
3488 48 : }
3489 :
3490 58 : parent_bdev_io_complete(parent_io, 0);
3491 58 : return;
3492 : }
3493 :
3494 : /*
3495 : * Continue with the splitting process. This function will complete the parent I/O if the
3496 : * splitting is done.
3497 : */
3498 20 : switch (parent_io->type) {
3499 : case SPDK_BDEV_IO_TYPE_READ:
3500 : case SPDK_BDEV_IO_TYPE_WRITE:
3501 17 : _bdev_rw_split(parent_io);
3502 17 : break;
3503 : case SPDK_BDEV_IO_TYPE_UNMAP:
3504 1 : bdev_unmap_split(parent_io);
3505 1 : break;
3506 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3507 1 : bdev_write_zeroes_split(parent_io);
3508 1 : break;
3509 : case SPDK_BDEV_IO_TYPE_COPY:
3510 1 : bdev_copy_split(parent_io);
3511 1 : break;
3512 : default:
3513 0 : assert(false);
3514 : break;
3515 : }
3516 301 : }
3517 :
3518 : static void bdev_rw_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
3519 : bool success);
3520 :
3521 : static void
3522 59 : bdev_io_split(struct spdk_bdev_io *bdev_io)
3523 : {
3524 59 : assert(bdev_io_should_split(bdev_io));
3525 59 : assert(bdev_io->internal.f.split);
3526 :
3527 59 : bdev_io->internal.split.current_offset_blocks = bdev_io->u.bdev.offset_blocks;
3528 59 : bdev_io->internal.split.remaining_num_blocks = bdev_io->u.bdev.num_blocks;
3529 59 : bdev_io->internal.split.outstanding = 0;
3530 59 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
3531 :
3532 59 : switch (bdev_io->type) {
3533 : case SPDK_BDEV_IO_TYPE_READ:
3534 : case SPDK_BDEV_IO_TYPE_WRITE:
3535 49 : if (_is_buf_allocated(bdev_io->u.bdev.iovs)) {
3536 49 : _bdev_rw_split(bdev_io);
3537 49 : } else {
3538 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
3539 0 : spdk_bdev_io_get_buf(bdev_io, bdev_rw_split_get_buf_cb,
3540 0 : bdev_io->u.bdev.num_blocks * bdev_io_get_block_size(bdev_io));
3541 : }
3542 49 : break;
3543 : case SPDK_BDEV_IO_TYPE_UNMAP:
3544 2 : bdev_unmap_split(bdev_io);
3545 2 : break;
3546 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3547 5 : bdev_write_zeroes_split(bdev_io);
3548 5 : break;
3549 : case SPDK_BDEV_IO_TYPE_COPY:
3550 3 : bdev_copy_split(bdev_io);
3551 3 : break;
3552 : default:
3553 0 : assert(false);
3554 : break;
3555 : }
3556 59 : }
3557 :
3558 : static void
3559 0 : bdev_rw_split_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
3560 : {
3561 0 : if (!success) {
3562 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
3563 0 : return;
3564 : }
3565 :
3566 0 : _bdev_rw_split(bdev_io);
3567 0 : }
3568 :
3569 : static inline void
3570 579 : _bdev_io_submit(struct spdk_bdev_io *bdev_io)
3571 : {
3572 579 : struct spdk_bdev *bdev = bdev_io->bdev;
3573 579 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
3574 :
3575 579 : if (spdk_likely(bdev_ch->flags == 0)) {
3576 554 : bdev_io_do_submit(bdev_ch, bdev_io);
3577 554 : return;
3578 : }
3579 :
3580 25 : if (bdev_ch->flags & BDEV_CH_RESET_IN_PROGRESS) {
3581 2 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
3582 25 : } else if (bdev_ch->flags & BDEV_CH_QOS_ENABLED) {
3583 23 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT) &&
3584 2 : bdev_abort_queued_io(&bdev_ch->qos_queued_io, bdev_io->u.abort.bio_to_abort)) {
3585 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
3586 0 : } else {
3587 23 : TAILQ_INSERT_TAIL(&bdev_ch->qos_queued_io, bdev_io, internal.link);
3588 23 : bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
3589 : }
3590 23 : } else {
3591 0 : SPDK_ERRLOG("unknown bdev_ch flag %x found\n", bdev_ch->flags);
3592 0 : _bdev_io_complete_in_submit(bdev_ch, bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
3593 : }
3594 579 : }
3595 :
3596 : bool bdev_lba_range_overlapped(struct lba_range *range1, struct lba_range *range2);
3597 :
3598 : bool
3599 23 : bdev_lba_range_overlapped(struct lba_range *range1, struct lba_range *range2)
3600 : {
3601 23 : if (range1->length == 0 || range2->length == 0) {
3602 1 : return false;
3603 : }
3604 :
3605 22 : if (range1->offset + range1->length <= range2->offset) {
3606 1 : return false;
3607 : }
3608 :
3609 21 : if (range2->offset + range2->length <= range1->offset) {
3610 3 : return false;
3611 : }
3612 :
3613 18 : return true;
3614 23 : }
3615 :
3616 : static bool
3617 11 : bdev_io_range_is_locked(struct spdk_bdev_io *bdev_io, struct lba_range *range)
3618 : {
3619 11 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3620 : struct lba_range r;
3621 :
3622 11 : switch (bdev_io->type) {
3623 : case SPDK_BDEV_IO_TYPE_NVME_IO:
3624 : case SPDK_BDEV_IO_TYPE_NVME_IO_MD:
3625 : /* Don't try to decode the NVMe command - just assume worst-case and that
3626 : * it overlaps a locked range.
3627 : */
3628 0 : return true;
3629 : case SPDK_BDEV_IO_TYPE_READ:
3630 6 : if (!range->quiesce) {
3631 4 : return false;
3632 : }
3633 : /* fallthrough */
3634 : case SPDK_BDEV_IO_TYPE_WRITE:
3635 : case SPDK_BDEV_IO_TYPE_UNMAP:
3636 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3637 : case SPDK_BDEV_IO_TYPE_ZCOPY:
3638 : case SPDK_BDEV_IO_TYPE_COPY:
3639 7 : r.offset = bdev_io->u.bdev.offset_blocks;
3640 7 : r.length = bdev_io->u.bdev.num_blocks;
3641 7 : if (!bdev_lba_range_overlapped(range, &r)) {
3642 : /* This I/O doesn't overlap the specified LBA range. */
3643 0 : return false;
3644 7 : } else if (range->owner_ch == ch && range->locked_ctx == bdev_io->internal.caller_ctx) {
3645 : /* This I/O overlaps, but the I/O is on the same channel that locked this
3646 : * range, and the caller_ctx is the same as the locked_ctx. This means
3647 : * that this I/O is associated with the lock, and is allowed to execute.
3648 : */
3649 2 : return false;
3650 : } else {
3651 5 : return true;
3652 : }
3653 : default:
3654 0 : return false;
3655 : }
3656 11 : }
3657 :
3658 : void
3659 639 : bdev_io_submit(struct spdk_bdev_io *bdev_io)
3660 : {
3661 639 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3662 :
3663 639 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
3664 :
3665 639 : if (!TAILQ_EMPTY(&ch->locked_ranges)) {
3666 : struct lba_range *range;
3667 :
3668 13 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
3669 8 : if (bdev_io_range_is_locked(bdev_io, range)) {
3670 3 : TAILQ_INSERT_TAIL(&ch->io_locked, bdev_io, internal.ch_link);
3671 3 : return;
3672 : }
3673 5 : }
3674 5 : }
3675 :
3676 636 : bdev_ch_add_to_io_submitted(bdev_io);
3677 :
3678 636 : bdev_io->internal.submit_tsc = spdk_get_ticks();
3679 636 : spdk_trace_record_tsc(bdev_io->internal.submit_tsc, TRACE_BDEV_IO_START,
3680 : ch->trace_id, bdev_io->u.bdev.num_blocks,
3681 : (uintptr_t)bdev_io, (uint64_t)bdev_io->type, bdev_io->internal.caller_ctx,
3682 : bdev_io->u.bdev.offset_blocks, ch->queue_depth);
3683 :
3684 636 : if (bdev_io->internal.f.split) {
3685 59 : bdev_io_split(bdev_io);
3686 59 : return;
3687 : }
3688 :
3689 577 : _bdev_io_submit(bdev_io);
3690 639 : }
3691 :
3692 : static inline void
3693 4 : _bdev_io_ext_use_bounce_buffer(struct spdk_bdev_io *bdev_io)
3694 : {
3695 : /* The bdev doesn't support memory domains, so the buffers in this IO request can't be
3696 : * accessed directly. We need to allocate bounce buffers before issuing the IO operation.
3697 : * For a write operation we need to pull the buffers from the memory domain before submitting
3698 : * the IO. Once a read operation completes, we need to use the memory_domain push functionality
3699 : * to update the data in the original memory domain IO buffer.
3700 : * This IO request will go through a regular IO flow, so clear the memory domain pointers. */
3701 4 : assert(bdev_io->internal.f.has_memory_domain);
3702 4 : bdev_io->u.bdev.memory_domain = NULL;
3703 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
3704 8 : _bdev_memory_domain_io_get_buf(bdev_io, _bdev_memory_domain_get_io_cb,
3705 4 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
3706 4 : }
3707 :
3708 : static inline void
3709 292 : _bdev_io_submit_ext(struct spdk_bdev_desc *desc, struct spdk_bdev_io *bdev_io)
3710 : {
3711 292 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
3712 292 : bool needs_exec = bdev_io_needs_sequence_exec(desc, bdev_io);
3713 :
3714 292 : if (spdk_unlikely(ch->flags & BDEV_CH_RESET_IN_PROGRESS)) {
3715 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_ABORTED;
3716 0 : bdev_io_complete_unsubmitted(bdev_io);
3717 0 : return;
3718 : }
3719 :
3720 : /* We need to allocate a bounce buffer if the bdev doesn't support memory domains, or if it
3721 : * does support them but we need to execute an accel sequence and the data buffer comes from
3722 : * the accel memory domain (to avoid doing a push/pull from that domain).
3723 : */
3724 292 : if (bdev_io_use_memory_domain(bdev_io)) {
3725 4 : if (!desc->memory_domains_supported ||
3726 0 : (needs_exec && bdev_io->internal.memory_domain == spdk_accel_get_memory_domain())) {
3727 4 : _bdev_io_ext_use_bounce_buffer(bdev_io);
3728 4 : return;
3729 : }
3730 0 : }
3731 :
3732 288 : if (needs_exec) {
3733 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
3734 0 : bdev_io_exec_sequence(bdev_io, bdev_io_submit_sequence_cb);
3735 0 : return;
3736 : }
3737 : /* For reads we'll execute the sequence after the data is read, so, for now, only
3738 : * clear out accel_sequence pointer and submit the IO */
3739 0 : assert(bdev_io->type == SPDK_BDEV_IO_TYPE_READ);
3740 0 : bdev_io->u.bdev.accel_sequence = NULL;
3741 0 : }
3742 :
3743 288 : bdev_io_submit(bdev_io);
3744 292 : }
3745 :
3746 : static void
3747 12 : bdev_io_submit_reset(struct spdk_bdev_io *bdev_io)
3748 : {
3749 12 : struct spdk_bdev *bdev = bdev_io->bdev;
3750 12 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
3751 12 : struct spdk_io_channel *ch = bdev_ch->channel;
3752 :
3753 12 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_PENDING);
3754 :
3755 12 : bdev_io->internal.f.in_submit_request = true;
3756 12 : bdev_submit_request(bdev, ch, bdev_io);
3757 12 : bdev_io->internal.f.in_submit_request = false;
3758 12 : }
3759 :
3760 : void
3761 693 : bdev_io_init(struct spdk_bdev_io *bdev_io,
3762 : struct spdk_bdev *bdev, void *cb_arg,
3763 : spdk_bdev_io_completion_cb cb)
3764 : {
3765 693 : bdev_io->bdev = bdev;
3766 693 : bdev_io->internal.f.raw = 0;
3767 693 : bdev_io->internal.caller_ctx = cb_arg;
3768 693 : bdev_io->internal.cb = cb;
3769 693 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
3770 693 : bdev_io->internal.f.in_submit_request = false;
3771 693 : bdev_io->internal.error.nvme.cdw0 = 0;
3772 693 : bdev_io->num_retries = 0;
3773 693 : bdev_io->internal.get_buf_cb = NULL;
3774 693 : bdev_io->internal.get_aux_buf_cb = NULL;
3775 693 : bdev_io->internal.data_transfer_cpl = NULL;
3776 693 : bdev_io->internal.f.split = bdev_io_should_split(bdev_io);
3777 693 : }
3778 :
3779 : static bool
3780 537 : bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
3781 : {
3782 537 : return bdev->fn_table->io_type_supported(bdev->ctxt, io_type);
3783 : }
3784 :
3785 : bool
3786 177 : spdk_bdev_io_type_supported(struct spdk_bdev *bdev, enum spdk_bdev_io_type io_type)
3787 : {
3788 : bool supported;
3789 :
3790 177 : supported = bdev_io_type_supported(bdev, io_type);
3791 :
3792 177 : if (!supported) {
3793 7 : switch (io_type) {
3794 : case SPDK_BDEV_IO_TYPE_WRITE_ZEROES:
3795 : /* The bdev layer will emulate write zeroes as long as write is supported. */
3796 0 : supported = bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE);
3797 0 : break;
3798 : default:
3799 7 : break;
3800 : }
3801 7 : }
3802 :
3803 177 : return supported;
3804 : }
3805 :
3806 : static const char *g_io_type_strings[] = {
3807 : [SPDK_BDEV_IO_TYPE_READ] = "read",
3808 : [SPDK_BDEV_IO_TYPE_WRITE] = "write",
3809 : [SPDK_BDEV_IO_TYPE_UNMAP] = "unmap",
3810 : [SPDK_BDEV_IO_TYPE_FLUSH] = "flush",
3811 : [SPDK_BDEV_IO_TYPE_RESET] = "reset",
3812 : [SPDK_BDEV_IO_TYPE_NVME_ADMIN] = "nvme_admin",
3813 : [SPDK_BDEV_IO_TYPE_NVME_IO] = "nvme_io",
3814 : [SPDK_BDEV_IO_TYPE_NVME_IO_MD] = "nvme_io_md",
3815 : [SPDK_BDEV_IO_TYPE_WRITE_ZEROES] = "write_zeroes",
3816 : [SPDK_BDEV_IO_TYPE_ZCOPY] = "zcopy",
3817 : [SPDK_BDEV_IO_TYPE_GET_ZONE_INFO] = "get_zone_info",
3818 : [SPDK_BDEV_IO_TYPE_ZONE_MANAGEMENT] = "zone_management",
3819 : [SPDK_BDEV_IO_TYPE_ZONE_APPEND] = "zone_append",
3820 : [SPDK_BDEV_IO_TYPE_COMPARE] = "compare",
3821 : [SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE] = "compare_and_write",
3822 : [SPDK_BDEV_IO_TYPE_ABORT] = "abort",
3823 : [SPDK_BDEV_IO_TYPE_SEEK_HOLE] = "seek_hole",
3824 : [SPDK_BDEV_IO_TYPE_SEEK_DATA] = "seek_data",
3825 : [SPDK_BDEV_IO_TYPE_COPY] = "copy",
3826 : [SPDK_BDEV_IO_TYPE_NVME_IOV_MD] = "nvme_iov_md",
3827 : };
3828 :
3829 : const char *
3830 0 : spdk_bdev_get_io_type_name(enum spdk_bdev_io_type io_type)
3831 : {
3832 0 : if (io_type <= SPDK_BDEV_IO_TYPE_INVALID || io_type >= SPDK_BDEV_NUM_IO_TYPES) {
3833 0 : return NULL;
3834 : }
3835 :
3836 0 : return g_io_type_strings[io_type];
3837 0 : }
3838 :
3839 : int
3840 0 : spdk_bdev_get_io_type(const char *io_type_string)
3841 : {
3842 : int i;
3843 :
3844 0 : for (i = SPDK_BDEV_IO_TYPE_READ; i < SPDK_BDEV_NUM_IO_TYPES; ++i) {
3845 0 : if (!strcmp(io_type_string, g_io_type_strings[i])) {
3846 0 : return i;
3847 : }
3848 0 : }
3849 :
3850 0 : return -1;
3851 0 : }
3852 :
3853 : uint64_t
3854 0 : spdk_bdev_io_get_submit_tsc(struct spdk_bdev_io *bdev_io)
3855 : {
3856 0 : return bdev_io->internal.submit_tsc;
3857 : }
3858 :
3859 : int
3860 0 : spdk_bdev_dump_info_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
3861 : {
3862 0 : if (bdev->fn_table->dump_info_json) {
3863 0 : return bdev->fn_table->dump_info_json(bdev->ctxt, w);
3864 : }
3865 :
3866 0 : return 0;
3867 0 : }
3868 :
3869 : static void
3870 10 : bdev_qos_update_max_quota_per_timeslice(struct spdk_bdev_qos *qos)
3871 : {
3872 10 : uint32_t max_per_timeslice = 0;
3873 : int i;
3874 :
3875 50 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3876 40 : if (qos->rate_limits[i].limit == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
3877 15 : qos->rate_limits[i].max_per_timeslice = 0;
3878 15 : continue;
3879 : }
3880 :
3881 50 : max_per_timeslice = qos->rate_limits[i].limit *
3882 25 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC / SPDK_SEC_TO_USEC;
3883 :
3884 25 : qos->rate_limits[i].max_per_timeslice = spdk_max(max_per_timeslice,
3885 : qos->rate_limits[i].min_per_timeslice);
3886 :
3887 50 : __atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
3888 25 : qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELEASE);
3889 25 : }
3890 :
3891 10 : bdev_qos_set_ops(qos);
3892 10 : }
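/*
 * Worked example for the quota calculation above (illustrative values, assuming the default
 * 1000 usec timeslice):
 *   - an IOPS limit of 10,000 ops/sec  -> 10000 * 1000 / 1000000 = 10 I/Os per timeslice
 *   - a bandwidth limit of 1 MiB/sec   -> 1048576 * 1000 / 1000000 = 1048 bytes per timeslice
 * Each result is raised to min_per_timeslice if it falls below that floor, and then stored
 * atomically as the remaining budget for the current timeslice.
 */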
3893 :
3894 : static void
3895 4 : bdev_channel_submit_qos_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
3896 : struct spdk_io_channel *io_ch, void *ctx)
3897 : {
3898 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
3899 : int status;
3900 :
3901 4 : bdev_qos_io_submit(bdev_ch, bdev->internal.qos);
3902 :
3903 : /* if all IOs were sent then continue the iteration, otherwise - stop it */
3904 : /* TODO: channels round robing */
3905 4 : status = TAILQ_EMPTY(&bdev_ch->qos_queued_io) ? 0 : 1;
3906 :
3907 4 : spdk_bdev_for_each_channel_continue(i, status);
3908 4 : }
3909 :
3910 :
3911 : static void
3912 2 : bdev_channel_submit_qos_io_done(struct spdk_bdev *bdev, void *ctx, int status)
3913 : {
3914 :
3915 2 : }
3916 :
3917 : static int
3918 3 : bdev_channel_poll_qos(void *arg)
3919 : {
3920 3 : struct spdk_bdev *bdev = arg;
3921 3 : struct spdk_bdev_qos *qos = bdev->internal.qos;
3922 3 : uint64_t now = spdk_get_ticks();
3923 : int i;
3924 : int64_t remaining_last_timeslice;
3925 :
3926 3 : if (spdk_unlikely(qos->thread == NULL)) {
3927 : /* The old QoS was unbound so that it can be removed, and the new QoS is not enabled yet. */
3928 1 : return SPDK_POLLER_IDLE;
3929 : }
3930 :
3931 2 : if (now < (qos->last_timeslice + qos->timeslice_size)) {
3932 : /* We received our callback earlier than expected - return
3933 : * immediately and wait to do accounting until at least one
3934 : * timeslice has actually expired. This should never happen
3935 : * with a well-behaved timer implementation.
3936 : */
3937 0 : return SPDK_POLLER_IDLE;
3938 : }
3939 :
3940 : /* Reset for next round of rate limiting */
3941 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3942 : /* We may have allowed the IOs or bytes to slightly overrun in the last
3943 : * timeslice. remaining_this_timeslice is signed, so if it's negative
3944 : * here, we'll account for the overrun so that the next timeslice will
3945 : * be appropriately reduced.
3946 : */
3947 8 : remaining_last_timeslice = __atomic_exchange_n(&qos->rate_limits[i].remaining_this_timeslice,
3948 : 0, __ATOMIC_RELAXED);
3949 8 : if (remaining_last_timeslice < 0) {
3950 : /* There could be a race condition here as both bdev_qos_rw_queue_io() and bdev_channel_poll_qos()
3951 : * potentially use 2 atomic ops each, so they can intertwine.
3952 : * This race can potentially cause the limits to be a little fuzzy but won't cause any real damage.
3953 : */
3954 0 : __atomic_store_n(&qos->rate_limits[i].remaining_this_timeslice,
3955 0 : remaining_last_timeslice, __ATOMIC_RELAXED);
3956 0 : }
3957 8 : }
3958 :
3959 4 : while (now >= (qos->last_timeslice + qos->timeslice_size)) {
3960 2 : qos->last_timeslice += qos->timeslice_size;
3961 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
3962 16 : __atomic_add_fetch(&qos->rate_limits[i].remaining_this_timeslice,
3963 8 : qos->rate_limits[i].max_per_timeslice, __ATOMIC_RELAXED);
3964 8 : }
3965 : }
3966 :
3967 2 : spdk_bdev_for_each_channel(bdev, bdev_channel_submit_qos_io, qos,
3968 : bdev_channel_submit_qos_io_done);
3969 :
3970 2 : return SPDK_POLLER_BUSY;
3971 3 : }
3972 :
3973 : static void
3974 75 : bdev_channel_destroy_resource(struct spdk_bdev_channel *ch)
3975 : {
3976 : struct spdk_bdev_shared_resource *shared_resource;
3977 : struct lba_range *range;
3978 :
3979 75 : bdev_free_io_stat(ch->stat);
3980 : #ifdef SPDK_CONFIG_VTUNE
3981 : bdev_free_io_stat(ch->prev_stat);
3982 : #endif
3983 :
3984 75 : while (!TAILQ_EMPTY(&ch->locked_ranges)) {
3985 0 : range = TAILQ_FIRST(&ch->locked_ranges);
3986 0 : TAILQ_REMOVE(&ch->locked_ranges, range, tailq);
3987 0 : free(range);
3988 : }
3989 :
3990 75 : spdk_put_io_channel(ch->channel);
3991 75 : spdk_put_io_channel(ch->accel_channel);
3992 :
3993 75 : shared_resource = ch->shared_resource;
3994 :
3995 75 : assert(TAILQ_EMPTY(&ch->io_locked));
3996 75 : assert(TAILQ_EMPTY(&ch->io_submitted));
3997 75 : assert(TAILQ_EMPTY(&ch->io_accel_exec));
3998 75 : assert(TAILQ_EMPTY(&ch->io_memory_domain));
3999 75 : assert(ch->io_outstanding == 0);
4000 75 : assert(shared_resource->ref > 0);
4001 75 : shared_resource->ref--;
4002 75 : if (shared_resource->ref == 0) {
4003 74 : assert(shared_resource->io_outstanding == 0);
4004 74 : TAILQ_REMOVE(&shared_resource->mgmt_ch->shared_resources, shared_resource, link);
4005 74 : spdk_put_io_channel(spdk_io_channel_from_ctx(shared_resource->mgmt_ch));
4006 74 : spdk_poller_unregister(&shared_resource->nomem_poller);
4007 74 : free(shared_resource);
4008 74 : }
4009 75 : }
4010 :
4011 : static void
4012 84 : bdev_enable_qos(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch)
4013 : {
4014 84 : struct spdk_bdev_qos *qos = bdev->internal.qos;
4015 : int i;
4016 :
4017 84 : assert(spdk_spin_held(&bdev->internal.spinlock));
4018 :
4019 : /* Rate limiting on this bdev enabled */
4020 84 : if (qos) {
4021 17 : if (qos->ch == NULL) {
4022 : struct spdk_io_channel *io_ch;
4023 :
4024 9 : SPDK_DEBUGLOG(bdev, "Selecting channel %p as QoS channel for bdev %s on thread %p\n", ch,
4025 : bdev->name, spdk_get_thread());
4026 :
4027 : /* No qos channel has been selected, so set one up */
4028 :
4029 : /* Take another reference to ch */
4030 9 : io_ch = spdk_get_io_channel(__bdev_to_io_dev(bdev));
4031 9 : assert(io_ch != NULL);
4032 9 : qos->ch = ch;
4033 :
4034 9 : qos->thread = spdk_io_channel_get_thread(io_ch);
4035 :
4036 45 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4037 36 : if (bdev_qos_is_iops_rate_limit(i) == true) {
4038 9 : qos->rate_limits[i].min_per_timeslice =
4039 : SPDK_BDEV_QOS_MIN_IO_PER_TIMESLICE;
4040 9 : } else {
4041 27 : qos->rate_limits[i].min_per_timeslice =
4042 : SPDK_BDEV_QOS_MIN_BYTE_PER_TIMESLICE;
4043 : }
4044 :
4045 36 : if (qos->rate_limits[i].limit == 0) {
4046 2 : qos->rate_limits[i].limit = SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
4047 2 : }
4048 36 : }
4049 9 : bdev_qos_update_max_quota_per_timeslice(qos);
4050 9 : qos->timeslice_size =
4051 9 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC * spdk_get_ticks_hz() / SPDK_SEC_TO_USEC;
4052 9 : qos->last_timeslice = spdk_get_ticks();
4053 9 : qos->poller = SPDK_POLLER_REGISTER(bdev_channel_poll_qos,
4054 : bdev,
4055 : SPDK_BDEV_QOS_TIMESLICE_IN_USEC);
4056 9 : }
4057 :
4058 17 : ch->flags |= BDEV_CH_QOS_ENABLED;
4059 17 : }
4060 84 : }
4061 :
4062 : struct poll_timeout_ctx {
4063 : struct spdk_bdev_desc *desc;
4064 : uint64_t timeout_in_sec;
4065 : spdk_bdev_io_timeout_cb cb_fn;
4066 : void *cb_arg;
4067 : };
4068 :
4069 : static void
4070 277 : bdev_desc_free(struct spdk_bdev_desc *desc)
4071 : {
4072 277 : spdk_spin_destroy(&desc->spinlock);
4073 277 : free(desc->media_events_buffer);
4074 277 : free(desc);
4075 277 : }
4076 :
4077 : static void
4078 8 : bdev_channel_poll_timeout_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
4079 : {
4080 8 : struct poll_timeout_ctx *ctx = _ctx;
4081 8 : struct spdk_bdev_desc *desc = ctx->desc;
4082 :
4083 8 : free(ctx);
4084 :
4085 8 : spdk_spin_lock(&desc->spinlock);
4086 8 : desc->refs--;
4087 8 : if (desc->closed == true && desc->refs == 0) {
4088 1 : spdk_spin_unlock(&desc->spinlock);
4089 1 : bdev_desc_free(desc);
4090 1 : return;
4091 : }
4092 7 : spdk_spin_unlock(&desc->spinlock);
4093 8 : }
4094 :
4095 : static void
4096 13 : bdev_channel_poll_timeout_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
4097 : struct spdk_io_channel *io_ch, void *_ctx)
4098 : {
4099 13 : struct poll_timeout_ctx *ctx = _ctx;
4100 13 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
4101 13 : struct spdk_bdev_desc *desc = ctx->desc;
4102 : struct spdk_bdev_io *bdev_io;
4103 : uint64_t now;
4104 :
4105 13 : spdk_spin_lock(&desc->spinlock);
4106 13 : if (desc->closed == true) {
4107 1 : spdk_spin_unlock(&desc->spinlock);
4108 1 : spdk_bdev_for_each_channel_continue(i, -1);
4109 1 : return;
4110 : }
4111 12 : spdk_spin_unlock(&desc->spinlock);
4112 :
4113 12 : now = spdk_get_ticks();
4114 22 : TAILQ_FOREACH(bdev_io, &bdev_ch->io_submitted, internal.ch_link) {
4115 : /* Exclude any I/O that are generated via splitting. */
4116 15 : if (bdev_io->internal.cb == bdev_io_split_done) {
4117 3 : continue;
4118 : }
4119 :
4120 : /* Once we find an I/O that has not timed out, we can immediately
4121 : * exit the loop.
4122 : */
4123 24 : if (now < (bdev_io->internal.submit_tsc +
4124 12 : ctx->timeout_in_sec * spdk_get_ticks_hz())) {
4125 5 : goto end;
4126 : }
4127 :
4128 7 : if (bdev_io->internal.desc == desc) {
4129 7 : ctx->cb_fn(ctx->cb_arg, bdev_io);
4130 7 : }
4131 14 : }
4132 :
4133 : end:
4134 12 : spdk_bdev_for_each_channel_continue(i, 0);
4135 13 : }
4136 :
4137 : static int
4138 8 : bdev_poll_timeout_io(void *arg)
4139 : {
4140 8 : struct spdk_bdev_desc *desc = arg;
4141 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
4142 : struct poll_timeout_ctx *ctx;
4143 :
4144 8 : ctx = calloc(1, sizeof(struct poll_timeout_ctx));
4145 8 : if (!ctx) {
4146 0 : SPDK_ERRLOG("failed to allocate memory\n");
4147 0 : return SPDK_POLLER_BUSY;
4148 : }
4149 8 : ctx->desc = desc;
4150 8 : ctx->cb_arg = desc->cb_arg;
4151 8 : ctx->cb_fn = desc->cb_fn;
4152 8 : ctx->timeout_in_sec = desc->timeout_in_sec;
4153 :
4154 : /* Take a ref on the descriptor in case it gets closed while we are checking
4155 : * all of the channels.
4156 : */
4157 8 : spdk_spin_lock(&desc->spinlock);
4158 8 : desc->refs++;
4159 8 : spdk_spin_unlock(&desc->spinlock);
4160 :
4161 8 : spdk_bdev_for_each_channel(bdev, bdev_channel_poll_timeout_io, ctx,
4162 : bdev_channel_poll_timeout_io_done);
4163 :
4164 8 : return SPDK_POLLER_BUSY;
4165 8 : }
4166 :
4167 : int
4168 5 : spdk_bdev_set_timeout(struct spdk_bdev_desc *desc, uint64_t timeout_in_sec,
4169 : spdk_bdev_io_timeout_cb cb_fn, void *cb_arg)
4170 : {
4171 5 : assert(desc->thread == spdk_get_thread());
4172 :
4173 5 : spdk_poller_unregister(&desc->io_timeout_poller);
4174 :
4175 5 : if (timeout_in_sec) {
4176 4 : assert(cb_fn != NULL);
4177 4 : desc->io_timeout_poller = SPDK_POLLER_REGISTER(bdev_poll_timeout_io,
4178 : desc,
4179 : SPDK_BDEV_IO_POLL_INTERVAL_IN_MSEC * SPDK_SEC_TO_USEC /
4180 : 1000);
4181 4 : if (desc->io_timeout_poller == NULL) {
4182 0 : SPDK_ERRLOG("can not register the desc timeout IO poller\n");
4183 0 : return -1;
4184 : }
4185 4 : }
4186 :
4187 5 : desc->cb_fn = cb_fn;
4188 5 : desc->cb_arg = cb_arg;
4189 5 : desc->timeout_in_sec = timeout_in_sec;
4190 :
4191 5 : return 0;
4192 5 : }
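/*
 * Minimal usage sketch for spdk_bdev_set_timeout() (a hypothetical caller, not part of this
 * file): register a 30-second I/O timeout on an open descriptor. The callback signature mirrors
 * how ctx->cb_fn is invoked from bdev_channel_poll_timeout_io() above.
 */
static void
example_io_timeout_cb(void *cb_arg, struct spdk_bdev_io *bdev_io)
{
	/* This I/O has been outstanding longer than timeout_in_sec; here we only log it, though a
	 * caller could also choose to abort the I/O or reset the bdev. */
	SPDK_ERRLOG("I/O %p on bdev %s timed out\n", bdev_io, spdk_bdev_get_name(bdev_io->bdev));
}

static int
example_enable_io_timeout(struct spdk_bdev_desc *desc)
{
	/* Must run on the thread that opened the descriptor (see the assert above). */
	return spdk_bdev_set_timeout(desc, 30, example_io_timeout_cb, NULL);
}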
4193 :
4194 : static int
4195 77 : bdev_channel_create(void *io_device, void *ctx_buf)
4196 : {
4197 77 : struct spdk_bdev *bdev = __bdev_from_io_dev(io_device);
4198 77 : struct spdk_bdev_channel *ch = ctx_buf;
4199 : struct spdk_io_channel *mgmt_io_ch;
4200 : struct spdk_bdev_mgmt_channel *mgmt_ch;
4201 : struct spdk_bdev_shared_resource *shared_resource;
4202 : struct lba_range *range;
4203 :
4204 77 : ch->bdev = bdev;
4205 77 : ch->channel = bdev->fn_table->get_io_channel(bdev->ctxt);
4206 77 : if (!ch->channel) {
4207 2 : return -1;
4208 : }
4209 :
4210 75 : ch->accel_channel = spdk_accel_get_io_channel();
4211 75 : if (!ch->accel_channel) {
4212 0 : spdk_put_io_channel(ch->channel);
4213 0 : return -1;
4214 : }
4215 :
4216 75 : spdk_trace_record(TRACE_BDEV_IOCH_CREATE, bdev->internal.trace_id, 0, 0,
4217 : spdk_thread_get_id(spdk_io_channel_get_thread(ch->channel)));
4218 :
4219 75 : assert(ch->histogram == NULL);
4220 75 : if (bdev->internal.histogram_enabled) {
4221 0 : ch->histogram = spdk_histogram_data_alloc();
4222 0 : if (ch->histogram == NULL) {
4223 0 : SPDK_ERRLOG("Could not allocate histogram\n");
4224 0 : }
4225 0 : }
4226 :
4227 75 : mgmt_io_ch = spdk_get_io_channel(&g_bdev_mgr);
4228 75 : if (!mgmt_io_ch) {
4229 0 : spdk_put_io_channel(ch->channel);
4230 0 : spdk_put_io_channel(ch->accel_channel);
4231 0 : return -1;
4232 : }
4233 :
4234 75 : mgmt_ch = __io_ch_to_bdev_mgmt_ch(mgmt_io_ch);
4235 77 : TAILQ_FOREACH(shared_resource, &mgmt_ch->shared_resources, link) {
4236 3 : if (shared_resource->shared_ch == ch->channel) {
4237 1 : spdk_put_io_channel(mgmt_io_ch);
4238 1 : shared_resource->ref++;
4239 1 : break;
4240 : }
4241 2 : }
4242 :
4243 75 : if (shared_resource == NULL) {
4244 74 : shared_resource = calloc(1, sizeof(*shared_resource));
4245 74 : if (shared_resource == NULL) {
4246 0 : spdk_put_io_channel(ch->channel);
4247 0 : spdk_put_io_channel(ch->accel_channel);
4248 0 : spdk_put_io_channel(mgmt_io_ch);
4249 0 : return -1;
4250 : }
4251 :
4252 74 : shared_resource->mgmt_ch = mgmt_ch;
4253 74 : shared_resource->io_outstanding = 0;
4254 74 : TAILQ_INIT(&shared_resource->nomem_io);
4255 74 : shared_resource->nomem_threshold = 0;
4256 74 : shared_resource->shared_ch = ch->channel;
4257 74 : shared_resource->ref = 1;
4258 74 : TAILQ_INSERT_TAIL(&mgmt_ch->shared_resources, shared_resource, link);
4259 74 : }
4260 :
4261 75 : ch->io_outstanding = 0;
4262 75 : TAILQ_INIT(&ch->locked_ranges);
4263 75 : TAILQ_INIT(&ch->qos_queued_io);
4264 75 : ch->flags = 0;
4265 75 : ch->trace_id = bdev->internal.trace_id;
4266 75 : ch->shared_resource = shared_resource;
4267 :
4268 75 : TAILQ_INIT(&ch->io_submitted);
4269 75 : TAILQ_INIT(&ch->io_locked);
4270 75 : TAILQ_INIT(&ch->io_accel_exec);
4271 75 : TAILQ_INIT(&ch->io_memory_domain);
4272 :
4273 75 : ch->stat = bdev_alloc_io_stat(false);
4274 75 : if (ch->stat == NULL) {
4275 0 : bdev_channel_destroy_resource(ch);
4276 0 : return -1;
4277 : }
4278 :
4279 75 : ch->stat->ticks_rate = spdk_get_ticks_hz();
4280 :
4281 : #ifdef SPDK_CONFIG_VTUNE
4282 : {
4283 : char *name;
4284 : __itt_init_ittlib(NULL, 0);
4285 : name = spdk_sprintf_alloc("spdk_bdev_%s_%p", ch->bdev->name, ch);
4286 : if (!name) {
4287 : bdev_channel_destroy_resource(ch);
4288 : return -1;
4289 : }
4290 : ch->handle = __itt_string_handle_create(name);
4291 : free(name);
4292 : ch->start_tsc = spdk_get_ticks();
4293 : ch->interval_tsc = spdk_get_ticks_hz() / 100;
4294 : ch->prev_stat = bdev_alloc_io_stat(false);
4295 : if (ch->prev_stat == NULL) {
4296 : bdev_channel_destroy_resource(ch);
4297 : return -1;
4298 : }
4299 : }
4300 : #endif
4301 :
4302 75 : spdk_spin_lock(&bdev->internal.spinlock);
4303 75 : bdev_enable_qos(bdev, ch);
4304 :
4305 76 : TAILQ_FOREACH(range, &bdev->internal.locked_ranges, tailq) {
4306 : struct lba_range *new_range;
4307 :
4308 1 : new_range = calloc(1, sizeof(*new_range));
4309 1 : if (new_range == NULL) {
4310 0 : spdk_spin_unlock(&bdev->internal.spinlock);
4311 0 : bdev_channel_destroy_resource(ch);
4312 0 : return -1;
4313 : }
4314 1 : new_range->length = range->length;
4315 1 : new_range->offset = range->offset;
4316 1 : new_range->locked_ctx = range->locked_ctx;
4317 1 : TAILQ_INSERT_TAIL(&ch->locked_ranges, new_range, tailq);
4318 1 : }
4319 :
4320 75 : spdk_spin_unlock(&bdev->internal.spinlock);
4321 :
4322 75 : return 0;
4323 77 : }
4324 :
4325 : static int
4326 0 : bdev_abort_all_buf_io_cb(struct spdk_iobuf_channel *ch, struct spdk_iobuf_entry *entry,
4327 : void *cb_ctx)
4328 : {
4329 0 : struct spdk_bdev_channel *bdev_ch = cb_ctx;
4330 : struct spdk_bdev_io *bdev_io;
4331 : uint64_t buf_len;
4332 :
4333 0 : bdev_io = SPDK_CONTAINEROF(entry, struct spdk_bdev_io, internal.iobuf);
4334 0 : if (bdev_io->internal.ch == bdev_ch) {
4335 0 : buf_len = bdev_io_get_max_buf_len(bdev_io, bdev_io->internal.buf.len);
4336 0 : spdk_iobuf_entry_abort(ch, entry, buf_len);
4337 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
4338 0 : }
4339 :
4340 0 : return 0;
4341 : }
4342 :
4343 : /*
4344 : * Abort I/O that are waiting on a data buffer.
4345 : */
4346 : static void
4347 98 : bdev_abort_all_buf_io(struct spdk_bdev_mgmt_channel *mgmt_ch, struct spdk_bdev_channel *ch)
4348 : {
4349 98 : spdk_iobuf_for_each_entry(&mgmt_ch->iobuf, bdev_abort_all_buf_io_cb, ch);
4350 98 : }
4351 :
4352 : /*
4353 : * Abort I/O that are queued waiting for submission. These types of I/O are
4354 : * linked using the spdk_bdev_io link TAILQ_ENTRY.
4355 : */
4356 : static void
4357 117 : bdev_abort_all_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_channel *ch)
4358 : {
4359 : struct spdk_bdev_io *bdev_io, *tmp;
4360 :
4361 156 : TAILQ_FOREACH_SAFE(bdev_io, queue, internal.link, tmp) {
4362 39 : if (bdev_io->internal.ch == ch) {
4363 39 : TAILQ_REMOVE(queue, bdev_io, internal.link);
4364 : /*
4365 : * spdk_bdev_io_complete() assumes that the completed I/O had
4366 : * been submitted to the bdev module. Since in this case it
4367 : * hadn't, bump io_outstanding to account for the decrement
4368 : * that spdk_bdev_io_complete() will do.
4369 : */
4370 39 : if (bdev_io->type != SPDK_BDEV_IO_TYPE_RESET) {
4371 39 : bdev_io_increment_outstanding(ch, ch->shared_resource);
4372 39 : }
4373 39 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
4374 39 : }
4375 39 : }
4376 117 : }
4377 :
4378 : static bool
4379 18 : bdev_abort_queued_io(bdev_io_tailq_t *queue, struct spdk_bdev_io *bio_to_abort)
4380 : {
4381 : struct spdk_bdev_io *bdev_io;
4382 :
4383 18 : TAILQ_FOREACH(bdev_io, queue, internal.link) {
4384 0 : if (bdev_io == bio_to_abort) {
4385 0 : TAILQ_REMOVE(queue, bio_to_abort, internal.link);
4386 0 : spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
4387 0 : return true;
4388 : }
4389 0 : }
4390 :
4391 18 : return false;
4392 18 : }
4393 :
4394 : static int
4395 0 : bdev_abort_buf_io_cb(struct spdk_iobuf_channel *ch, struct spdk_iobuf_entry *entry, void *cb_ctx)
4396 : {
4397 0 : struct spdk_bdev_io *bdev_io, *bio_to_abort = cb_ctx;
4398 : uint64_t buf_len;
4399 :
4400 0 : bdev_io = SPDK_CONTAINEROF(entry, struct spdk_bdev_io, internal.iobuf);
4401 0 : if (bdev_io == bio_to_abort) {
4402 0 : buf_len = bdev_io_get_max_buf_len(bdev_io, bdev_io->internal.buf.len);
4403 0 : spdk_iobuf_entry_abort(ch, entry, buf_len);
4404 0 : spdk_bdev_io_complete(bio_to_abort, SPDK_BDEV_IO_STATUS_ABORTED);
4405 0 : return 1;
4406 : }
4407 :
4408 0 : return 0;
4409 0 : }
4410 :
4411 : static bool
4412 16 : bdev_abort_buf_io(struct spdk_bdev_mgmt_channel *mgmt_ch, struct spdk_bdev_io *bio_to_abort)
4413 : {
4414 : int rc;
4415 :
4416 16 : rc = spdk_iobuf_for_each_entry(&mgmt_ch->iobuf, bdev_abort_buf_io_cb, bio_to_abort);
4417 16 : return rc == 1;
4418 : }
4419 :
4420 : static void
4421 7 : bdev_qos_channel_destroy(void *cb_arg)
4422 : {
4423 7 : struct spdk_bdev_qos *qos = cb_arg;
4424 :
4425 7 : spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
4426 7 : spdk_poller_unregister(&qos->poller);
4427 :
4428 7 : SPDK_DEBUGLOG(bdev, "Free QoS %p.\n", qos);
4429 :
4430 7 : free(qos);
4431 7 : }
4432 :
4433 : static int
4434 7 : bdev_qos_destroy(struct spdk_bdev *bdev)
4435 : {
4436 : int i;
4437 :
4438 : /*
4439 : * Cleanly shutting down the QoS poller is tricky, because
4440 : * during the asynchronous operation the user could open
4441 : * a new descriptor and create a new channel, spawning
4442 : * a new QoS poller.
4443 : *
4444 : * The strategy is to create a new QoS structure here and swap it
4445 : * in. The shutdown path then continues to refer to the old one
4446 : * until it completes and then releases it.
4447 : */
4448 : struct spdk_bdev_qos *new_qos, *old_qos;
4449 :
4450 7 : old_qos = bdev->internal.qos;
4451 :
4452 7 : new_qos = calloc(1, sizeof(*new_qos));
4453 7 : if (!new_qos) {
4454 0 : SPDK_ERRLOG("Unable to allocate memory to shut down QoS.\n");
4455 0 : return -ENOMEM;
4456 : }
4457 :
4458 : /* Copy the old QoS data into the newly allocated structure */
4459 7 : memcpy(new_qos, old_qos, sizeof(*new_qos));
4460 :
4461 : /* Zero out the key parts of the QoS structure */
4462 7 : new_qos->ch = NULL;
4463 7 : new_qos->thread = NULL;
4464 7 : new_qos->poller = NULL;
4465 : /*
4466 : * The limit member of spdk_bdev_qos_limit structure is not zeroed.
4467 : * It will be used later for the new QoS structure.
4468 : */
4469 35 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4470 28 : new_qos->rate_limits[i].remaining_this_timeslice = 0;
4471 28 : new_qos->rate_limits[i].min_per_timeslice = 0;
4472 28 : new_qos->rate_limits[i].max_per_timeslice = 0;
4473 28 : }
4474 :
4475 7 : bdev->internal.qos = new_qos;
4476 :
4477 7 : if (old_qos->thread == NULL) {
4478 0 : free(old_qos);
4479 0 : } else {
4480 7 : spdk_thread_send_msg(old_qos->thread, bdev_qos_channel_destroy, old_qos);
4481 : }
4482 :
4483 : /* It is safe to continue with destroying the bdev even though the QoS channel hasn't
4484 : * been destroyed yet. The destruction path will end up waiting for the final
4485 : * channel to be put before it releases resources. */
4486 :
4487 7 : return 0;
4488 7 : }
4489 :
4490 : void
4491 79 : spdk_bdev_add_io_stat(struct spdk_bdev_io_stat *total, struct spdk_bdev_io_stat *add)
4492 : {
4493 79 : total->bytes_read += add->bytes_read;
4494 79 : total->num_read_ops += add->num_read_ops;
4495 79 : total->bytes_written += add->bytes_written;
4496 79 : total->num_write_ops += add->num_write_ops;
4497 79 : total->bytes_unmapped += add->bytes_unmapped;
4498 79 : total->num_unmap_ops += add->num_unmap_ops;
4499 79 : total->bytes_copied += add->bytes_copied;
4500 79 : total->num_copy_ops += add->num_copy_ops;
4501 79 : total->read_latency_ticks += add->read_latency_ticks;
4502 79 : total->write_latency_ticks += add->write_latency_ticks;
4503 79 : total->unmap_latency_ticks += add->unmap_latency_ticks;
4504 79 : total->copy_latency_ticks += add->copy_latency_ticks;
4505 79 : if (total->max_read_latency_ticks < add->max_read_latency_ticks) {
4506 7 : total->max_read_latency_ticks = add->max_read_latency_ticks;
4507 7 : }
4508 79 : if (total->min_read_latency_ticks > add->min_read_latency_ticks) {
4509 39 : total->min_read_latency_ticks = add->min_read_latency_ticks;
4510 39 : }
4511 79 : if (total->max_write_latency_ticks < add->max_write_latency_ticks) {
4512 4 : total->max_write_latency_ticks = add->max_write_latency_ticks;
4513 4 : }
4514 79 : if (total->min_write_latency_ticks > add->min_write_latency_ticks) {
4515 24 : total->min_write_latency_ticks = add->min_write_latency_ticks;
4516 24 : }
4517 79 : if (total->max_unmap_latency_ticks < add->max_unmap_latency_ticks) {
4518 0 : total->max_unmap_latency_ticks = add->max_unmap_latency_ticks;
4519 0 : }
4520 79 : if (total->min_unmap_latency_ticks > add->min_unmap_latency_ticks) {
4521 3 : total->min_unmap_latency_ticks = add->min_unmap_latency_ticks;
4522 3 : }
4523 79 : if (total->max_copy_latency_ticks < add->max_copy_latency_ticks) {
4524 0 : total->max_copy_latency_ticks = add->max_copy_latency_ticks;
4525 0 : }
4526 79 : if (total->min_copy_latency_ticks > add->min_copy_latency_ticks) {
4527 4 : total->min_copy_latency_ticks = add->min_copy_latency_ticks;
4528 4 : }
4529 79 : }
4530 :
4531 : static void
4532 5 : bdev_get_io_stat(struct spdk_bdev_io_stat *to_stat, struct spdk_bdev_io_stat *from_stat)
4533 : {
4534 5 : memcpy(to_stat, from_stat, offsetof(struct spdk_bdev_io_stat, io_error));
4535 :
4536 5 : if (to_stat->io_error != NULL && from_stat->io_error != NULL) {
4537 0 : memcpy(to_stat->io_error, from_stat->io_error,
4538 : sizeof(struct spdk_bdev_io_error_stat));
4539 0 : }
4540 5 : }
4541 :
4542 : void
4543 215 : spdk_bdev_reset_io_stat(struct spdk_bdev_io_stat *stat, enum spdk_bdev_reset_stat_mode mode)
4544 : {
4545 215 : if (mode == SPDK_BDEV_RESET_STAT_NONE) {
4546 5 : return;
4547 : }
4548 :
4549 210 : stat->max_read_latency_ticks = 0;
4550 210 : stat->min_read_latency_ticks = UINT64_MAX;
4551 210 : stat->max_write_latency_ticks = 0;
4552 210 : stat->min_write_latency_ticks = UINT64_MAX;
4553 210 : stat->max_unmap_latency_ticks = 0;
4554 210 : stat->min_unmap_latency_ticks = UINT64_MAX;
4555 210 : stat->max_copy_latency_ticks = 0;
4556 210 : stat->min_copy_latency_ticks = UINT64_MAX;
4557 :
4558 210 : if (mode != SPDK_BDEV_RESET_STAT_ALL) {
4559 2 : return;
4560 : }
4561 :
4562 208 : stat->bytes_read = 0;
4563 208 : stat->num_read_ops = 0;
4564 208 : stat->bytes_written = 0;
4565 208 : stat->num_write_ops = 0;
4566 208 : stat->bytes_unmapped = 0;
4567 208 : stat->num_unmap_ops = 0;
4568 208 : stat->bytes_copied = 0;
4569 208 : stat->num_copy_ops = 0;
4570 208 : stat->read_latency_ticks = 0;
4571 208 : stat->write_latency_ticks = 0;
4572 208 : stat->unmap_latency_ticks = 0;
4573 208 : stat->copy_latency_ticks = 0;
4574 :
4575 208 : if (stat->io_error != NULL) {
4576 132 : memset(stat->io_error, 0, sizeof(struct spdk_bdev_io_error_stat));
4577 132 : }
4578 215 : }
4579 :
4580 : struct spdk_bdev_io_stat *
4581 206 : bdev_alloc_io_stat(bool io_error_stat)
4582 : {
4583 : struct spdk_bdev_io_stat *stat;
4584 :
4585 206 : stat = malloc(sizeof(struct spdk_bdev_io_stat));
4586 206 : if (stat == NULL) {
4587 0 : return NULL;
4588 : }
4589 :
4590 206 : if (io_error_stat) {
4591 131 : stat->io_error = malloc(sizeof(struct spdk_bdev_io_error_stat));
4592 131 : if (stat->io_error == NULL) {
4593 0 : free(stat);
4594 0 : return NULL;
4595 : }
4596 131 : } else {
4597 75 : stat->io_error = NULL;
4598 : }
4599 :
4600 206 : spdk_bdev_reset_io_stat(stat, SPDK_BDEV_RESET_STAT_ALL);
4601 :
4602 206 : return stat;
4603 206 : }
4604 :
4605 : void
4606 206 : bdev_free_io_stat(struct spdk_bdev_io_stat *stat)
4607 : {
4608 206 : if (stat != NULL) {
4609 206 : free(stat->io_error);
4610 206 : free(stat);
4611 206 : }
4612 206 : }
4613 :
4614 : void
4615 0 : spdk_bdev_dump_io_stat_json(struct spdk_bdev_io_stat *stat, struct spdk_json_write_ctx *w)
4616 : {
4617 : int i;
4618 :
4619 0 : spdk_json_write_named_uint64(w, "bytes_read", stat->bytes_read);
4620 0 : spdk_json_write_named_uint64(w, "num_read_ops", stat->num_read_ops);
4621 0 : spdk_json_write_named_uint64(w, "bytes_written", stat->bytes_written);
4622 0 : spdk_json_write_named_uint64(w, "num_write_ops", stat->num_write_ops);
4623 0 : spdk_json_write_named_uint64(w, "bytes_unmapped", stat->bytes_unmapped);
4624 0 : spdk_json_write_named_uint64(w, "num_unmap_ops", stat->num_unmap_ops);
4625 0 : spdk_json_write_named_uint64(w, "bytes_copied", stat->bytes_copied);
4626 0 : spdk_json_write_named_uint64(w, "num_copy_ops", stat->num_copy_ops);
4627 0 : spdk_json_write_named_uint64(w, "read_latency_ticks", stat->read_latency_ticks);
4628 0 : spdk_json_write_named_uint64(w, "max_read_latency_ticks", stat->max_read_latency_ticks);
4629 0 : spdk_json_write_named_uint64(w, "min_read_latency_ticks",
4630 0 : stat->min_read_latency_ticks != UINT64_MAX ?
4631 0 : stat->min_read_latency_ticks : 0);
4632 0 : spdk_json_write_named_uint64(w, "write_latency_ticks", stat->write_latency_ticks);
4633 0 : spdk_json_write_named_uint64(w, "max_write_latency_ticks", stat->max_write_latency_ticks);
4634 0 : spdk_json_write_named_uint64(w, "min_write_latency_ticks",
4635 0 : stat->min_write_latency_ticks != UINT64_MAX ?
4636 0 : stat->min_write_latency_ticks : 0);
4637 0 : spdk_json_write_named_uint64(w, "unmap_latency_ticks", stat->unmap_latency_ticks);
4638 0 : spdk_json_write_named_uint64(w, "max_unmap_latency_ticks", stat->max_unmap_latency_ticks);
4639 0 : spdk_json_write_named_uint64(w, "min_unmap_latency_ticks",
4640 0 : stat->min_unmap_latency_ticks != UINT64_MAX ?
4641 0 : stat->min_unmap_latency_ticks : 0);
4642 0 : spdk_json_write_named_uint64(w, "copy_latency_ticks", stat->copy_latency_ticks);
4643 0 : spdk_json_write_named_uint64(w, "max_copy_latency_ticks", stat->max_copy_latency_ticks);
4644 0 : spdk_json_write_named_uint64(w, "min_copy_latency_ticks",
4645 0 : stat->min_copy_latency_ticks != UINT64_MAX ?
4646 0 : stat->min_copy_latency_ticks : 0);
4647 :
4648 0 : if (stat->io_error != NULL) {
4649 0 : spdk_json_write_named_object_begin(w, "io_error");
4650 0 : for (i = 0; i < -SPDK_MIN_BDEV_IO_STATUS; i++) {
4651 0 : if (stat->io_error->error_status[i] != 0) {
4652 0 : spdk_json_write_named_uint32(w, bdev_io_status_get_string(-(i + 1)),
4653 0 : stat->io_error->error_status[i]);
4654 0 : }
4655 0 : }
4656 0 : spdk_json_write_object_end(w);
4657 0 : }
4658 0 : }
4659 :
4660 : static void
4661 79 : bdev_channel_abort_queued_ios(struct spdk_bdev_channel *ch)
4662 : {
4663 79 : struct spdk_bdev_shared_resource *shared_resource = ch->shared_resource;
4664 79 : struct spdk_bdev_mgmt_channel *mgmt_ch = shared_resource->mgmt_ch;
4665 :
4666 79 : bdev_abort_all_queued_io(&shared_resource->nomem_io, ch);
4667 79 : bdev_abort_all_buf_io(mgmt_ch, ch);
4668 79 : }
4669 :
4670 : static void
4671 75 : bdev_channel_destroy(void *io_device, void *ctx_buf)
4672 : {
4673 75 : struct spdk_bdev_channel *ch = ctx_buf;
4674 :
4675 75 : SPDK_DEBUGLOG(bdev, "Destroying channel %p for bdev %s on thread %p\n", ch, ch->bdev->name,
4676 : spdk_get_thread());
4677 :
4678 75 : spdk_trace_record(TRACE_BDEV_IOCH_DESTROY, ch->bdev->internal.trace_id, 0, 0,
4679 : spdk_thread_get_id(spdk_io_channel_get_thread(ch->channel)));
4680 :
4681 : /* This channel is going away, so add its statistics into the bdev so that they don't get lost. */
4682 75 : spdk_spin_lock(&ch->bdev->internal.spinlock);
4683 75 : spdk_bdev_add_io_stat(ch->bdev->internal.stat, ch->stat);
4684 75 : spdk_spin_unlock(&ch->bdev->internal.spinlock);
4685 :
4686 75 : bdev_channel_abort_queued_ios(ch);
4687 :
4688 75 : if (ch->histogram) {
4689 0 : spdk_histogram_data_free(ch->histogram);
4690 0 : }
4691 :
4692 75 : bdev_channel_destroy_resource(ch);
4693 75 : }
4694 :
4695 : /*
4696 : * If the name already exists in the global bdev name tree, RB_INSERT() returns a pointer
4697 : * to it. Hence we do not have to call bdev_get_by_name() when using this function.
4698 : */
4699 : static int
4700 265 : bdev_name_add(struct spdk_bdev_name *bdev_name, struct spdk_bdev *bdev, const char *name)
4701 : {
4702 : struct spdk_bdev_name *tmp;
4703 :
4704 265 : bdev_name->name = strdup(name);
4705 265 : if (bdev_name->name == NULL) {
4706 0 : SPDK_ERRLOG("Unable to allocate bdev name\n");
4707 0 : return -ENOMEM;
4708 : }
4709 :
4710 265 : bdev_name->bdev = bdev;
4711 :
4712 265 : spdk_spin_lock(&g_bdev_mgr.spinlock);
4713 265 : tmp = RB_INSERT(bdev_name_tree, &g_bdev_mgr.bdev_names, bdev_name);
4714 265 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
4715 :
4716 265 : if (tmp != NULL) {
4717 4 : SPDK_ERRLOG("Bdev name %s already exists\n", name);
4718 4 : free(bdev_name->name);
4719 4 : return -EEXIST;
4720 : }
4721 :
4722 261 : return 0;
4723 265 : }
4724 :
4725 : static void
4726 261 : bdev_name_del_unsafe(struct spdk_bdev_name *bdev_name)
4727 : {
4728 261 : RB_REMOVE(bdev_name_tree, &g_bdev_mgr.bdev_names, bdev_name);
4729 261 : free(bdev_name->name);
4730 261 : }
4731 :
4732 : static void
4733 5 : bdev_name_del(struct spdk_bdev_name *bdev_name)
4734 : {
4735 5 : spdk_spin_lock(&g_bdev_mgr.spinlock);
4736 5 : bdev_name_del_unsafe(bdev_name);
4737 5 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
4738 5 : }
4739 :
4740 : int
4741 137 : spdk_bdev_alias_add(struct spdk_bdev *bdev, const char *alias)
4742 : {
4743 : struct spdk_bdev_alias *tmp;
4744 : int ret;
4745 :
4746 137 : if (alias == NULL) {
4747 1 : SPDK_ERRLOG("Empty alias passed\n");
4748 1 : return -EINVAL;
4749 : }
4750 :
4751 136 : tmp = calloc(1, sizeof(*tmp));
4752 136 : if (tmp == NULL) {
4753 0 : SPDK_ERRLOG("Unable to allocate alias\n");
4754 0 : return -ENOMEM;
4755 : }
4756 :
4757 136 : ret = bdev_name_add(&tmp->alias, bdev, alias);
4758 136 : if (ret != 0) {
4759 4 : free(tmp);
4760 4 : return ret;
4761 : }
4762 :
4763 132 : TAILQ_INSERT_TAIL(&bdev->aliases, tmp, tailq);
4764 :
4765 132 : return 0;
4766 137 : }
4767 :
4768 : static int
4769 133 : bdev_alias_del(struct spdk_bdev *bdev, const char *alias,
4770 : void (*alias_del_fn)(struct spdk_bdev_name *n))
4771 : {
4772 : struct spdk_bdev_alias *tmp;
4773 :
4774 138 : TAILQ_FOREACH(tmp, &bdev->aliases, tailq) {
4775 134 : if (strcmp(alias, tmp->alias.name) == 0) {
4776 129 : TAILQ_REMOVE(&bdev->aliases, tmp, tailq);
4777 129 : alias_del_fn(&tmp->alias);
4778 129 : free(tmp);
4779 129 : return 0;
4780 : }
4781 5 : }
4782 :
4783 4 : return -ENOENT;
4784 133 : }
4785 :
4786 : int
4787 4 : spdk_bdev_alias_del(struct spdk_bdev *bdev, const char *alias)
4788 : {
4789 : int rc;
4790 :
4791 4 : rc = bdev_alias_del(bdev, alias, bdev_name_del);
4792 4 : if (rc == -ENOENT) {
4793 2 : SPDK_INFOLOG(bdev, "Alias %s does not exist\n", alias);
4794 2 : }
4795 :
4796 4 : return rc;
4797 : }
4798 :
4799 : void
4800 2 : spdk_bdev_alias_del_all(struct spdk_bdev *bdev)
4801 : {
4802 : struct spdk_bdev_alias *p, *tmp;
4803 :
4804 5 : TAILQ_FOREACH_SAFE(p, &bdev->aliases, tailq, tmp) {
4805 3 : TAILQ_REMOVE(&bdev->aliases, p, tailq);
4806 3 : bdev_name_del(&p->alias);
4807 3 : free(p);
4808 3 : }
4809 2 : }
4810 :
4811 : struct spdk_io_channel *
4812 77 : spdk_bdev_get_io_channel(struct spdk_bdev_desc *desc)
4813 : {
4814 77 : return spdk_get_io_channel(__bdev_to_io_dev(spdk_bdev_desc_get_bdev(desc)));
4815 : }
4816 :
4817 : void *
4818 0 : spdk_bdev_get_module_ctx(struct spdk_bdev_desc *desc)
4819 : {
4820 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
4821 0 : void *ctx = NULL;
4822 :
4823 0 : if (bdev->fn_table->get_module_ctx) {
4824 0 : ctx = bdev->fn_table->get_module_ctx(bdev->ctxt);
4825 0 : }
4826 :
4827 0 : return ctx;
4828 : }
4829 :
4830 : const char *
4831 0 : spdk_bdev_get_module_name(const struct spdk_bdev *bdev)
4832 : {
4833 0 : return bdev->module->name;
4834 : }
4835 :
4836 : const char *
4837 261 : spdk_bdev_get_name(const struct spdk_bdev *bdev)
4838 : {
4839 261 : return bdev->name;
4840 : }
4841 :
4842 : const char *
4843 0 : spdk_bdev_get_product_name(const struct spdk_bdev *bdev)
4844 : {
4845 0 : return bdev->product_name;
4846 : }
4847 :
4848 : const struct spdk_bdev_aliases_list *
4849 0 : spdk_bdev_get_aliases(const struct spdk_bdev *bdev)
4850 : {
4851 0 : return &bdev->aliases;
4852 : }
4853 :
4854 : uint32_t
4855 5 : spdk_bdev_get_block_size(const struct spdk_bdev *bdev)
4856 : {
4857 5 : return bdev->blocklen;
4858 : }
4859 :
4860 : uint32_t
4861 0 : spdk_bdev_get_write_unit_size(const struct spdk_bdev *bdev)
4862 : {
4863 0 : return bdev->write_unit_size;
4864 : }
4865 :
4866 : uint64_t
4867 0 : spdk_bdev_get_num_blocks(const struct spdk_bdev *bdev)
4868 : {
4869 0 : return bdev->blockcnt;
4870 : }
4871 :
4872 : const char *
4873 0 : spdk_bdev_get_qos_rpc_type(enum spdk_bdev_qos_rate_limit_type type)
4874 : {
4875 0 : return qos_rpc_type[type];
4876 : }
4877 :
4878 : void
4879 0 : spdk_bdev_get_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
4880 : {
4881 : int i;
4882 :
4883 0 : memset(limits, 0, sizeof(*limits) * SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES);
4884 :
4885 0 : spdk_spin_lock(&bdev->internal.spinlock);
4886 0 : if (bdev->internal.qos) {
4887 0 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
4888 0 : if (bdev->internal.qos->rate_limits[i].limit !=
4889 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
4890 0 : limits[i] = bdev->internal.qos->rate_limits[i].limit;
4891 0 : if (bdev_qos_is_iops_rate_limit(i) == false) {
4892 : /* Change from Byte to Megabyte which is user visible. */
4893 0 : limits[i] = limits[i] / 1024 / 1024;
4894 0 : }
4895 0 : }
4896 0 : }
4897 0 : }
4898 0 : spdk_spin_unlock(&bdev->internal.spinlock);
4899 0 : }
4900 :
4901 : size_t
4902 321 : spdk_bdev_get_buf_align(const struct spdk_bdev *bdev)
4903 : {
4904 321 : return 1 << bdev->required_alignment;
4905 : }
4906 :
4907 : uint32_t
4908 0 : spdk_bdev_get_optimal_io_boundary(const struct spdk_bdev *bdev)
4909 : {
4910 0 : return bdev->optimal_io_boundary;
4911 : }
4912 :
4913 : bool
4914 0 : spdk_bdev_has_write_cache(const struct spdk_bdev *bdev)
4915 : {
4916 0 : return bdev->write_cache;
4917 : }
4918 :
4919 : const struct spdk_uuid *
4920 0 : spdk_bdev_get_uuid(const struct spdk_bdev *bdev)
4921 : {
4922 0 : return &bdev->uuid;
4923 : }
4924 :
4925 : uint16_t
4926 0 : spdk_bdev_get_acwu(const struct spdk_bdev *bdev)
4927 : {
4928 0 : return bdev->acwu;
4929 : }
4930 :
4931 : uint32_t
4932 29 : spdk_bdev_get_md_size(const struct spdk_bdev *bdev)
4933 : {
4934 29 : return bdev->md_len;
4935 : }
4936 :
4937 : bool
4938 134 : spdk_bdev_is_md_interleaved(const struct spdk_bdev *bdev)
4939 : {
4940 134 : return (bdev->md_len != 0) && bdev->md_interleave;
4941 : }
4942 :
4943 : bool
4944 159 : spdk_bdev_is_md_separate(const struct spdk_bdev *bdev)
4945 : {
4946 159 : return (bdev->md_len != 0) && !bdev->md_interleave;
4947 : }
4948 :
4949 : bool
4950 0 : spdk_bdev_is_zoned(const struct spdk_bdev *bdev)
4951 : {
4952 0 : return bdev->zoned;
4953 : }
4954 :
4955 : uint32_t
4956 125 : spdk_bdev_get_data_block_size(const struct spdk_bdev *bdev)
4957 : {
4958 125 : if (spdk_bdev_is_md_interleaved(bdev)) {
4959 0 : return bdev->blocklen - bdev->md_len;
4960 : } else {
4961 125 : return bdev->blocklen;
4962 : }
4963 125 : }
4964 :
4965 : uint32_t
4966 0 : spdk_bdev_get_physical_block_size(const struct spdk_bdev *bdev)
4967 : {
4968 0 : return bdev->phys_blocklen;
4969 : }
4970 :
4971 : static uint32_t
4972 9 : _bdev_get_block_size_with_md(const struct spdk_bdev *bdev)
4973 : {
4974 9 : if (!spdk_bdev_is_md_interleaved(bdev)) {
4975 6 : return bdev->blocklen + bdev->md_len;
4976 : } else {
4977 3 : return bdev->blocklen;
4978 : }
4979 9 : }
4980 :
4981 : /* We have to use the typedef in the function declaration to appease astyle. */
4982 : typedef enum spdk_dif_type spdk_dif_type_t;
4983 : typedef enum spdk_dif_pi_format spdk_dif_pi_format_t;
4984 :
4985 : spdk_dif_type_t
4986 0 : spdk_bdev_get_dif_type(const struct spdk_bdev *bdev)
4987 : {
4988 0 : if (bdev->md_len != 0) {
4989 0 : return bdev->dif_type;
4990 : } else {
4991 0 : return SPDK_DIF_DISABLE;
4992 : }
4993 0 : }
4994 :
4995 : spdk_dif_pi_format_t
4996 0 : spdk_bdev_get_dif_pi_format(const struct spdk_bdev *bdev)
4997 : {
4998 0 : return bdev->dif_pi_format;
4999 : }
5000 :
5001 : bool
5002 0 : spdk_bdev_is_dif_head_of_md(const struct spdk_bdev *bdev)
5003 : {
5004 0 : if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
5005 0 : return bdev->dif_is_head_of_md;
5006 : } else {
5007 0 : return false;
5008 : }
5009 0 : }
5010 :
5011 : bool
5012 0 : spdk_bdev_is_dif_check_enabled(const struct spdk_bdev *bdev,
5013 : enum spdk_dif_check_type check_type)
5014 : {
5015 0 : if (spdk_bdev_get_dif_type(bdev) == SPDK_DIF_DISABLE) {
5016 0 : return false;
5017 : }
5018 :
5019 0 : switch (check_type) {
5020 : case SPDK_DIF_CHECK_TYPE_REFTAG:
5021 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK) != 0;
5022 : case SPDK_DIF_CHECK_TYPE_APPTAG:
5023 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_APPTAG_CHECK) != 0;
5024 : case SPDK_DIF_CHECK_TYPE_GUARD:
5025 0 : return (bdev->dif_check_flags & SPDK_DIF_FLAGS_GUARD_CHECK) != 0;
5026 : default:
5027 0 : return false;
5028 : }
5029 0 : }
5030 :
5031 : static uint32_t
5032 3 : bdev_get_max_write(const struct spdk_bdev *bdev, uint64_t num_bytes)
5033 : {
5034 : uint64_t aligned_length, max_write_blocks;
5035 :
5036 3 : aligned_length = num_bytes - (spdk_bdev_get_buf_align(bdev) - 1);
5037 3 : max_write_blocks = aligned_length / _bdev_get_block_size_with_md(bdev);
5038 3 : max_write_blocks -= max_write_blocks % bdev->write_unit_size;
5039 :
5040 3 : return max_write_blocks;
5041 : }
5042 :
5043 : uint32_t
5044 1 : spdk_bdev_get_max_copy(const struct spdk_bdev *bdev)
5045 : {
5046 1 : return bdev->max_copy;
5047 : }
5048 :
5049 : uint64_t
5050 0 : spdk_bdev_get_qd(const struct spdk_bdev *bdev)
5051 : {
5052 0 : return bdev->internal.measured_queue_depth;
5053 : }
5054 :
5055 : uint64_t
5056 0 : spdk_bdev_get_qd_sampling_period(const struct spdk_bdev *bdev)
5057 : {
5058 0 : return bdev->internal.period;
5059 : }
5060 :
5061 : uint64_t
5062 0 : spdk_bdev_get_weighted_io_time(const struct spdk_bdev *bdev)
5063 : {
5064 0 : return bdev->internal.weighted_io_time;
5065 : }
5066 :
5067 : uint64_t
5068 0 : spdk_bdev_get_io_time(const struct spdk_bdev *bdev)
5069 : {
5070 0 : return bdev->internal.io_time;
5071 : }
5072 :
5073 0 : union spdk_bdev_nvme_ctratt spdk_bdev_get_nvme_ctratt(struct spdk_bdev *bdev)
5074 : {
5075 0 : return bdev->ctratt;
5076 : }
5077 :
5078 : uint32_t
5079 0 : spdk_bdev_get_nvme_nsid(struct spdk_bdev *bdev)
5080 : {
5081 0 : return bdev->nsid;
5082 : }
5083 :
5084 : uint32_t
5085 0 : spdk_bdev_desc_get_block_size(struct spdk_bdev_desc *desc)
5086 : {
5087 0 : struct spdk_bdev *bdev = desc->bdev;
5088 :
5089 0 : return desc->opts.hide_metadata ? bdev->blocklen - bdev->md_len : bdev->blocklen;
5090 : }
5091 :
5092 : uint32_t
5093 0 : spdk_bdev_desc_get_md_size(struct spdk_bdev_desc *desc)
5094 : {
5095 0 : struct spdk_bdev *bdev = desc->bdev;
5096 :
5097 0 : return desc->opts.hide_metadata ? 0 : bdev->md_len;
5098 : }
5099 :
5100 : bool
5101 0 : spdk_bdev_desc_is_md_interleaved(struct spdk_bdev_desc *desc)
5102 : {
5103 0 : struct spdk_bdev *bdev = desc->bdev;
5104 :
5105 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_md_interleaved(bdev);
5106 : }
5107 :
5108 : bool
5109 0 : spdk_bdev_desc_is_md_separate(struct spdk_bdev_desc *desc)
5110 : {
5111 0 : struct spdk_bdev *bdev = desc->bdev;
5112 :
5113 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_md_separate(bdev);
5114 : }
5115 :
5116 : spdk_dif_type_t
5117 0 : spdk_bdev_desc_get_dif_type(struct spdk_bdev_desc *desc)
5118 : {
5119 0 : struct spdk_bdev *bdev = desc->bdev;
5120 :
5121 0 : return desc->opts.hide_metadata ? SPDK_DIF_DISABLE : spdk_bdev_get_dif_type(bdev);
5122 : }
5123 :
5124 : spdk_dif_pi_format_t
5125 0 : spdk_bdev_desc_get_dif_pi_format(struct spdk_bdev_desc *desc)
5126 : {
5127 0 : struct spdk_bdev *bdev = desc->bdev;
5128 :
5129 0 : return desc->opts.hide_metadata ? SPDK_DIF_PI_FORMAT_16 : spdk_bdev_get_dif_pi_format(bdev);
5130 : }
5131 :
5132 : bool
5133 0 : spdk_bdev_desc_is_dif_head_of_md(struct spdk_bdev_desc *desc)
5134 : {
5135 0 : struct spdk_bdev *bdev = desc->bdev;
5136 :
5137 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_dif_head_of_md(bdev);
5138 : }
5139 :
5140 : bool
5141 0 : spdk_bdev_desc_is_dif_check_enabled(struct spdk_bdev_desc *desc,
5142 : enum spdk_dif_check_type check_type)
5143 : {
5144 0 : struct spdk_bdev *bdev = desc->bdev;
5145 :
5146 0 : return desc->opts.hide_metadata ? false : spdk_bdev_is_dif_check_enabled(bdev, check_type);
5147 : }
5148 :
5149 : static void bdev_update_qd_sampling_period(void *ctx);
5150 :
5151 : static void
5152 1 : _calculate_measured_qd_cpl(struct spdk_bdev *bdev, void *_ctx, int status)
5153 : {
5154 1 : bdev->internal.measured_queue_depth = bdev->internal.temporary_queue_depth;
5155 :
5156 1 : if (bdev->internal.measured_queue_depth) {
5157 0 : bdev->internal.io_time += bdev->internal.period;
5158 0 : bdev->internal.weighted_io_time += bdev->internal.period * bdev->internal.measured_queue_depth;
5159 0 : }
5160 :
5161 1 : bdev->internal.qd_poll_in_progress = false;
5162 :
5163 1 : bdev_update_qd_sampling_period(bdev);
5164 1 : }
5165 :
5166 : static void
5167 1 : _calculate_measured_qd(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
5168 : struct spdk_io_channel *io_ch, void *_ctx)
5169 : {
5170 1 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(io_ch);
5171 :
5172 1 : bdev->internal.temporary_queue_depth += ch->io_outstanding;
5173 1 : spdk_bdev_for_each_channel_continue(i, 0);
5174 1 : }
5175 :
5176 : static int
5177 1 : bdev_calculate_measured_queue_depth(void *ctx)
5178 : {
5179 1 : struct spdk_bdev *bdev = ctx;
5180 :
5181 1 : bdev->internal.qd_poll_in_progress = true;
5182 1 : bdev->internal.temporary_queue_depth = 0;
5183 1 : spdk_bdev_for_each_channel(bdev, _calculate_measured_qd, bdev, _calculate_measured_qd_cpl);
5184 1 : return SPDK_POLLER_BUSY;
5185 : }
5186 :
5187 : static void
5188 5 : bdev_update_qd_sampling_period(void *ctx)
5189 : {
5190 5 : struct spdk_bdev *bdev = ctx;
5191 :
5192 5 : if (bdev->internal.period == bdev->internal.new_period) {
5193 0 : return;
5194 : }
5195 :
5196 5 : if (bdev->internal.qd_poll_in_progress) {
5197 1 : return;
5198 : }
5199 :
5200 4 : bdev->internal.period = bdev->internal.new_period;
5201 :
5202 4 : spdk_poller_unregister(&bdev->internal.qd_poller);
5203 4 : if (bdev->internal.period != 0) {
5204 2 : bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
5205 : bdev, bdev->internal.period);
5206 2 : } else {
5207 2 : spdk_bdev_close(bdev->internal.qd_desc);
5208 2 : bdev->internal.qd_desc = NULL;
5209 : }
5210 5 : }
5211 :
5212 : static void
5213 0 : _tmp_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *ctx)
5214 : {
5215 0 : SPDK_NOTICELOG("Unexpected event type: %d\n", type);
5216 0 : }
5217 :
5218 : void
5219 134 : spdk_bdev_set_qd_sampling_period(struct spdk_bdev *bdev, uint64_t period)
5220 : {
5221 : int rc;
5222 :
5223 134 : if (bdev->internal.new_period == period) {
5224 128 : return;
5225 : }
5226 :
5227 6 : bdev->internal.new_period = period;
5228 :
5229 6 : if (bdev->internal.qd_desc != NULL) {
5230 4 : assert(bdev->internal.period != 0);
5231 :
5232 8 : spdk_thread_send_msg(bdev->internal.qd_desc->thread,
5233 4 : bdev_update_qd_sampling_period, bdev);
5234 4 : return;
5235 : }
5236 :
5237 2 : assert(bdev->internal.period == 0);
5238 :
5239 4 : rc = spdk_bdev_open_ext(spdk_bdev_get_name(bdev), false, _tmp_bdev_event_cb,
5240 2 : NULL, &bdev->internal.qd_desc);
5241 2 : if (rc != 0) {
5242 0 : return;
5243 : }
5244 :
5245 2 : bdev->internal.period = period;
5246 2 : bdev->internal.qd_poller = SPDK_POLLER_REGISTER(bdev_calculate_measured_queue_depth,
5247 : bdev, period);
5248 134 : }
5249 :
5250 : struct bdev_get_current_qd_ctx {
5251 : uint64_t current_qd;
5252 : spdk_bdev_get_current_qd_cb cb_fn;
5253 : void *cb_arg;
5254 : };
5255 :
5256 : static void
5257 0 : bdev_get_current_qd_done(struct spdk_bdev *bdev, void *_ctx, int status)
5258 : {
5259 0 : struct bdev_get_current_qd_ctx *ctx = _ctx;
5260 :
5261 0 : ctx->cb_fn(bdev, ctx->current_qd, ctx->cb_arg, 0);
5262 :
5263 0 : free(ctx);
5264 0 : }
5265 :
5266 : static void
5267 0 : bdev_get_current_qd(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
5268 : struct spdk_io_channel *io_ch, void *_ctx)
5269 : {
5270 0 : struct bdev_get_current_qd_ctx *ctx = _ctx;
5271 0 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
5272 :
5273 0 : ctx->current_qd += bdev_ch->io_outstanding;
5274 :
5275 0 : spdk_bdev_for_each_channel_continue(i, 0);
5276 0 : }
5277 :
5278 : void
5279 0 : spdk_bdev_get_current_qd(struct spdk_bdev *bdev, spdk_bdev_get_current_qd_cb cb_fn,
5280 : void *cb_arg)
5281 : {
5282 : struct bdev_get_current_qd_ctx *ctx;
5283 :
5284 0 : assert(cb_fn != NULL);
5285 :
5286 0 : ctx = calloc(1, sizeof(*ctx));
5287 0 : if (ctx == NULL) {
5288 0 : cb_fn(bdev, 0, cb_arg, -ENOMEM);
5289 0 : return;
5290 : }
5291 :
5292 0 : ctx->cb_fn = cb_fn;
5293 0 : ctx->cb_arg = cb_arg;
5294 :
5295 0 : spdk_bdev_for_each_channel(bdev, bdev_get_current_qd, ctx, bdev_get_current_qd_done);
5296 0 : }
5297 :
5298 : static void
5299 25 : _event_notify(struct spdk_bdev_desc *desc, enum spdk_bdev_event_type type)
5300 : {
5301 25 : assert(desc->thread == spdk_get_thread());
5302 :
5303 25 : spdk_spin_lock(&desc->spinlock);
5304 25 : desc->refs--;
5305 25 : if (!desc->closed) {
5306 14 : spdk_spin_unlock(&desc->spinlock);
5307 28 : desc->callback.event_fn(type,
5308 14 : desc->bdev,
5309 14 : desc->callback.ctx);
5310 14 : return;
5311 11 : } else if (desc->refs == 0) {
5312 : /* This descriptor was closed after this event_notify message was sent.
5313 : * spdk_bdev_close() could not free the descriptor since this message was
5314 : * in flight, so we free it now using bdev_desc_free().
5315 : */
5316 10 : spdk_spin_unlock(&desc->spinlock);
5317 10 : bdev_desc_free(desc);
5318 10 : return;
5319 : }
5320 1 : spdk_spin_unlock(&desc->spinlock);
5321 25 : }
5322 :
5323 : static void
5324 25 : event_notify(struct spdk_bdev_desc *desc, spdk_msg_fn event_notify_fn)
5325 : {
5326 25 : spdk_spin_lock(&desc->spinlock);
5327 25 : desc->refs++;
5328 25 : spdk_thread_send_msg(desc->thread, event_notify_fn, desc);
5329 25 : spdk_spin_unlock(&desc->spinlock);
5330 25 : }
5331 :
5332 : static void
5333 6 : _resize_notify(void *ctx)
5334 : {
5335 6 : struct spdk_bdev_desc *desc = ctx;
5336 :
5337 6 : _event_notify(desc, SPDK_BDEV_EVENT_RESIZE);
5338 6 : }
5339 :
5340 : int
5341 11 : spdk_bdev_notify_blockcnt_change(struct spdk_bdev *bdev, uint64_t size)
5342 : {
5343 : struct spdk_bdev_desc *desc;
5344 : int ret;
5345 :
5346 11 : if (size == bdev->blockcnt) {
5347 0 : return 0;
5348 : }
5349 :
5350 11 : spdk_spin_lock(&bdev->internal.spinlock);
5351 :
5352 : /* bdev has open descriptors */
5353 11 : if (!TAILQ_EMPTY(&bdev->internal.open_descs) &&
5354 7 : bdev->blockcnt > size) {
5355 1 : ret = -EBUSY;
5356 1 : } else {
5357 10 : bdev->blockcnt = size;
5358 16 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
5359 6 : event_notify(desc, _resize_notify);
5360 6 : }
5361 10 : ret = 0;
5362 : }
5363 :
5364 11 : spdk_spin_unlock(&bdev->internal.spinlock);
5365 :
5366 11 : return ret;
5367 11 : }
5368 :
5369 : /*
5370 : * Convert I/O offset and length from bytes to blocks.
5371 : *
5372 : * Returns zero on success or non-zero if the byte parameters aren't divisible by the block size.
5373 : */
5374 : static uint64_t
5375 20 : bdev_bytes_to_blocks(struct spdk_bdev_desc *desc, uint64_t offset_bytes,
5376 : uint64_t *offset_blocks, uint64_t num_bytes, uint64_t *num_blocks)
5377 : {
5378 20 : uint32_t block_size = bdev_desc_get_block_size(desc);
5379 : uint8_t shift_cnt;
5380 :
5381 : /* Avoid expensive div operations if possible. These spdk_u32 functions are very cheap. */
5382 20 : if (spdk_likely(spdk_u32_is_pow2(block_size))) {
5383 17 : shift_cnt = spdk_u32log2(block_size);
5384 17 : *offset_blocks = offset_bytes >> shift_cnt;
5385 17 : *num_blocks = num_bytes >> shift_cnt;
5386 34 : return (offset_bytes - (*offset_blocks << shift_cnt)) |
5387 17 : (num_bytes - (*num_blocks << shift_cnt));
5388 : } else {
5389 3 : *offset_blocks = offset_bytes / block_size;
5390 3 : *num_blocks = num_bytes / block_size;
5391 3 : return (offset_bytes % block_size) | (num_bytes % block_size);
5392 : }
5393 20 : }
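
/*
 * A minimal standalone sketch of the byte-to-block conversion above, showing the
 * power-of-two shift fast path. The helpers is_pow2() and u32log2() are hypothetical
 * stand-ins for the spdk_u32 helpers; only the conversion logic mirrors the listing.
 */
#include <stdint.h>
#include <stdio.h>

static inline int is_pow2(uint32_t v) { return v != 0 && (v & (v - 1)) == 0; }
static inline uint32_t u32log2(uint32_t v) { return 31 - (uint32_t)__builtin_clz(v); }

/* Returns 0 when both byte values are block aligned, non-zero otherwise. */
static uint64_t
bytes_to_blocks(uint32_t block_size, uint64_t offset_bytes, uint64_t *offset_blocks,
		uint64_t num_bytes, uint64_t *num_blocks)
{
	if (is_pow2(block_size)) {
		uint32_t shift = u32log2(block_size);

		*offset_blocks = offset_bytes >> shift;
		*num_blocks = num_bytes >> shift;
		return (offset_bytes - (*offset_blocks << shift)) |
		       (num_bytes - (*num_blocks << shift));
	}
	*offset_blocks = offset_bytes / block_size;
	*num_blocks = num_bytes / block_size;
	return (offset_bytes % block_size) | (num_bytes % block_size);
}

int main(void)
{
	uint64_t ob, nb;
	uint64_t misaligned = bytes_to_blocks(4096, 8192, &ob, 4097, &nb);

	/* 8192 bytes starts at block 2; 4097 bytes is not block aligned, so misaligned != 0. */
	printf("offset_blocks=%llu num_blocks=%llu misaligned=%llu\n",
	       (unsigned long long)ob, (unsigned long long)nb, (unsigned long long)misaligned);
	return 0;
}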
5394 :
5395 : static bool
5396 689 : bdev_io_valid_blocks(struct spdk_bdev *bdev, uint64_t offset_blocks, uint64_t num_blocks)
5397 : {
5398 :	/* Return failure if offset_blocks + num_blocks is less than offset_blocks; this
5399 :	 * indicates an overflow and hence the offset has wrapped around */
5400 689 : if (offset_blocks + num_blocks < offset_blocks) {
5401 1 : return false;
5402 : }
5403 :
5404 : /* Return failure if offset_blocks + num_blocks exceeds the size of the bdev */
5405 688 : if (offset_blocks + num_blocks > bdev->blockcnt) {
5406 2 : return false;
5407 : }
5408 :
5409 686 : return true;
5410 689 : }
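
/*
 * A standalone illustration of the unsigned wraparound check used by
 * bdev_io_valid_blocks() above; range_valid() is a hypothetical name and the
 * block count used below is made up for the example.
 */
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
range_valid(uint64_t blockcnt, uint64_t offset_blocks, uint64_t num_blocks)
{
	/* Unsigned addition wraps modulo 2^64, so a wrapped sum compares less than offset_blocks. */
	if (offset_blocks + num_blocks < offset_blocks) {
		return false;
	}

	return offset_blocks + num_blocks <= blockcnt;
}

int main(void)
{
	assert(range_valid(1024, 0, 1024));          /* exactly fills the bdev */
	assert(!range_valid(1024, 1, 1024));         /* one block past the end */
	assert(!range_valid(1024, UINT64_MAX, 2));   /* wraps around and must be rejected */
	return 0;
}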
5411 :
5412 : static void
5413 2 : bdev_seek_complete_cb(void *ctx)
5414 : {
5415 2 : struct spdk_bdev_io *bdev_io = ctx;
5416 :
5417 2 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
5418 2 : bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
5419 2 : }
5420 :
5421 : static int
5422 4 : bdev_seek(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5423 : uint64_t offset_blocks, enum spdk_bdev_io_type io_type,
5424 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5425 : {
5426 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5427 : struct spdk_bdev_io *bdev_io;
5428 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5429 :
5430 4 : assert(io_type == SPDK_BDEV_IO_TYPE_SEEK_DATA || io_type == SPDK_BDEV_IO_TYPE_SEEK_HOLE);
5431 :
5432 :	/* Check that offset_blocks is valid by validating a single block at that offset */
5433 4 : if (!bdev_io_valid_blocks(bdev, offset_blocks, 1)) {
5434 0 : return -EINVAL;
5435 : }
5436 :
5437 4 : bdev_io = bdev_channel_get_io(channel);
5438 4 : if (!bdev_io) {
5439 0 : return -ENOMEM;
5440 : }
5441 :
5442 4 : bdev_io->internal.ch = channel;
5443 4 : bdev_io->internal.desc = desc;
5444 4 : bdev_io->type = io_type;
5445 4 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5446 4 : bdev_io->u.bdev.memory_domain = NULL;
5447 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5448 4 : bdev_io->u.bdev.accel_sequence = NULL;
5449 4 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5450 :
5451 4 : if (!spdk_bdev_io_type_supported(bdev, io_type)) {
5452 : /* In case bdev doesn't support seek to next data/hole offset,
5453 :		/* If the bdev doesn't support seeking to the next data/hole offset,
5454 :		 * assume that only data and no holes are present */
5455 1 : bdev_io->u.bdev.seek.offset = offset_blocks;
5456 1 : } else {
5457 1 : bdev_io->u.bdev.seek.offset = UINT64_MAX;
5458 : }
5459 :
5460 2 : spdk_thread_send_msg(spdk_get_thread(), bdev_seek_complete_cb, bdev_io);
5461 2 : return 0;
5462 : }
5463 :
5464 2 : bdev_io_submit(bdev_io);
5465 2 : return 0;
5466 4 : }
5467 :
5468 : int
5469 2 : spdk_bdev_seek_data(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5470 : uint64_t offset_blocks,
5471 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5472 : {
5473 2 : return bdev_seek(desc, ch, offset_blocks, SPDK_BDEV_IO_TYPE_SEEK_DATA, cb, cb_arg);
5474 : }
5475 :
5476 : int
5477 2 : spdk_bdev_seek_hole(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5478 : uint64_t offset_blocks,
5479 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5480 : {
5481 2 : return bdev_seek(desc, ch, offset_blocks, SPDK_BDEV_IO_TYPE_SEEK_HOLE, cb, cb_arg);
5482 : }
5483 :
5484 : uint64_t
5485 4 : spdk_bdev_io_get_seek_offset(const struct spdk_bdev_io *bdev_io)
5486 : {
5487 4 : return bdev_io->u.bdev.seek.offset;
5488 : }
5489 :
5490 : static int
5491 204 : bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch, void *buf,
5492 : void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5493 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5494 : {
5495 204 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5496 : struct spdk_bdev_io *bdev_io;
5497 204 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5498 :
5499 204 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
5500 0 : return -EINVAL;
5501 : }
5502 :
5503 204 : bdev_io = bdev_channel_get_io(channel);
5504 204 : if (!bdev_io) {
5505 1 : return -ENOMEM;
5506 : }
5507 :
5508 203 : bdev_io->internal.ch = channel;
5509 203 : bdev_io->internal.desc = desc;
5510 203 : bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
5511 203 : bdev_io->u.bdev.iovs = &bdev_io->iov;
5512 203 : bdev_io->u.bdev.iovs[0].iov_base = buf;
5513 203 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev_desc_get_block_size(desc);
5514 203 : bdev_io->u.bdev.iovcnt = 1;
5515 203 : bdev_io->u.bdev.md_buf = md_buf;
5516 203 : bdev_io->u.bdev.num_blocks = num_blocks;
5517 203 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5518 203 : bdev_io->u.bdev.memory_domain = NULL;
5519 203 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5520 203 : bdev_io->u.bdev.accel_sequence = NULL;
5521 203 : bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
5522 203 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5523 :
5524 203 : bdev_io_submit(bdev_io);
5525 203 : return 0;
5526 204 : }
5527 :
5528 : int
5529 3 : spdk_bdev_read(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5530 : void *buf, uint64_t offset, uint64_t nbytes,
5531 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5532 : {
5533 : uint64_t offset_blocks, num_blocks;
5534 :
5535 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5536 0 : return -EINVAL;
5537 : }
5538 :
5539 3 : return spdk_bdev_read_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
5540 3 : }
5541 :
5542 : int
5543 200 : spdk_bdev_read_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5544 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
5545 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5546 : {
5547 200 : return bdev_read_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks, cb, cb_arg);
5548 : }
5549 :
5550 : int
5551 4 : spdk_bdev_read_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5552 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5553 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5554 : {
5555 8 : struct iovec iov = {
5556 4 : .iov_base = buf,
5557 : };
5558 :
5559 4 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
5560 0 : return -EINVAL;
5561 : }
5562 :
5563 4 : if (md_buf && !_is_buf_allocated(&iov)) {
5564 0 : return -EINVAL;
5565 : }
5566 :
5567 8 : return bdev_read_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
5568 4 : cb, cb_arg);
5569 4 : }
5570 :
5571 : int
5572 5 : spdk_bdev_readv(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5573 : struct iovec *iov, int iovcnt,
5574 : uint64_t offset, uint64_t nbytes,
5575 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5576 : {
5577 : uint64_t offset_blocks, num_blocks;
5578 :
5579 5 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5580 0 : return -EINVAL;
5581 : }
5582 :
5583 5 : return spdk_bdev_readv_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
5584 5 : }
5585 :
5586 : static int
5587 226 : bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5588 : struct iovec *iov, int iovcnt, void *md_buf, uint64_t offset_blocks,
5589 : uint64_t num_blocks, struct spdk_memory_domain *domain, void *domain_ctx,
5590 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
5591 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5592 : {
5593 226 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5594 : struct spdk_bdev_io *bdev_io;
5595 226 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5596 :
5597 226 : if (spdk_unlikely(!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks))) {
5598 0 : return -EINVAL;
5599 : }
5600 :
5601 226 : bdev_io = bdev_channel_get_io(channel);
5602 226 : if (spdk_unlikely(!bdev_io)) {
5603 2 : return -ENOMEM;
5604 : }
5605 :
5606 224 : bdev_io->internal.ch = channel;
5607 224 : bdev_io->internal.desc = desc;
5608 224 : bdev_io->type = SPDK_BDEV_IO_TYPE_READ;
5609 224 : bdev_io->u.bdev.iovs = iov;
5610 224 : bdev_io->u.bdev.iovcnt = iovcnt;
5611 224 : bdev_io->u.bdev.md_buf = md_buf;
5612 224 : bdev_io->u.bdev.num_blocks = num_blocks;
5613 224 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5614 224 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5615 :
5616 224 : if (seq != NULL) {
5617 0 : bdev_io->internal.f.has_accel_sequence = true;
5618 0 : bdev_io->internal.accel_sequence = seq;
5619 0 : }
5620 :
5621 224 : if (domain != NULL) {
5622 2 : bdev_io->internal.f.has_memory_domain = true;
5623 2 : bdev_io->internal.memory_domain = domain;
5624 2 : bdev_io->internal.memory_domain_ctx = domain_ctx;
5625 2 : }
5626 :
5627 224 : bdev_io->u.bdev.memory_domain = domain;
5628 224 : bdev_io->u.bdev.memory_domain_ctx = domain_ctx;
5629 224 : bdev_io->u.bdev.accel_sequence = seq;
5630 224 : bdev_io->u.bdev.dif_check_flags = dif_check_flags;
5631 :
5632 224 : _bdev_io_submit_ext(desc, bdev_io);
5633 :
5634 224 : return 0;
5635 226 : }
5636 :
5637 : int
5638 21 : spdk_bdev_readv_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5639 : struct iovec *iov, int iovcnt,
5640 : uint64_t offset_blocks, uint64_t num_blocks,
5641 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5642 : {
5643 21 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5644 :
5645 42 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
5646 21 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, cb, cb_arg);
5647 : }
5648 :
5649 : int
5650 4 : spdk_bdev_readv_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5651 : struct iovec *iov, int iovcnt, void *md_buf,
5652 : uint64_t offset_blocks, uint64_t num_blocks,
5653 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5654 : {
5655 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5656 :
5657 4 : if (md_buf && !spdk_bdev_is_md_separate(bdev)) {
5658 0 : return -EINVAL;
5659 : }
5660 :
5661 4 : if (md_buf && !_is_buf_allocated(iov)) {
5662 0 : return -EINVAL;
5663 : }
5664 :
5665 8 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
5666 4 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, cb, cb_arg);
5667 4 : }
5668 :
5669 : static inline bool
5670 14 : _bdev_io_check_opts(struct spdk_bdev_ext_io_opts *opts, struct iovec *iov)
5671 : {
5672 : /*
5673 :	 * We check that the opts size is at least the size that spdk_bdev_ext_io_opts
5674 :	 * had when it was first introduced (ac6f2bdd8d), since access to those
5675 :	 * members is not checked internally.
5676 : */
5677 24 : return opts->size >= offsetof(struct spdk_bdev_ext_io_opts, metadata) +
5678 14 : sizeof(opts->metadata) &&
5679 10 : opts->size <= sizeof(*opts) &&
5680 : /* When memory domain is used, the user must provide data buffers */
5681 8 : (!opts->memory_domain || (iov && iov[0].iov_base));
5682 : }
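
/*
 * A minimal sketch of the size-based compatibility check done by _bdev_io_check_opts()
 * above. struct ext_opts and its members are hypothetical; only the offsetof()/sizeof()
 * pattern for gating access to newer members mirrors the listing.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

struct ext_opts {
	size_t size;          /* caller sets this to sizeof() of the struct it was built against */
	void *metadata;       /* member present since the first public version */
	uint64_t added_later; /* newer member; only safe to read when size covers it */
};

static bool
opts_cover_metadata(const struct ext_opts *opts)
{
	/* Accept callers whose struct at least reaches the end of 'metadata' and is
	 * no larger than the version this code was compiled against. */
	return opts->size >= offsetof(struct ext_opts, metadata) + sizeof(opts->metadata) &&
	       opts->size <= sizeof(*opts);
}

int main(void)
{
	struct ext_opts old_caller = { .size = offsetof(struct ext_opts, added_later) };
	struct ext_opts new_caller = { .size = sizeof(struct ext_opts) };

	printf("old caller ok=%d, new caller ok=%d\n",
	       opts_cover_metadata(&old_caller), opts_cover_metadata(&new_caller));
	return 0;
}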
5683 :
5684 : int
5685 8 : spdk_bdev_readv_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5686 : struct iovec *iov, int iovcnt,
5687 : uint64_t offset_blocks, uint64_t num_blocks,
5688 : spdk_bdev_io_completion_cb cb, void *cb_arg,
5689 : struct spdk_bdev_ext_io_opts *opts)
5690 : {
5691 8 : struct spdk_memory_domain *domain = NULL;
5692 8 : struct spdk_accel_sequence *seq = NULL;
5693 8 : void *domain_ctx = NULL, *md = NULL;
5694 8 : uint32_t dif_check_flags = 0;
5695 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5696 :
5697 8 : if (opts) {
5698 7 : if (spdk_unlikely(!_bdev_io_check_opts(opts, iov))) {
5699 3 : return -EINVAL;
5700 : }
5701 :
5702 4 : md = opts->metadata;
5703 4 : domain = bdev_get_ext_io_opt(opts, memory_domain, NULL);
5704 4 : domain_ctx = bdev_get_ext_io_opt(opts, memory_domain_ctx, NULL);
5705 4 : seq = bdev_get_ext_io_opt(opts, accel_sequence, NULL);
5706 4 : if (md) {
5707 4 : if (spdk_unlikely(!spdk_bdev_is_md_separate(bdev))) {
5708 0 : return -EINVAL;
5709 : }
5710 :
5711 4 : if (spdk_unlikely(!_is_buf_allocated(iov))) {
5712 0 : return -EINVAL;
5713 : }
5714 :
5715 4 : if (spdk_unlikely(seq != NULL)) {
5716 0 : return -EINVAL;
5717 : }
5718 4 : }
5719 4 : }
5720 :
5721 10 : dif_check_flags = bdev->dif_check_flags &
5722 5 : ~(bdev_get_ext_io_opt(opts, dif_check_flags_exclude_mask, 0));
5723 :
5724 10 : return bdev_readv_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks,
5725 5 : num_blocks, domain, domain_ctx, seq, dif_check_flags, cb, cb_arg);
5726 8 : }
5727 :
5728 : static int
5729 36 : bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5730 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5731 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5732 : {
5733 36 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5734 : struct spdk_bdev_io *bdev_io;
5735 36 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5736 :
5737 36 : if (!desc->write) {
5738 0 : return -EBADF;
5739 : }
5740 :
5741 36 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
5742 0 : return -EINVAL;
5743 : }
5744 :
5745 36 : bdev_io = bdev_channel_get_io(channel);
5746 36 : if (!bdev_io) {
5747 0 : return -ENOMEM;
5748 : }
5749 :
5750 36 : bdev_io->internal.ch = channel;
5751 36 : bdev_io->internal.desc = desc;
5752 36 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
5753 36 : bdev_io->u.bdev.iovs = &bdev_io->iov;
5754 36 : bdev_io->u.bdev.iovs[0].iov_base = buf;
5755 36 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev_desc_get_block_size(desc);
5756 36 : bdev_io->u.bdev.iovcnt = 1;
5757 36 : bdev_io->u.bdev.md_buf = md_buf;
5758 36 : bdev_io->u.bdev.num_blocks = num_blocks;
5759 36 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5760 36 : bdev_io->u.bdev.memory_domain = NULL;
5761 36 : bdev_io->u.bdev.memory_domain_ctx = NULL;
5762 36 : bdev_io->u.bdev.accel_sequence = NULL;
5763 36 : bdev_io->u.bdev.dif_check_flags = bdev->dif_check_flags;
5764 36 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5765 :
5766 36 : bdev_io_submit(bdev_io);
5767 36 : return 0;
5768 36 : }
5769 :
5770 : int
5771 3 : spdk_bdev_write(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5772 : void *buf, uint64_t offset, uint64_t nbytes,
5773 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5774 : {
5775 : uint64_t offset_blocks, num_blocks;
5776 :
5777 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
5778 0 : return -EINVAL;
5779 : }
5780 :
5781 3 : return spdk_bdev_write_blocks(desc, ch, buf, offset_blocks, num_blocks, cb, cb_arg);
5782 3 : }
5783 :
5784 : int
5785 27 : spdk_bdev_write_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5786 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
5787 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5788 : {
5789 54 : return bdev_write_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
5790 27 : cb, cb_arg);
5791 : }
5792 :
5793 : int
5794 3 : spdk_bdev_write_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5795 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
5796 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5797 : {
5798 6 : struct iovec iov = {
5799 3 : .iov_base = buf,
5800 : };
5801 :
5802 3 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
5803 0 : return -EINVAL;
5804 : }
5805 :
5806 3 : if (md_buf && !_is_buf_allocated(&iov)) {
5807 0 : return -EINVAL;
5808 : }
5809 :
5810 6 : return bdev_write_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
5811 3 : cb, cb_arg);
5812 3 : }
5813 :
5814 : static int
5815 70 : bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5816 : struct iovec *iov, int iovcnt, void *md_buf,
5817 : uint64_t offset_blocks, uint64_t num_blocks,
5818 : struct spdk_memory_domain *domain, void *domain_ctx,
5819 : struct spdk_accel_sequence *seq, uint32_t dif_check_flags,
5820 : uint32_t nvme_cdw12_raw, uint32_t nvme_cdw13_raw,
5821 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5822 : {
5823 70 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5824 : struct spdk_bdev_io *bdev_io;
5825 70 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
5826 :
5827 70 : if (spdk_unlikely(!desc->write)) {
5828 0 : return -EBADF;
5829 : }
5830 :
5831 70 : if (spdk_unlikely(!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks))) {
5832 0 : return -EINVAL;
5833 : }
5834 :
5835 70 : bdev_io = bdev_channel_get_io(channel);
5836 70 : if (spdk_unlikely(!bdev_io)) {
5837 2 : return -ENOMEM;
5838 : }
5839 :
5840 68 : bdev_io->internal.ch = channel;
5841 68 : bdev_io->internal.desc = desc;
5842 68 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE;
5843 68 : bdev_io->u.bdev.iovs = iov;
5844 68 : bdev_io->u.bdev.iovcnt = iovcnt;
5845 68 : bdev_io->u.bdev.md_buf = md_buf;
5846 68 : bdev_io->u.bdev.num_blocks = num_blocks;
5847 68 : bdev_io->u.bdev.offset_blocks = offset_blocks;
5848 68 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
5849 68 : if (seq != NULL) {
5850 0 : bdev_io->internal.f.has_accel_sequence = true;
5851 0 : bdev_io->internal.accel_sequence = seq;
5852 0 : }
5853 :
5854 68 : if (domain != NULL) {
5855 2 : bdev_io->internal.f.has_memory_domain = true;
5856 2 : bdev_io->internal.memory_domain = domain;
5857 2 : bdev_io->internal.memory_domain_ctx = domain_ctx;
5858 2 : }
5859 :
5860 68 : bdev_io->u.bdev.memory_domain = domain;
5861 68 : bdev_io->u.bdev.memory_domain_ctx = domain_ctx;
5862 68 : bdev_io->u.bdev.accel_sequence = seq;
5863 68 : bdev_io->u.bdev.dif_check_flags = dif_check_flags;
5864 68 : bdev_io->u.bdev.nvme_cdw12.raw = nvme_cdw12_raw;
5865 68 : bdev_io->u.bdev.nvme_cdw13.raw = nvme_cdw13_raw;
5866 :
5867 68 : _bdev_io_submit_ext(desc, bdev_io);
5868 :
5869 68 : return 0;
5870 70 : }
5871 :
5872 : int
5873 3 : spdk_bdev_writev(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5874 : struct iovec *iov, int iovcnt,
5875 : uint64_t offset, uint64_t len,
5876 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5877 : {
5878 : uint64_t offset_blocks, num_blocks;
5879 :
5880 3 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, len, &num_blocks) != 0) {
5881 0 : return -EINVAL;
5882 : }
5883 :
5884 3 : return spdk_bdev_writev_blocks(desc, ch, iov, iovcnt, offset_blocks, num_blocks, cb, cb_arg);
5885 3 : }
5886 :
5887 : int
5888 14 : spdk_bdev_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5889 : struct iovec *iov, int iovcnt,
5890 : uint64_t offset_blocks, uint64_t num_blocks,
5891 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5892 : {
5893 14 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5894 :
5895 28 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
5896 14 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, 0, 0,
5897 14 : cb, cb_arg);
5898 : }
5899 :
5900 : int
5901 1 : spdk_bdev_writev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5902 : struct iovec *iov, int iovcnt, void *md_buf,
5903 : uint64_t offset_blocks, uint64_t num_blocks,
5904 : spdk_bdev_io_completion_cb cb, void *cb_arg)
5905 : {
5906 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5907 :
5908 1 : if (md_buf && !spdk_bdev_is_md_separate(bdev)) {
5909 0 : return -EINVAL;
5910 : }
5911 :
5912 1 : if (md_buf && !_is_buf_allocated(iov)) {
5913 0 : return -EINVAL;
5914 : }
5915 :
5916 2 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
5917 1 : num_blocks, NULL, NULL, NULL, bdev->dif_check_flags, 0, 0,
5918 1 : cb, cb_arg);
5919 1 : }
5920 :
5921 : int
5922 8 : spdk_bdev_writev_blocks_ext(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
5923 : struct iovec *iov, int iovcnt,
5924 : uint64_t offset_blocks, uint64_t num_blocks,
5925 : spdk_bdev_io_completion_cb cb, void *cb_arg,
5926 : struct spdk_bdev_ext_io_opts *opts)
5927 : {
5928 8 : struct spdk_memory_domain *domain = NULL;
5929 8 : struct spdk_accel_sequence *seq = NULL;
5930 8 : void *domain_ctx = NULL, *md = NULL;
5931 8 : uint32_t dif_check_flags = 0;
5932 8 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
5933 8 : uint32_t nvme_cdw12_raw = 0;
5934 8 : uint32_t nvme_cdw13_raw = 0;
5935 :
5936 8 : if (opts) {
5937 7 : if (spdk_unlikely(!_bdev_io_check_opts(opts, iov))) {
5938 3 : return -EINVAL;
5939 : }
5940 4 : md = opts->metadata;
5941 4 : domain = bdev_get_ext_io_opt(opts, memory_domain, NULL);
5942 4 : domain_ctx = bdev_get_ext_io_opt(opts, memory_domain_ctx, NULL);
5943 4 : seq = bdev_get_ext_io_opt(opts, accel_sequence, NULL);
5944 4 : nvme_cdw12_raw = bdev_get_ext_io_opt(opts, nvme_cdw12.raw, 0);
5945 4 : nvme_cdw13_raw = bdev_get_ext_io_opt(opts, nvme_cdw13.raw, 0);
5946 4 : if (md) {
5947 4 : if (spdk_unlikely(!spdk_bdev_is_md_separate(bdev))) {
5948 0 : return -EINVAL;
5949 : }
5950 :
5951 4 : if (spdk_unlikely(!_is_buf_allocated(iov))) {
5952 0 : return -EINVAL;
5953 : }
5954 :
5955 4 : if (spdk_unlikely(seq != NULL)) {
5956 0 : return -EINVAL;
5957 : }
5958 4 : }
5959 4 : }
5960 :
5961 10 : dif_check_flags = bdev->dif_check_flags &
5962 5 : ~(bdev_get_ext_io_opt(opts, dif_check_flags_exclude_mask, 0));
5963 :
5964 10 : return bdev_writev_blocks_with_md(desc, ch, iov, iovcnt, md, offset_blocks, num_blocks,
5965 5 : domain, domain_ctx, seq, dif_check_flags,
5966 5 : nvme_cdw12_raw, nvme_cdw13_raw, cb, cb_arg);
5967 8 : }
5968 :
5969 : static void
5970 11 : bdev_compare_do_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
5971 : {
5972 11 : struct spdk_bdev_io *parent_io = cb_arg;
5973 11 : struct spdk_bdev *bdev = parent_io->bdev;
5974 11 : uint8_t *read_buf = bdev_io->u.bdev.iovs[0].iov_base;
5975 11 : int i, rc = 0;
5976 :
5977 11 : if (!success) {
5978 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
5979 0 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
5980 0 : spdk_bdev_free_io(bdev_io);
5981 0 : return;
5982 : }
5983 :
5984 17 : for (i = 0; i < parent_io->u.bdev.iovcnt; i++) {
5985 22 : rc = memcmp(read_buf,
5986 11 : parent_io->u.bdev.iovs[i].iov_base,
5987 11 : parent_io->u.bdev.iovs[i].iov_len);
5988 11 : if (rc) {
5989 5 : break;
5990 : }
5991 6 : read_buf += parent_io->u.bdev.iovs[i].iov_len;
5992 6 : }
5993 :
5994 11 : if (rc == 0 && parent_io->u.bdev.md_buf && spdk_bdev_is_md_separate(bdev)) {
5995 4 : rc = memcmp(bdev_io->u.bdev.md_buf,
5996 2 : parent_io->u.bdev.md_buf,
5997 2 : spdk_bdev_get_md_size(bdev));
5998 2 : }
5999 :
6000 11 : spdk_bdev_free_io(bdev_io);
6001 :
6002 11 : if (rc == 0) {
6003 5 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
6004 5 : parent_io->internal.cb(parent_io, true, parent_io->internal.caller_ctx);
6005 5 : } else {
6006 6 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_MISCOMPARE;
6007 6 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
6008 : }
6009 11 : }
6010 :
6011 : static void
6012 11 : bdev_compare_do_read(void *_bdev_io)
6013 : {
6014 11 : struct spdk_bdev_io *bdev_io = _bdev_io;
6015 : int rc;
6016 :
6017 22 : rc = spdk_bdev_read_blocks(bdev_io->internal.desc,
6018 11 : spdk_io_channel_from_ctx(bdev_io->internal.ch), NULL,
6019 11 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6020 11 : bdev_compare_do_read_done, bdev_io);
6021 :
6022 11 : if (rc == -ENOMEM) {
6023 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_do_read);
6024 11 : } else if (rc != 0) {
6025 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
6026 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
6027 0 : }
6028 11 : }
6029 :
6030 : static int
6031 16 : bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6032 : struct iovec *iov, int iovcnt, void *md_buf,
6033 : uint64_t offset_blocks, uint64_t num_blocks,
6034 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6035 : {
6036 16 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6037 : struct spdk_bdev_io *bdev_io;
6038 16 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6039 :
6040 16 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6041 0 : return -EINVAL;
6042 : }
6043 :
6044 16 : bdev_io = bdev_channel_get_io(channel);
6045 16 : if (!bdev_io) {
6046 0 : return -ENOMEM;
6047 : }
6048 :
6049 16 : bdev_io->internal.ch = channel;
6050 16 : bdev_io->internal.desc = desc;
6051 16 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE;
6052 16 : bdev_io->u.bdev.iovs = iov;
6053 16 : bdev_io->u.bdev.iovcnt = iovcnt;
6054 16 : bdev_io->u.bdev.md_buf = md_buf;
6055 16 : bdev_io->u.bdev.num_blocks = num_blocks;
6056 16 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6057 16 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6058 16 : bdev_io->u.bdev.memory_domain = NULL;
6059 16 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6060 16 : bdev_io->u.bdev.accel_sequence = NULL;
6061 :
6062 16 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE)) {
6063 7 : bdev_io_submit(bdev_io);
6064 7 : return 0;
6065 : }
6066 :
6067 9 : bdev_compare_do_read(bdev_io);
6068 :
6069 9 : return 0;
6070 16 : }
6071 :
6072 : int
6073 10 : spdk_bdev_comparev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6074 : struct iovec *iov, int iovcnt,
6075 : uint64_t offset_blocks, uint64_t num_blocks,
6076 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6077 : {
6078 20 : return bdev_comparev_blocks_with_md(desc, ch, iov, iovcnt, NULL, offset_blocks,
6079 10 : num_blocks, cb, cb_arg);
6080 : }
6081 :
6082 : int
6083 6 : spdk_bdev_comparev_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6084 : struct iovec *iov, int iovcnt, void *md_buf,
6085 : uint64_t offset_blocks, uint64_t num_blocks,
6086 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6087 : {
6088 6 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
6089 0 : return -EINVAL;
6090 : }
6091 :
6092 6 : if (md_buf && !_is_buf_allocated(iov)) {
6093 0 : return -EINVAL;
6094 : }
6095 :
6096 12 : return bdev_comparev_blocks_with_md(desc, ch, iov, iovcnt, md_buf, offset_blocks,
6097 6 : num_blocks, cb, cb_arg);
6098 6 : }
6099 :
6100 : static int
6101 4 : bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6102 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
6103 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6104 : {
6105 4 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6106 : struct spdk_bdev_io *bdev_io;
6107 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6108 :
6109 4 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6110 0 : return -EINVAL;
6111 : }
6112 :
6113 4 : bdev_io = bdev_channel_get_io(channel);
6114 4 : if (!bdev_io) {
6115 0 : return -ENOMEM;
6116 : }
6117 :
6118 4 : bdev_io->internal.ch = channel;
6119 4 : bdev_io->internal.desc = desc;
6120 4 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE;
6121 4 : bdev_io->u.bdev.iovs = &bdev_io->iov;
6122 4 : bdev_io->u.bdev.iovs[0].iov_base = buf;
6123 4 : bdev_io->u.bdev.iovs[0].iov_len = num_blocks * bdev_desc_get_block_size(desc);
6124 4 : bdev_io->u.bdev.iovcnt = 1;
6125 4 : bdev_io->u.bdev.md_buf = md_buf;
6126 4 : bdev_io->u.bdev.num_blocks = num_blocks;
6127 4 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6128 4 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6129 4 : bdev_io->u.bdev.memory_domain = NULL;
6130 4 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6131 4 : bdev_io->u.bdev.accel_sequence = NULL;
6132 :
6133 4 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE)) {
6134 2 : bdev_io_submit(bdev_io);
6135 2 : return 0;
6136 : }
6137 :
6138 2 : bdev_compare_do_read(bdev_io);
6139 :
6140 2 : return 0;
6141 4 : }
6142 :
6143 : int
6144 4 : spdk_bdev_compare_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6145 : void *buf, uint64_t offset_blocks, uint64_t num_blocks,
6146 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6147 : {
6148 8 : return bdev_compare_blocks_with_md(desc, ch, buf, NULL, offset_blocks, num_blocks,
6149 4 : cb, cb_arg);
6150 : }
6151 :
6152 : int
6153 0 : spdk_bdev_compare_blocks_with_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6154 : void *buf, void *md_buf, uint64_t offset_blocks, uint64_t num_blocks,
6155 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6156 : {
6157 0 : struct iovec iov = {
6158 0 : .iov_base = buf,
6159 : };
6160 :
6161 0 : if (md_buf && !spdk_bdev_is_md_separate(spdk_bdev_desc_get_bdev(desc))) {
6162 0 : return -EINVAL;
6163 : }
6164 :
6165 0 : if (md_buf && !_is_buf_allocated(&iov)) {
6166 0 : return -EINVAL;
6167 : }
6168 :
6169 0 : return bdev_compare_blocks_with_md(desc, ch, buf, md_buf, offset_blocks, num_blocks,
6170 0 : cb, cb_arg);
6171 0 : }
6172 :
6173 : static void
6174 2 : bdev_comparev_and_writev_blocks_unlocked(struct lba_range *range, void *ctx, int unlock_status)
6175 : {
6176 2 : struct spdk_bdev_io *bdev_io = ctx;
6177 :
6178 2 : if (unlock_status) {
6179 0 : SPDK_ERRLOG("LBA range unlock failed\n");
6180 0 : }
6181 :
6182 4 : bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS ? true :
6183 2 : false, bdev_io->internal.caller_ctx);
6184 2 : }
6185 :
6186 : static void
6187 2 : bdev_comparev_and_writev_blocks_unlock(struct spdk_bdev_io *bdev_io, int status)
6188 : {
6189 2 : bdev_io->internal.status = status;
6190 :
6191 4 : bdev_unlock_lba_range(bdev_io->internal.desc, spdk_io_channel_from_ctx(bdev_io->internal.ch),
6192 2 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6193 2 : bdev_comparev_and_writev_blocks_unlocked, bdev_io);
6194 2 : }
6195 :
6196 : static void
6197 1 : bdev_compare_and_write_do_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
6198 : {
6199 1 : struct spdk_bdev_io *parent_io = cb_arg;
6200 :
6201 1 : if (!success) {
6202 0 : SPDK_ERRLOG("Compare and write operation failed\n");
6203 0 : }
6204 :
6205 1 : spdk_bdev_free_io(bdev_io);
6206 :
6207 2 : bdev_comparev_and_writev_blocks_unlock(parent_io,
6208 1 : success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED);
6209 1 : }
6210 :
6211 : static void
6212 1 : bdev_compare_and_write_do_write(void *_bdev_io)
6213 : {
6214 1 : struct spdk_bdev_io *bdev_io = _bdev_io;
6215 : int rc;
6216 :
6217 2 : rc = spdk_bdev_writev_blocks(bdev_io->internal.desc,
6218 1 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
6219 1 : bdev_io->u.bdev.fused_iovs, bdev_io->u.bdev.fused_iovcnt,
6220 1 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6221 1 : bdev_compare_and_write_do_write_done, bdev_io);
6222 :
6223 :
6224 1 : if (rc == -ENOMEM) {
6225 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_and_write_do_write);
6226 1 : } else if (rc != 0) {
6227 0 : bdev_comparev_and_writev_blocks_unlock(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
6228 0 : }
6229 1 : }
6230 :
6231 : static void
6232 2 : bdev_compare_and_write_do_compare_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
6233 : {
6234 2 : struct spdk_bdev_io *parent_io = cb_arg;
6235 :
6236 2 : spdk_bdev_free_io(bdev_io);
6237 :
6238 2 : if (!success) {
6239 1 : bdev_comparev_and_writev_blocks_unlock(parent_io, SPDK_BDEV_IO_STATUS_MISCOMPARE);
6240 1 : return;
6241 : }
6242 :
6243 1 : bdev_compare_and_write_do_write(parent_io);
6244 2 : }
6245 :
6246 : static void
6247 2 : bdev_compare_and_write_do_compare(void *_bdev_io)
6248 : {
6249 2 : struct spdk_bdev_io *bdev_io = _bdev_io;
6250 : int rc;
6251 :
6252 4 : rc = spdk_bdev_comparev_blocks(bdev_io->internal.desc,
6253 2 : spdk_io_channel_from_ctx(bdev_io->internal.ch), bdev_io->u.bdev.iovs,
6254 2 : bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
6255 2 : bdev_compare_and_write_do_compare_done, bdev_io);
6256 :
6257 2 : if (rc == -ENOMEM) {
6258 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_compare_and_write_do_compare);
6259 2 : } else if (rc != 0) {
6260 0 : bdev_comparev_and_writev_blocks_unlock(bdev_io, SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED);
6261 0 : }
6262 2 : }
6263 :
6264 : static void
6265 2 : bdev_comparev_and_writev_blocks_locked(struct lba_range *range, void *ctx, int status)
6266 : {
6267 2 : struct spdk_bdev_io *bdev_io = ctx;
6268 :
6269 2 : if (status) {
6270 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED;
6271 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
6272 0 : return;
6273 : }
6274 :
6275 2 : bdev_compare_and_write_do_compare(bdev_io);
6276 2 : }
6277 :
6278 : int
6279 2 : spdk_bdev_comparev_and_writev_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6280 : struct iovec *compare_iov, int compare_iovcnt,
6281 : struct iovec *write_iov, int write_iovcnt,
6282 : uint64_t offset_blocks, uint64_t num_blocks,
6283 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6284 : {
6285 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6286 : struct spdk_bdev_io *bdev_io;
6287 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6288 :
6289 2 : if (!desc->write) {
6290 0 : return -EBADF;
6291 : }
6292 :
6293 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6294 0 : return -EINVAL;
6295 : }
6296 :
6297 2 : if (num_blocks > bdev->acwu) {
6298 0 : return -EINVAL;
6299 : }
6300 :
6301 2 : bdev_io = bdev_channel_get_io(channel);
6302 2 : if (!bdev_io) {
6303 0 : return -ENOMEM;
6304 : }
6305 :
6306 2 : bdev_io->internal.ch = channel;
6307 2 : bdev_io->internal.desc = desc;
6308 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE;
6309 2 : bdev_io->u.bdev.iovs = compare_iov;
6310 2 : bdev_io->u.bdev.iovcnt = compare_iovcnt;
6311 2 : bdev_io->u.bdev.fused_iovs = write_iov;
6312 2 : bdev_io->u.bdev.fused_iovcnt = write_iovcnt;
6313 2 : bdev_io->u.bdev.md_buf = NULL;
6314 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6315 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6316 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6317 2 : bdev_io->u.bdev.memory_domain = NULL;
6318 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6319 2 : bdev_io->u.bdev.accel_sequence = NULL;
6320 :
6321 2 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COMPARE_AND_WRITE)) {
6322 0 : bdev_io_submit(bdev_io);
6323 0 : return 0;
6324 : }
6325 :
6326 4 : return bdev_lock_lba_range(desc, ch, offset_blocks, num_blocks,
6327 2 : bdev_comparev_and_writev_blocks_locked, bdev_io);
6328 2 : }
6329 :
6330 : int
6331 2 : spdk_bdev_zcopy_start(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6332 : struct iovec *iov, int iovcnt,
6333 : uint64_t offset_blocks, uint64_t num_blocks,
6334 : bool populate,
6335 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6336 : {
6337 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6338 : struct spdk_bdev_io *bdev_io;
6339 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6340 :
6341 2 : if (!desc->write) {
6342 0 : return -EBADF;
6343 : }
6344 :
6345 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6346 0 : return -EINVAL;
6347 : }
6348 :
6349 2 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ZCOPY)) {
6350 0 : return -ENOTSUP;
6351 : }
6352 :
6353 2 : bdev_io = bdev_channel_get_io(channel);
6354 2 : if (!bdev_io) {
6355 0 : return -ENOMEM;
6356 : }
6357 :
6358 2 : bdev_io->internal.ch = channel;
6359 2 : bdev_io->internal.desc = desc;
6360 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_ZCOPY;
6361 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6362 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6363 2 : bdev_io->u.bdev.iovs = iov;
6364 2 : bdev_io->u.bdev.iovcnt = iovcnt;
6365 2 : bdev_io->u.bdev.md_buf = NULL;
6366 2 : bdev_io->u.bdev.zcopy.populate = populate ? 1 : 0;
6367 2 : bdev_io->u.bdev.zcopy.commit = 0;
6368 2 : bdev_io->u.bdev.zcopy.start = 1;
6369 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6370 2 : bdev_io->u.bdev.memory_domain = NULL;
6371 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6372 2 : bdev_io->u.bdev.accel_sequence = NULL;
6373 :
6374 2 : bdev_io_submit(bdev_io);
6375 :
6376 2 : return 0;
6377 2 : }
6378 :
6379 : int
6380 2 : spdk_bdev_zcopy_end(struct spdk_bdev_io *bdev_io, bool commit,
6381 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6382 : {
6383 2 : if (bdev_io->type != SPDK_BDEV_IO_TYPE_ZCOPY) {
6384 0 : return -EINVAL;
6385 : }
6386 :
6387 2 : bdev_io->u.bdev.zcopy.commit = commit ? 1 : 0;
6388 2 : bdev_io->u.bdev.zcopy.start = 0;
6389 2 : bdev_io->internal.caller_ctx = cb_arg;
6390 2 : bdev_io->internal.cb = cb;
6391 2 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_PENDING;
6392 :
6393 2 : bdev_io_submit(bdev_io);
6394 :
6395 2 : return 0;
6396 2 : }
6397 :
6398 : int
6399 0 : spdk_bdev_write_zeroes(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6400 : uint64_t offset, uint64_t len,
6401 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6402 : {
6403 : uint64_t offset_blocks, num_blocks;
6404 :
6405 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, len, &num_blocks) != 0) {
6406 0 : return -EINVAL;
6407 : }
6408 :
6409 0 : return spdk_bdev_write_zeroes_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6410 0 : }
6411 :
6412 : int
6413 33 : spdk_bdev_write_zeroes_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6414 : uint64_t offset_blocks, uint64_t num_blocks,
6415 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6416 : {
6417 33 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6418 : struct spdk_bdev_io *bdev_io;
6419 33 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6420 :
6421 33 : if (!desc->write) {
6422 0 : return -EBADF;
6423 : }
6424 :
6425 33 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6426 0 : return -EINVAL;
6427 : }
6428 :
6429 33 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) &&
6430 10 : !bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE)) {
6431 1 : return -ENOTSUP;
6432 : }
6433 :
6434 32 : bdev_io = bdev_channel_get_io(channel);
6435 :
6436 32 : if (!bdev_io) {
6437 0 : return -ENOMEM;
6438 : }
6439 :
6440 32 : bdev_io->type = SPDK_BDEV_IO_TYPE_WRITE_ZEROES;
6441 32 : bdev_io->internal.ch = channel;
6442 32 : bdev_io->internal.desc = desc;
6443 32 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6444 32 : bdev_io->u.bdev.num_blocks = num_blocks;
6445 32 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6446 32 : bdev_io->u.bdev.memory_domain = NULL;
6447 32 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6448 32 : bdev_io->u.bdev.accel_sequence = NULL;
6449 :
6450 : /* If the write_zeroes size is large and should be split, use the generic split
6451 :	 * logic regardless of whether SPDK_BDEV_IO_TYPE_WRITE_ZEROES is supported or not.
6452 :	 *
6453 :	 * Then, send the write_zeroes request if SPDK_BDEV_IO_TYPE_WRITE_ZEROES is supported,
6454 :	 * or emulate it using a regular write request otherwise.
6455 : */
6456 32 : if (bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES) ||
6457 9 : bdev_io->internal.f.split) {
6458 26 : bdev_io_submit(bdev_io);
6459 26 : return 0;
6460 : }
6461 :
6462 6 : assert(_bdev_get_block_size_with_md(bdev) <= ZERO_BUFFER_SIZE);
6463 :
6464 6 : return bdev_write_zero_buffer(bdev_io);
6465 33 : }
6466 :
6467 : int
6468 0 : spdk_bdev_unmap(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6469 : uint64_t offset, uint64_t nbytes,
6470 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6471 : {
6472 : uint64_t offset_blocks, num_blocks;
6473 :
6474 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, nbytes, &num_blocks) != 0) {
6475 0 : return -EINVAL;
6476 : }
6477 :
6478 0 : return spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6479 0 : }
6480 :
6481 : static void
6482 0 : bdev_io_complete_cb(void *ctx)
6483 : {
6484 0 : struct spdk_bdev_io *bdev_io = ctx;
6485 :
6486 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
6487 0 : bdev_io->internal.cb(bdev_io, true, bdev_io->internal.caller_ctx);
6488 0 : }
6489 :
6490 : int
6491 22 : spdk_bdev_unmap_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6492 : uint64_t offset_blocks, uint64_t num_blocks,
6493 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6494 : {
6495 22 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6496 : struct spdk_bdev_io *bdev_io;
6497 22 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6498 :
6499 22 : if (!desc->write) {
6500 0 : return -EBADF;
6501 : }
6502 :
6503 22 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6504 0 : return -EINVAL;
6505 : }
6506 :
6507 22 : bdev_io = bdev_channel_get_io(channel);
6508 22 : if (!bdev_io) {
6509 0 : return -ENOMEM;
6510 : }
6511 :
6512 22 : bdev_io->internal.ch = channel;
6513 22 : bdev_io->internal.desc = desc;
6514 22 : bdev_io->type = SPDK_BDEV_IO_TYPE_UNMAP;
6515 :
6516 22 : bdev_io->u.bdev.iovs = &bdev_io->iov;
6517 22 : bdev_io->u.bdev.iovs[0].iov_base = NULL;
6518 22 : bdev_io->u.bdev.iovs[0].iov_len = 0;
6519 22 : bdev_io->u.bdev.iovcnt = 1;
6520 :
6521 22 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6522 22 : bdev_io->u.bdev.num_blocks = num_blocks;
6523 22 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6524 22 : bdev_io->u.bdev.memory_domain = NULL;
6525 22 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6526 22 : bdev_io->u.bdev.accel_sequence = NULL;
6527 :
6528 22 : if (num_blocks == 0) {
6529 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_io_complete_cb, bdev_io);
6530 0 : return 0;
6531 : }
6532 :
6533 22 : bdev_io_submit(bdev_io);
6534 22 : return 0;
6535 22 : }
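 :
 : /* Editor's note: unmap follows the same submit/callback pattern as the
 :  * write_zeroes sketch above. A zero-length unmap is completed successfully
 :  * via a deferred message instead of being sent to the module, so the caller
 :  * still gets its callback on the submitting thread. A hedged sketch, with
 :  * `unmap_done` hypothetical:
 :  *
 :  *   rc = spdk_bdev_unmap_blocks(desc, ch, offset_blocks, num_blocks,
 :  *                               unmap_done, NULL);
 :  */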
6536 :
6537 : int
6538 0 : spdk_bdev_flush(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6539 : uint64_t offset, uint64_t length,
6540 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6541 : {
6542 : uint64_t offset_blocks, num_blocks;
6543 :
6544 0 : if (bdev_bytes_to_blocks(desc, offset, &offset_blocks, length, &num_blocks) != 0) {
6545 0 : return -EINVAL;
6546 : }
6547 :
6548 0 : return spdk_bdev_flush_blocks(desc, ch, offset_blocks, num_blocks, cb, cb_arg);
6549 0 : }
6550 :
6551 : int
6552 2 : spdk_bdev_flush_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6553 : uint64_t offset_blocks, uint64_t num_blocks,
6554 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6555 : {
6556 2 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6557 : struct spdk_bdev_io *bdev_io;
6558 2 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6559 :
6560 2 : if (!desc->write) {
6561 0 : return -EBADF;
6562 : }
6563 :
6564 2 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_FLUSH))) {
6565 0 : return -ENOTSUP;
6566 : }
6567 :
6568 2 : if (!bdev_io_valid_blocks(bdev, offset_blocks, num_blocks)) {
6569 0 : return -EINVAL;
6570 : }
6571 :
6572 2 : bdev_io = bdev_channel_get_io(channel);
6573 2 : if (!bdev_io) {
6574 0 : return -ENOMEM;
6575 : }
6576 :
6577 2 : bdev_io->internal.ch = channel;
6578 2 : bdev_io->internal.desc = desc;
6579 2 : bdev_io->type = SPDK_BDEV_IO_TYPE_FLUSH;
6580 2 : bdev_io->u.bdev.iovs = NULL;
6581 2 : bdev_io->u.bdev.iovcnt = 0;
6582 2 : bdev_io->u.bdev.offset_blocks = offset_blocks;
6583 2 : bdev_io->u.bdev.num_blocks = num_blocks;
6584 2 : bdev_io->u.bdev.memory_domain = NULL;
6585 2 : bdev_io->u.bdev.memory_domain_ctx = NULL;
6586 2 : bdev_io->u.bdev.accel_sequence = NULL;
6587 2 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6588 :
6589 2 : bdev_io_submit(bdev_io);
6590 2 : return 0;
6591 2 : }
6592 :
6593 : static int bdev_reset_poll_for_outstanding_io(void *ctx);
6594 :
6595 : static void
6596 13 : bdev_reset_check_outstanding_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
6597 : {
6598 13 : struct spdk_bdev_io *bdev_io = _ctx;
6599 13 : struct spdk_bdev_channel *ch = bdev_io->internal.ch;
6600 :
6601 13 : if (status == -EBUSY) {
6602 9 : if (spdk_get_ticks() < bdev_io->u.reset.wait_poller.stop_time_tsc) {
6603 8 : bdev_io->u.reset.wait_poller.poller = SPDK_POLLER_REGISTER(bdev_reset_poll_for_outstanding_io,
6604 : bdev_io, BDEV_RESET_CHECK_OUTSTANDING_IO_PERIOD);
6605 8 : } else {
6606 1 : if (TAILQ_EMPTY(&ch->io_memory_domain) && TAILQ_EMPTY(&ch->io_accel_exec)) {
6607 : /* If outstanding I/Os are still present and reset_io_drain_timeout
6608 : * seconds have passed, start the reset. */
6609 1 : bdev_io_submit_reset(bdev_io);
6610 1 : } else {
6611 : /* We still have in progress memory domain pull/push or we're
6612 : * executing accel sequence. Since we cannot abort either of those
6613 : * operations, fail the reset request. */
6614 0 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_FAILED);
6615 : }
6616 : }
6617 9 : } else {
6618 4 : SPDK_DEBUGLOG(bdev,
6619 : "Skipping reset for underlying device of bdev: %s - no outstanding I/O.\n",
6620 : ch->bdev->name);
6621 : /* Mark the completion status as a SUCCESS and complete the reset. */
6622 4 : spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_SUCCESS);
6623 : }
6624 13 : }
6625 :
6626 : static void
6627 13 : bdev_reset_check_outstanding_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6628 : struct spdk_io_channel *io_ch, void *_ctx)
6629 : {
6630 13 : struct spdk_bdev_channel *cur_ch = __io_ch_to_bdev_ch(io_ch);
6631 13 : int status = 0;
6632 :
6633 17 : if (cur_ch->io_outstanding > 0 ||
6634 4 : !TAILQ_EMPTY(&cur_ch->io_memory_domain) ||
6635 4 : !TAILQ_EMPTY(&cur_ch->io_accel_exec)) {
6636 : /* If a channel has outstanding IO, set status to -EBUSY code. This will stop
6637 : * further iteration over the rest of the channels and pass non-zero status
6638 : * to the callback function. */
6639 9 : status = -EBUSY;
6640 9 : }
6641 13 : spdk_bdev_for_each_channel_continue(i, status);
6642 13 : }
6643 :
6644 : static int
6645 8 : bdev_reset_poll_for_outstanding_io(void *ctx)
6646 : {
6647 8 : struct spdk_bdev_io *bdev_io = ctx;
6648 :
6649 8 : spdk_poller_unregister(&bdev_io->u.reset.wait_poller.poller);
6650 8 : spdk_bdev_for_each_channel(bdev_io->bdev, bdev_reset_check_outstanding_io, bdev_io,
6651 : bdev_reset_check_outstanding_io_done);
6652 :
6653 8 : return SPDK_POLLER_BUSY;
6654 : }
6655 :
6656 : static void
6657 16 : bdev_reset_freeze_channel_done(struct spdk_bdev *bdev, void *_ctx, int status)
6658 : {
6659 16 : struct spdk_bdev_io *bdev_io = _ctx;
6660 :
6661 16 : if (bdev->reset_io_drain_timeout == 0) {
6662 11 : bdev_io_submit_reset(bdev_io);
6663 11 : return;
6664 : }
6665 :
6666 10 : bdev_io->u.reset.wait_poller.stop_time_tsc = spdk_get_ticks() +
6667 5 : (bdev->reset_io_drain_timeout * spdk_get_ticks_hz());
6668 :
6669 : /* In case bdev->reset_io_drain_timeout is not equal to zero,
6670 : * submit the reset to the underlying module only if outstanding I/O
6671 : * remain after reset_io_drain_timeout seconds have passed. */
6672 5 : spdk_bdev_for_each_channel(bdev, bdev_reset_check_outstanding_io, bdev_io,
6673 : bdev_reset_check_outstanding_io_done);
6674 16 : }
6675 :
6676 : static void
6677 19 : bdev_reset_freeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6678 : struct spdk_io_channel *ch, void *_ctx)
6679 : {
6680 : struct spdk_bdev_channel *channel;
6681 : struct spdk_bdev_mgmt_channel *mgmt_channel;
6682 : struct spdk_bdev_shared_resource *shared_resource;
6683 : bdev_io_tailq_t tmp_queued;
6684 :
6685 19 : TAILQ_INIT(&tmp_queued);
6686 :
6687 19 : channel = __io_ch_to_bdev_ch(ch);
6688 19 : shared_resource = channel->shared_resource;
6689 19 : mgmt_channel = shared_resource->mgmt_ch;
6690 :
6691 19 : channel->flags |= BDEV_CH_RESET_IN_PROGRESS;
6692 :
6693 19 : if ((channel->flags & BDEV_CH_QOS_ENABLED) != 0) {
6694 2 : TAILQ_SWAP(&channel->qos_queued_io, &tmp_queued, spdk_bdev_io, internal.link);
6695 2 : }
6696 :
6697 19 : bdev_abort_all_queued_io(&shared_resource->nomem_io, channel);
6698 19 : bdev_abort_all_buf_io(mgmt_channel, channel);
6699 19 : bdev_abort_all_queued_io(&tmp_queued, channel);
6700 :
6701 19 : spdk_bdev_for_each_channel_continue(i, 0);
6702 19 : }
6703 :
6704 : static void
6705 18 : bdev_start_reset(struct spdk_bdev_io *bdev_io)
6706 : {
6707 18 : struct spdk_bdev *bdev = bdev_io->bdev;
6708 18 : bool freeze_channel = false;
6709 :
6710 18 : bdev_ch_add_to_io_submitted(bdev_io);
6711 :
6712 : /**
6713 : * Take a channel reference for the target bdev for the life of this
6714 : * reset. This guards against the channel getting destroyed before
6715 : * the reset is completed. We will release the reference when this
6716 : * reset is completed.
6717 : */
6718 18 : bdev_io->u.reset.ch_ref = spdk_get_io_channel(__bdev_to_io_dev(bdev));
6719 :
6720 18 : spdk_spin_lock(&bdev->internal.spinlock);
6721 18 : if (bdev->internal.reset_in_progress == NULL) {
6722 16 : bdev->internal.reset_in_progress = bdev_io;
6723 16 : freeze_channel = true;
6724 16 : } else {
6725 2 : TAILQ_INSERT_TAIL(&bdev->internal.queued_resets, bdev_io, internal.link);
6726 : }
6727 18 : spdk_spin_unlock(&bdev->internal.spinlock);
6728 :
6729 18 : if (freeze_channel) {
6730 16 : spdk_bdev_for_each_channel(bdev, bdev_reset_freeze_channel, bdev_io,
6731 : bdev_reset_freeze_channel_done);
6732 16 : }
6733 18 : }
6734 :
6735 : int
6736 18 : spdk_bdev_reset(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6737 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6738 : {
6739 18 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6740 : struct spdk_bdev_io *bdev_io;
6741 18 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6742 :
6743 18 : bdev_io = bdev_channel_get_io(channel);
6744 18 : if (!bdev_io) {
6745 0 : return -ENOMEM;
6746 : }
6747 :
6748 18 : bdev_io->internal.ch = channel;
6749 18 : bdev_io->internal.desc = desc;
6750 18 : bdev_io->internal.submit_tsc = spdk_get_ticks();
6751 18 : bdev_io->type = SPDK_BDEV_IO_TYPE_RESET;
6752 18 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6753 :
6754 18 : bdev_start_reset(bdev_io);
6755 18 : return 0;
6756 18 : }
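 :
 : /* Editor's note: a hedged usage sketch. A reset aborts I/O queued on every
 :  * channel of the bdev and, depending on reset_io_drain_timeout, waits for
 :  * outstanding I/O to drain before being forwarded to the module; only one
 :  * reset runs at a time and later resets are queued behind it.
 :  *
 :  *   static void
 :  *   reset_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
 :  *   {
 :  *           SPDK_NOTICELOG("reset %s\n", success ? "succeeded" : "failed");
 :  *           spdk_bdev_free_io(bdev_io);
 :  *   }
 :  *
 :  *   rc = spdk_bdev_reset(desc, ch, reset_done, NULL);
 :  */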
6757 :
6758 : void
6759 0 : spdk_bdev_get_io_stat(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
6760 : struct spdk_bdev_io_stat *stat, enum spdk_bdev_reset_stat_mode reset_mode)
6761 : {
6762 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6763 :
6764 0 : bdev_get_io_stat(stat, channel->stat);
6765 0 : spdk_bdev_reset_io_stat(channel->stat, reset_mode);
6766 0 : }
6767 :
6768 : static void
6769 5 : bdev_get_device_stat_done(struct spdk_bdev *bdev, void *_ctx, int status)
6770 : {
6771 5 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = _ctx;
6772 :
6773 10 : bdev_iostat_ctx->cb(bdev, bdev_iostat_ctx->stat,
6774 5 : bdev_iostat_ctx->cb_arg, 0);
6775 5 : free(bdev_iostat_ctx);
6776 5 : }
6777 :
6778 : static void
6779 4 : bdev_get_each_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6780 : struct spdk_io_channel *ch, void *_ctx)
6781 : {
6782 4 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx = _ctx;
6783 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6784 :
6785 4 : spdk_bdev_add_io_stat(bdev_iostat_ctx->stat, channel->stat);
6786 4 : spdk_bdev_reset_io_stat(channel->stat, bdev_iostat_ctx->reset_mode);
6787 4 : spdk_bdev_for_each_channel_continue(i, 0);
6788 4 : }
6789 :
6790 : void
6791 5 : spdk_bdev_get_device_stat(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
6792 : enum spdk_bdev_reset_stat_mode reset_mode, spdk_bdev_get_device_stat_cb cb, void *cb_arg)
6793 : {
6794 : struct spdk_bdev_iostat_ctx *bdev_iostat_ctx;
6795 :
6796 5 : assert(bdev != NULL);
6797 5 : assert(stat != NULL);
6798 5 : assert(cb != NULL);
6799 :
6800 5 : bdev_iostat_ctx = calloc(1, sizeof(struct spdk_bdev_iostat_ctx));
6801 5 : if (bdev_iostat_ctx == NULL) {
6802 0 : SPDK_ERRLOG("Unable to allocate memory for spdk_bdev_iostat_ctx\n");
6803 0 : cb(bdev, stat, cb_arg, -ENOMEM);
6804 0 : return;
6805 : }
6806 :
6807 5 : bdev_iostat_ctx->stat = stat;
6808 5 : bdev_iostat_ctx->cb = cb;
6809 5 : bdev_iostat_ctx->cb_arg = cb_arg;
6810 5 : bdev_iostat_ctx->reset_mode = reset_mode;
6811 :
6812 : /* Start with the statistics from previously deleted channels. */
6813 5 : spdk_spin_lock(&bdev->internal.spinlock);
6814 5 : bdev_get_io_stat(bdev_iostat_ctx->stat, bdev->internal.stat);
6815 5 : spdk_bdev_reset_io_stat(bdev->internal.stat, reset_mode);
6816 5 : spdk_spin_unlock(&bdev->internal.spinlock);
6817 :
6818 : /* Then iterate and add the statistics from each existing channel. */
6819 5 : spdk_bdev_for_each_channel(bdev, bdev_get_each_channel_stat, bdev_iostat_ctx,
6820 : bdev_get_device_stat_done);
6821 5 : }
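 :
 : /* Editor's note: an illustrative (hypothetical) caller. `stat` is assumed to
 :  * be a caller-allocated struct spdk_bdev_io_stat that stays valid until the
 :  * callback fires; the callback runs once the per-channel stats of every
 :  * existing channel have been added to those of previously deleted channels.
 :  *
 :  *   static void
 :  *   device_stat_done(struct spdk_bdev *bdev, struct spdk_bdev_io_stat *stat,
 :  *                    void *cb_arg, int rc)
 :  *   {
 :  *           if (rc == 0) {
 :  *                   // consume stat->bytes_read, stat->num_read_ops, ...
 :  *           }
 :  *           // stat is owned by the caller; release it however it was allocated
 :  *   }
 :  *
 :  *   spdk_bdev_get_device_stat(bdev, stat, SPDK_BDEV_RESET_STAT_NONE,
 :  *                             device_stat_done, NULL);
 :  */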
6822 :
6823 : struct bdev_iostat_reset_ctx {
6824 : enum spdk_bdev_reset_stat_mode mode;
6825 : bdev_reset_device_stat_cb cb;
6826 : void *cb_arg;
6827 : };
6828 :
6829 : static void
6830 0 : bdev_reset_device_stat_done(struct spdk_bdev *bdev, void *_ctx, int status)
6831 : {
6832 0 : struct bdev_iostat_reset_ctx *ctx = _ctx;
6833 :
6834 0 : ctx->cb(bdev, ctx->cb_arg, 0);
6835 :
6836 0 : free(ctx);
6837 0 : }
6838 :
6839 : static void
6840 0 : bdev_reset_each_channel_stat(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
6841 : struct spdk_io_channel *ch, void *_ctx)
6842 : {
6843 0 : struct bdev_iostat_reset_ctx *ctx = _ctx;
6844 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6845 :
6846 0 : spdk_bdev_reset_io_stat(channel->stat, ctx->mode);
6847 :
6848 0 : spdk_bdev_for_each_channel_continue(i, 0);
6849 0 : }
6850 :
6851 : void
6852 0 : bdev_reset_device_stat(struct spdk_bdev *bdev, enum spdk_bdev_reset_stat_mode mode,
6853 : bdev_reset_device_stat_cb cb, void *cb_arg)
6854 : {
6855 : struct bdev_iostat_reset_ctx *ctx;
6856 :
6857 0 : assert(bdev != NULL);
6858 0 : assert(cb != NULL);
6859 :
6860 0 : ctx = calloc(1, sizeof(*ctx));
6861 0 : if (ctx == NULL) {
6862 0 : SPDK_ERRLOG("Unable to allocate bdev_iostat_reset_ctx.\n");
6863 0 : cb(bdev, cb_arg, -ENOMEM);
6864 0 : return;
6865 : }
6866 :
6867 0 : ctx->mode = mode;
6868 0 : ctx->cb = cb;
6869 0 : ctx->cb_arg = cb_arg;
6870 :
6871 0 : spdk_spin_lock(&bdev->internal.spinlock);
6872 0 : spdk_bdev_reset_io_stat(bdev->internal.stat, mode);
6873 0 : spdk_spin_unlock(&bdev->internal.spinlock);
6874 :
6875 0 : spdk_bdev_for_each_channel(bdev,
6876 : bdev_reset_each_channel_stat,
6877 0 : ctx,
6878 : bdev_reset_device_stat_done);
6879 0 : }
6880 :
6881 : int
6882 1 : spdk_bdev_nvme_admin_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6883 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
6884 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6885 : {
6886 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6887 : struct spdk_bdev_io *bdev_io;
6888 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6889 :
6890 1 : if (!desc->write) {
6891 0 : return -EBADF;
6892 : }
6893 :
6894 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_ADMIN))) {
6895 1 : return -ENOTSUP;
6896 : }
6897 :
6898 0 : bdev_io = bdev_channel_get_io(channel);
6899 0 : if (!bdev_io) {
6900 0 : return -ENOMEM;
6901 : }
6902 :
6903 0 : bdev_io->internal.ch = channel;
6904 0 : bdev_io->internal.desc = desc;
6905 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_ADMIN;
6906 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6907 0 : bdev_io->u.nvme_passthru.buf = buf;
6908 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6909 0 : bdev_io->u.nvme_passthru.md_buf = NULL;
6910 0 : bdev_io->u.nvme_passthru.md_len = 0;
6911 :
6912 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6913 :
6914 0 : bdev_io_submit(bdev_io);
6915 0 : return 0;
6916 1 : }
6917 :
6918 : int
6919 1 : spdk_bdev_nvme_io_passthru(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6920 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes,
6921 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6922 : {
6923 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6924 : struct spdk_bdev_io *bdev_io;
6925 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6926 :
6927 1 : if (!desc->write) {
6928 : /*
6929 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
6930 : * to easily determine if the command is a read or write, but for now just
6931 : * do not allow io_passthru with a read-only descriptor.
6932 : */
6933 0 : return -EBADF;
6934 : }
6935 :
6936 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO))) {
6937 1 : return -ENOTSUP;
6938 : }
6939 :
6940 0 : bdev_io = bdev_channel_get_io(channel);
6941 0 : if (!bdev_io) {
6942 0 : return -ENOMEM;
6943 : }
6944 :
6945 0 : bdev_io->internal.ch = channel;
6946 0 : bdev_io->internal.desc = desc;
6947 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO;
6948 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6949 0 : bdev_io->u.nvme_passthru.buf = buf;
6950 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6951 0 : bdev_io->u.nvme_passthru.md_buf = NULL;
6952 0 : bdev_io->u.nvme_passthru.md_len = 0;
6953 :
6954 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6955 :
6956 0 : bdev_io_submit(bdev_io);
6957 0 : return 0;
6958 1 : }
6959 :
6960 : int
6961 1 : spdk_bdev_nvme_io_passthru_md(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
6962 : const struct spdk_nvme_cmd *cmd, void *buf, size_t nbytes, void *md_buf, size_t md_len,
6963 : spdk_bdev_io_completion_cb cb, void *cb_arg)
6964 : {
6965 1 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
6966 : struct spdk_bdev_io *bdev_io;
6967 1 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
6968 :
6969 1 : if (!desc->write) {
6970 : /*
6971 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
6972 : * to easily determine if the command is a read or write, but for now just
6973 : * do not allow io_passthru with a read-only descriptor.
6974 : */
6975 0 : return -EBADF;
6976 : }
6977 :
6978 1 : if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO_MD))) {
6979 1 : return -ENOTSUP;
6980 : }
6981 :
6982 0 : bdev_io = bdev_channel_get_io(channel);
6983 0 : if (!bdev_io) {
6984 0 : return -ENOMEM;
6985 : }
6986 :
6987 0 : bdev_io->internal.ch = channel;
6988 0 : bdev_io->internal.desc = desc;
6989 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IO_MD;
6990 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
6991 0 : bdev_io->u.nvme_passthru.buf = buf;
6992 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
6993 0 : bdev_io->u.nvme_passthru.md_buf = md_buf;
6994 0 : bdev_io->u.nvme_passthru.md_len = md_len;
6995 :
6996 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
6997 :
6998 0 : bdev_io_submit(bdev_io);
6999 0 : return 0;
7000 1 : }
7001 :
7002 : int
7003 0 : spdk_bdev_nvme_iov_passthru_md(struct spdk_bdev_desc *desc,
7004 : struct spdk_io_channel *ch,
7005 : const struct spdk_nvme_cmd *cmd,
7006 : struct iovec *iov, int iovcnt, size_t nbytes,
7007 : void *md_buf, size_t md_len,
7008 : spdk_bdev_io_completion_cb cb, void *cb_arg)
7009 : {
7010 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7011 : struct spdk_bdev_io *bdev_io;
7012 0 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7013 :
7014 0 : if (!desc->write) {
7015 : /*
7016 : * Do not try to parse the NVMe command - we could maybe use bits in the opcode
7017 : * to easily determine if the command is a read or write, but for now just
7018 : * do not allow io_passthru with a read-only descriptor.
7019 : */
7020 0 : return -EBADF;
7021 : }
7022 :
7023 0 : if (md_buf && spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO_MD))) {
7024 0 : return -ENOTSUP;
7025 0 : } else if (spdk_unlikely(!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_NVME_IO))) {
7026 0 : return -ENOTSUP;
7027 : }
7028 :
7029 0 : bdev_io = bdev_channel_get_io(channel);
7030 0 : if (!bdev_io) {
7031 0 : return -ENOMEM;
7032 : }
7033 :
7034 0 : bdev_io->internal.ch = channel;
7035 0 : bdev_io->internal.desc = desc;
7036 0 : bdev_io->type = SPDK_BDEV_IO_TYPE_NVME_IOV_MD;
7037 0 : bdev_io->u.nvme_passthru.cmd = *cmd;
7038 0 : bdev_io->u.nvme_passthru.iovs = iov;
7039 0 : bdev_io->u.nvme_passthru.iovcnt = iovcnt;
7040 0 : bdev_io->u.nvme_passthru.nbytes = nbytes;
7041 0 : bdev_io->u.nvme_passthru.md_buf = md_buf;
7042 0 : bdev_io->u.nvme_passthru.md_len = md_len;
7043 :
7044 0 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7045 :
7046 0 : bdev_io_submit(bdev_io);
7047 0 : return 0;
7048 0 : }
7049 :
7050 : static void bdev_abort_retry(void *ctx);
7051 : static void bdev_abort(struct spdk_bdev_io *parent_io);
7052 :
7053 : static void
7054 22 : bdev_abort_io_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
7055 : {
7056 22 : struct spdk_bdev_channel *channel = bdev_io->internal.ch;
7057 22 : struct spdk_bdev_io *parent_io = cb_arg;
7058 : struct spdk_bdev_io *bio_to_abort, *tmp_io;
7059 :
7060 22 : bio_to_abort = bdev_io->u.abort.bio_to_abort;
7061 :
7062 22 : spdk_bdev_free_io(bdev_io);
7063 :
7064 22 : if (!success) {
7065 : /* Check if the target I/O completed in the meantime. */
7066 2 : TAILQ_FOREACH(tmp_io, &channel->io_submitted, internal.ch_link) {
7067 1 : if (tmp_io == bio_to_abort) {
7068 0 : break;
7069 : }
7070 1 : }
7071 :
7072 : /* If the target I/O still exists, set the parent to failed. */
7073 1 : if (tmp_io != NULL) {
7074 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7075 0 : }
7076 1 : }
7077 :
7078 22 : assert(parent_io->internal.f.split);
7079 :
7080 22 : parent_io->internal.split.outstanding--;
7081 22 : if (parent_io->internal.split.outstanding == 0) {
7082 16 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7083 0 : bdev_abort_retry(parent_io);
7084 0 : } else {
7085 16 : bdev_io_complete(parent_io);
7086 : }
7087 16 : }
7088 22 : }
7089 :
7090 : static int
7091 23 : bdev_abort_io(struct spdk_bdev_desc *desc, struct spdk_bdev_channel *channel,
7092 : struct spdk_bdev_io *bio_to_abort,
7093 : spdk_bdev_io_completion_cb cb, void *cb_arg)
7094 : {
7095 23 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7096 : struct spdk_bdev_io *bdev_io;
7097 :
7098 23 : if (bio_to_abort->type == SPDK_BDEV_IO_TYPE_ABORT ||
7099 23 : bio_to_abort->type == SPDK_BDEV_IO_TYPE_RESET) {
7100 : /* TODO: Abort reset or abort request. */
7101 0 : return -ENOTSUP;
7102 : }
7103 :
7104 23 : bdev_io = bdev_channel_get_io(channel);
7105 23 : if (bdev_io == NULL) {
7106 1 : return -ENOMEM;
7107 : }
7108 :
7109 22 : bdev_io->internal.ch = channel;
7110 22 : bdev_io->internal.desc = desc;
7111 22 : bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT;
7112 22 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7113 :
7114 22 : if (bio_to_abort->internal.f.split) {
7115 6 : assert(bdev_io_should_split(bio_to_abort));
7116 6 : bdev_io->u.bdev.abort.bio_cb_arg = bio_to_abort;
7117 :
7118 : /* Parent abort request is not submitted directly, but to manage its
7119 : * execution, add it to the submitted list here.
7120 : */
7121 6 : bdev_io->internal.submit_tsc = spdk_get_ticks();
7122 6 : bdev_ch_add_to_io_submitted(bdev_io);
7123 :
7124 6 : bdev_abort(bdev_io);
7125 :
7126 6 : return 0;
7127 : }
7128 :
7129 16 : bdev_io->u.abort.bio_to_abort = bio_to_abort;
7130 :
7131 : /* Submit the abort request to the underlying bdev module. */
7132 16 : bdev_io_submit(bdev_io);
7133 :
7134 16 : return 0;
7135 23 : }
7136 :
7137 : static bool
7138 46 : bdev_io_on_tailq(struct spdk_bdev_io *bdev_io, bdev_io_tailq_t *tailq)
7139 : {
7140 : struct spdk_bdev_io *iter;
7141 :
7142 46 : TAILQ_FOREACH(iter, tailq, internal.link) {
7143 0 : if (iter == bdev_io) {
7144 0 : return true;
7145 : }
7146 0 : }
7147 :
7148 46 : return false;
7149 46 : }
7150 :
7151 : static uint32_t
7152 18 : _bdev_abort(struct spdk_bdev_io *parent_io)
7153 : {
7154 18 : struct spdk_bdev_desc *desc = parent_io->internal.desc;
7155 18 : struct spdk_bdev_channel *channel = parent_io->internal.ch;
7156 : void *bio_cb_arg;
7157 : struct spdk_bdev_io *bio_to_abort;
7158 : uint32_t matched_ios;
7159 : int rc;
7160 :
7161 18 : bio_cb_arg = parent_io->u.bdev.abort.bio_cb_arg;
7162 :
7163 : /* matched_ios is returned and will be kept by the caller.
7164 : *
7165 : * This function is used in two cases: 1) the same cb_arg is used for
7166 : * multiple I/Os, and 2) a single large I/O is split into smaller ones.
7167 : * Incrementing split_outstanding directly here may confuse readers, especially
7168 : * in the 1st case.
7169 : *
7170 : * Completion of I/O abort is processed after stack unwinding. Hence this trick
7171 : * works as expected.
7172 : */
7173 18 : matched_ios = 0;
7174 18 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS;
7175 :
7176 105 : TAILQ_FOREACH(bio_to_abort, &channel->io_submitted, internal.ch_link) {
7177 88 : if (bio_to_abort->internal.caller_ctx != bio_cb_arg) {
7178 65 : continue;
7179 : }
7180 :
7181 23 : if (bio_to_abort->internal.submit_tsc > parent_io->internal.submit_tsc) {
7182 : /* Any I/O which was submitted after this abort command should be excluded. */
7183 0 : continue;
7184 : }
7185 :
7186 : /* We can't abort a request that's being pushed/pulled or executed by accel */
7187 23 : if (bdev_io_on_tailq(bio_to_abort, &channel->io_accel_exec) ||
7188 23 : bdev_io_on_tailq(bio_to_abort, &channel->io_memory_domain)) {
7189 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7190 0 : break;
7191 : }
7192 :
7193 23 : rc = bdev_abort_io(desc, channel, bio_to_abort, bdev_abort_io_done, parent_io);
7194 23 : if (rc != 0) {
7195 1 : if (rc == -ENOMEM) {
7196 1 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_NOMEM;
7197 1 : } else {
7198 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7199 : }
7200 1 : break;
7201 : }
7202 22 : matched_ios++;
7203 22 : }
7204 :
7205 18 : return matched_ios;
7206 : }
7207 :
7208 : static void
7209 1 : bdev_abort_retry(void *ctx)
7210 : {
7211 1 : struct spdk_bdev_io *parent_io = ctx;
7212 : uint32_t matched_ios;
7213 :
7214 1 : matched_ios = _bdev_abort(parent_io);
7215 :
7216 1 : if (matched_ios == 0) {
7217 0 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7218 0 : bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry);
7219 0 : } else {
7220 : /* For retry, the case that no target I/O was found is success
7221 : * because it means target I/Os completed in the meantime.
7222 : */
7223 0 : bdev_io_complete(parent_io);
7224 : }
7225 0 : return;
7226 : }
7227 :
7228 : /* Use split_outstanding to manage the progress of aborting I/Os. */
7229 1 : parent_io->internal.f.split = true;
7230 1 : parent_io->internal.split.outstanding = matched_ios;
7231 1 : }
7232 :
7233 : static void
7234 17 : bdev_abort(struct spdk_bdev_io *parent_io)
7235 : {
7236 : uint32_t matched_ios;
7237 :
7238 17 : matched_ios = _bdev_abort(parent_io);
7239 :
7240 17 : if (matched_ios == 0) {
7241 2 : if (parent_io->internal.status == SPDK_BDEV_IO_STATUS_NOMEM) {
7242 1 : bdev_queue_io_wait_with_cb(parent_io, bdev_abort_retry);
7243 1 : } else {
7244 : /* The case where no target I/O was found is a failure. */
7245 1 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7246 1 : bdev_io_complete(parent_io);
7247 : }
7248 2 : return;
7249 : }
7250 :
7251 : /* Use split_outstanding to manage the progress of aborting I/Os. */
7252 15 : parent_io->internal.f.split = true;
7253 15 : parent_io->internal.split.outstanding = matched_ios;
7254 17 : }
7255 :
7256 : int
7257 12 : spdk_bdev_abort(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
7258 : void *bio_cb_arg,
7259 : spdk_bdev_io_completion_cb cb, void *cb_arg)
7260 : {
7261 12 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
7262 12 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7263 : struct spdk_bdev_io *bdev_io;
7264 :
7265 12 : if (bio_cb_arg == NULL) {
7266 0 : return -EINVAL;
7267 : }
7268 :
7269 12 : if (!spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_ABORT)) {
7270 1 : return -ENOTSUP;
7271 : }
7272 :
7273 11 : bdev_io = bdev_channel_get_io(channel);
7274 11 : if (bdev_io == NULL) {
7275 0 : return -ENOMEM;
7276 : }
7277 :
7278 11 : bdev_io->internal.ch = channel;
7279 11 : bdev_io->internal.desc = desc;
7280 11 : bdev_io->internal.submit_tsc = spdk_get_ticks();
7281 11 : bdev_io->type = SPDK_BDEV_IO_TYPE_ABORT;
7282 11 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
7283 :
7284 11 : bdev_io->u.bdev.abort.bio_cb_arg = bio_cb_arg;
7285 :
7286 : /* Parent abort request is not submitted directly, but to manage its execution,
7287 : * add it to the submitted list here.
7288 : */
7289 11 : bdev_ch_add_to_io_submitted(bdev_io);
7290 :
7291 11 : bdev_abort(bdev_io);
7292 :
7293 11 : return 0;
7294 12 : }
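 :
 : /* Editor's note: a hedged sketch with hypothetical names (`io_ctx`, `buf`,
 :  * `read_done`, `abort_done`). spdk_bdev_abort() targets every I/O on this
 :  * channel that was submitted with the given cb_arg before the abort itself,
 :  * so tagging related I/Os with a shared context pointer lets them be
 :  * cancelled as a group:
 :  *
 :  *   // submit I/O tagged with io_ctx ...
 :  *   spdk_bdev_read_blocks(desc, ch, buf, 0, 1, read_done, io_ctx);
 :  *
 :  *   // later, cancel everything still in flight for io_ctx
 :  *   rc = spdk_bdev_abort(desc, ch, io_ctx, abort_done, NULL);
 :  *   // rc == -ENOTSUP if the module does not support SPDK_BDEV_IO_TYPE_ABORT
 :  */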
7295 :
7296 : int
7297 4 : spdk_bdev_queue_io_wait(struct spdk_bdev *bdev, struct spdk_io_channel *ch,
7298 : struct spdk_bdev_io_wait_entry *entry)
7299 : {
7300 4 : struct spdk_bdev_channel *channel = __io_ch_to_bdev_ch(ch);
7301 4 : struct spdk_bdev_mgmt_channel *mgmt_ch = channel->shared_resource->mgmt_ch;
7302 :
7303 4 : if (bdev != entry->bdev) {
7304 0 : SPDK_ERRLOG("bdevs do not match\n");
7305 0 : return -EINVAL;
7306 : }
7307 :
7308 4 : if (mgmt_ch->per_thread_cache_count > 0) {
7309 0 : SPDK_ERRLOG("Cannot queue io_wait if spdk_bdev_io available in per-thread cache\n");
7310 0 : return -EINVAL;
7311 : }
7312 :
7313 4 : TAILQ_INSERT_TAIL(&mgmt_ch->io_wait_queue, entry, link);
7314 4 : return 0;
7315 4 : }
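 :
 : /* Editor's note: the -ENOMEM retry pattern, sketched with hypothetical names
 :  * (`my_io_ctx`, `retry_read`, `submit_read`). The wait entry must stay alive,
 :  * typically embedded in the caller's per-I/O context, until cb_fn runs on
 :  * this thread once an spdk_bdev_io becomes available again.
 :  *
 :  *   struct my_io_ctx {
 :  *           struct spdk_bdev_io_wait_entry bdev_io_wait;
 :  *           struct spdk_bdev_desc *desc;
 :  *           struct spdk_io_channel *ch;
 :  *   };
 :  *
 :  *   static void
 :  *   retry_read(void *arg)
 :  *   {
 :  *           struct my_io_ctx *ctx = arg;
 :  *
 :  *           submit_read(ctx);   // re-attempts the spdk_bdev_read_blocks() call
 :  *   }
 :  *
 :  *   // on -ENOMEM from a submission call:
 :  *   ctx->bdev_io_wait.bdev = spdk_bdev_desc_get_bdev(ctx->desc);
 :  *   ctx->bdev_io_wait.cb_fn = retry_read;
 :  *   ctx->bdev_io_wait.cb_arg = ctx;
 :  *   spdk_bdev_queue_io_wait(ctx->bdev_io_wait.bdev, ctx->ch, &ctx->bdev_io_wait);
 :  */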
7316 :
7317 : static inline void
7318 612 : bdev_io_update_io_stat(struct spdk_bdev_io *bdev_io, uint64_t tsc_diff)
7319 : {
7320 612 : enum spdk_bdev_io_status io_status = bdev_io->internal.status;
7321 612 : struct spdk_bdev_io_stat *io_stat = bdev_io->internal.ch->stat;
7322 612 : uint64_t num_blocks = bdev_io->u.bdev.num_blocks;
7323 612 : uint32_t blocklen = bdev_io->bdev->blocklen;
7324 :
7325 612 : if (spdk_likely(io_status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7326 519 : switch (bdev_io->type) {
7327 : case SPDK_BDEV_IO_TYPE_READ:
7328 321 : io_stat->bytes_read += num_blocks * blocklen;
7329 321 : io_stat->num_read_ops++;
7330 321 : io_stat->read_latency_ticks += tsc_diff;
7331 321 : if (io_stat->max_read_latency_ticks < tsc_diff) {
7332 7 : io_stat->max_read_latency_ticks = tsc_diff;
7333 7 : }
7334 321 : if (io_stat->min_read_latency_ticks > tsc_diff) {
7335 42 : io_stat->min_read_latency_ticks = tsc_diff;
7336 42 : }
7337 321 : break;
7338 : case SPDK_BDEV_IO_TYPE_WRITE:
7339 75 : io_stat->bytes_written += num_blocks * blocklen;
7340 75 : io_stat->num_write_ops++;
7341 75 : io_stat->write_latency_ticks += tsc_diff;
7342 75 : if (io_stat->max_write_latency_ticks < tsc_diff) {
7343 4 : io_stat->max_write_latency_ticks = tsc_diff;
7344 4 : }
7345 75 : if (io_stat->min_write_latency_ticks > tsc_diff) {
7346 25 : io_stat->min_write_latency_ticks = tsc_diff;
7347 25 : }
7348 75 : break;
7349 : case SPDK_BDEV_IO_TYPE_UNMAP:
7350 20 : io_stat->bytes_unmapped += num_blocks * blocklen;
7351 20 : io_stat->num_unmap_ops++;
7352 20 : io_stat->unmap_latency_ticks += tsc_diff;
7353 20 : if (io_stat->max_unmap_latency_ticks < tsc_diff) {
7354 0 : io_stat->max_unmap_latency_ticks = tsc_diff;
7355 0 : }
7356 20 : if (io_stat->min_unmap_latency_ticks > tsc_diff) {
7357 3 : io_stat->min_unmap_latency_ticks = tsc_diff;
7358 3 : }
7359 20 : break;
7360 : case SPDK_BDEV_IO_TYPE_ZCOPY:
7361 : /* Track the data in the start phase only */
7362 4 : if (bdev_io->u.bdev.zcopy.start) {
7363 2 : if (bdev_io->u.bdev.zcopy.populate) {
7364 1 : io_stat->bytes_read += num_blocks * blocklen;
7365 1 : io_stat->num_read_ops++;
7366 1 : io_stat->read_latency_ticks += tsc_diff;
7367 1 : if (io_stat->max_read_latency_ticks < tsc_diff) {
7368 0 : io_stat->max_read_latency_ticks = tsc_diff;
7369 0 : }
7370 1 : if (io_stat->min_read_latency_ticks > tsc_diff) {
7371 1 : io_stat->min_read_latency_ticks = tsc_diff;
7372 1 : }
7373 1 : } else {
7374 1 : io_stat->bytes_written += num_blocks * blocklen;
7375 1 : io_stat->num_write_ops++;
7376 1 : io_stat->write_latency_ticks += tsc_diff;
7377 1 : if (io_stat->max_write_latency_ticks < tsc_diff) {
7378 0 : io_stat->max_write_latency_ticks = tsc_diff;
7379 0 : }
7380 1 : if (io_stat->min_write_latency_ticks > tsc_diff) {
7381 1 : io_stat->min_write_latency_ticks = tsc_diff;
7382 1 : }
7383 : }
7384 2 : }
7385 4 : break;
7386 : case SPDK_BDEV_IO_TYPE_COPY:
7387 21 : io_stat->bytes_copied += num_blocks * blocklen;
7388 21 : io_stat->num_copy_ops++;
7389 21 : bdev_io->internal.ch->stat->copy_latency_ticks += tsc_diff;
7390 21 : if (io_stat->max_copy_latency_ticks < tsc_diff) {
7391 0 : io_stat->max_copy_latency_ticks = tsc_diff;
7392 0 : }
7393 21 : if (io_stat->min_copy_latency_ticks > tsc_diff) {
7394 4 : io_stat->min_copy_latency_ticks = tsc_diff;
7395 4 : }
7396 21 : break;
7397 : default:
7398 78 : break;
7399 : }
7400 612 : } else if (io_status <= SPDK_BDEV_IO_STATUS_FAILED && io_status >= SPDK_MIN_BDEV_IO_STATUS) {
7401 93 : io_stat = bdev_io->bdev->internal.stat;
7402 93 : assert(io_stat->io_error != NULL);
7403 :
7404 93 : spdk_spin_lock(&bdev_io->bdev->internal.spinlock);
7405 93 : io_stat->io_error->error_status[-io_status - 1]++;
7406 93 : spdk_spin_unlock(&bdev_io->bdev->internal.spinlock);
7407 93 : }
7408 :
7409 : #ifdef SPDK_CONFIG_VTUNE
7410 : uint64_t now_tsc = spdk_get_ticks();
7411 : if (now_tsc > (bdev_io->internal.ch->start_tsc + bdev_io->internal.ch->interval_tsc)) {
7412 : uint64_t data[5];
7413 : struct spdk_bdev_io_stat *prev_stat = bdev_io->internal.ch->prev_stat;
7414 :
7415 : data[0] = io_stat->num_read_ops - prev_stat->num_read_ops;
7416 : data[1] = io_stat->bytes_read - prev_stat->bytes_read;
7417 : data[2] = io_stat->num_write_ops - prev_stat->num_write_ops;
7418 : data[3] = io_stat->bytes_written - prev_stat->bytes_written;
7419 : data[4] = bdev_io->bdev->fn_table->get_spin_time ?
7420 : bdev_io->bdev->fn_table->get_spin_time(spdk_bdev_io_get_io_channel(bdev_io)) : 0;
7421 :
7422 : __itt_metadata_add(g_bdev_mgr.domain, __itt_null, bdev_io->internal.ch->handle,
7423 : __itt_metadata_u64, 5, data);
7424 :
7425 : memcpy(prev_stat, io_stat, sizeof(struct spdk_bdev_io_stat));
7426 : bdev_io->internal.ch->start_tsc = now_tsc;
7427 : }
7428 : #endif
7429 612 : }
7430 :
7431 : static inline void
7432 612 : _bdev_io_complete(void *ctx)
7433 : {
7434 612 : struct spdk_bdev_io *bdev_io = ctx;
7435 :
7436 612 : if (spdk_unlikely(bdev_io_use_accel_sequence(bdev_io))) {
7437 0 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS);
7438 0 : spdk_accel_sequence_abort(bdev_io->internal.accel_sequence);
7439 0 : }
7440 :
7441 612 : assert(bdev_io->internal.cb != NULL);
7442 612 : assert(spdk_get_thread() == spdk_bdev_io_get_thread(bdev_io));
7443 :
7444 1224 : bdev_io->internal.cb(bdev_io, bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS,
7445 612 : bdev_io->internal.caller_ctx);
7446 612 : }
7447 :
7448 : static inline void
7449 620 : bdev_io_complete(void *ctx)
7450 : {
7451 620 : struct spdk_bdev_io *bdev_io = ctx;
7452 620 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
7453 : uint64_t tsc, tsc_diff;
7454 :
7455 620 : if (spdk_unlikely(bdev_io->internal.f.in_submit_request)) {
7456 : /*
7457 : * Defer completion to avoid potential infinite recursion if the
7458 : * user's completion callback issues a new I/O.
7459 : */
7460 16 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7461 8 : bdev_io_complete, bdev_io);
7462 8 : return;
7463 : }
7464 :
7465 612 : tsc = spdk_get_ticks();
7466 612 : tsc_diff = tsc - bdev_io->internal.submit_tsc;
7467 :
7468 612 : bdev_ch_remove_from_io_submitted(bdev_io);
7469 612 : spdk_trace_record_tsc(tsc, TRACE_BDEV_IO_DONE, bdev_ch->trace_id, 0, (uintptr_t)bdev_io,
7470 : bdev_io->internal.caller_ctx, bdev_ch->queue_depth);
7471 :
7472 612 : if (bdev_ch->histogram) {
7473 4 : if (bdev_io->bdev->internal.histogram_io_type == 0 ||
7474 0 : bdev_io->bdev->internal.histogram_io_type == bdev_io->type) {
7475 : /*
7476 : * Tally all I/O types if the histogram_io_type is set to 0.
7477 : */
7478 4 : spdk_histogram_data_tally(bdev_ch->histogram, tsc_diff);
7479 4 : }
7480 4 : }
7481 :
7482 612 : bdev_io_update_io_stat(bdev_io, tsc_diff);
7483 612 : _bdev_io_complete(bdev_io);
7484 620 : }
7485 :
7486 : /* The difference between this function and bdev_io_complete() is that this should be called to
7487 : * complete IOs that haven't been submitted via bdev_io_submit(), as they weren't added onto the
7488 : * io_submitted list and don't have submit_tsc updated.
7489 : */
7490 : static inline void
7491 0 : bdev_io_complete_unsubmitted(struct spdk_bdev_io *bdev_io)
7492 : {
7493 : /* Since the IO hasn't been submitted it's bound to be failed */
7494 0 : assert(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_SUCCESS);
7495 :
7496 : /* At this point we don't know if the IO is completed from submission context or not, but,
7497 : * since this is an error path, we can always do an spdk_thread_send_msg(). */
7498 0 : spdk_thread_send_msg(spdk_bdev_io_get_thread(bdev_io),
7499 0 : _bdev_io_complete, bdev_io);
7500 0 : }
7501 :
7502 : static void bdev_destroy_cb(void *io_device);
7503 :
7504 : static inline void
7505 18 : _bdev_reset_complete(void *ctx)
7506 : {
7507 18 : struct spdk_bdev_io *bdev_io = ctx;
7508 :
7509 : /* Put the channel reference we got in submission. */
7510 18 : assert(bdev_io->u.reset.ch_ref != NULL);
7511 18 : spdk_put_io_channel(bdev_io->u.reset.ch_ref);
7512 18 : bdev_io->u.reset.ch_ref = NULL;
7513 :
7514 18 : bdev_io_complete(bdev_io);
7515 18 : }
7516 :
7517 : static void
7518 16 : bdev_reset_complete(struct spdk_bdev *bdev, void *_ctx, int status)
7519 : {
7520 16 : struct spdk_bdev_io *bdev_io = _ctx;
7521 : bdev_io_tailq_t queued_resets;
7522 : struct spdk_bdev_io *queued_reset;
7523 :
7524 16 : assert(bdev_io == bdev->internal.reset_in_progress);
7525 :
7526 16 : TAILQ_INIT(&queued_resets);
7527 :
7528 16 : spdk_spin_lock(&bdev->internal.spinlock);
7529 16 : TAILQ_SWAP(&bdev->internal.queued_resets, &queued_resets,
7530 : spdk_bdev_io, internal.link);
7531 16 : bdev->internal.reset_in_progress = NULL;
7532 16 : spdk_spin_unlock(&bdev->internal.spinlock);
7533 :
7534 18 : while (!TAILQ_EMPTY(&queued_resets)) {
7535 2 : queued_reset = TAILQ_FIRST(&queued_resets);
7536 2 : TAILQ_REMOVE(&queued_resets, queued_reset, internal.link);
7537 2 : queued_reset->internal.status = bdev_io->internal.status;
7538 4 : spdk_thread_send_msg(spdk_bdev_io_get_thread(queued_reset),
7539 2 : _bdev_reset_complete, queued_reset);
7540 : }
7541 :
7542 16 : _bdev_reset_complete(bdev_io);
7543 :
7544 16 : if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING &&
7545 1 : TAILQ_EMPTY(&bdev->internal.open_descs)) {
7546 1 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
7547 1 : }
7548 16 : }
7549 :
7550 : static void
7551 20 : bdev_unfreeze_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
7552 : struct spdk_io_channel *_ch, void *_ctx)
7553 : {
7554 20 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
7555 :
7556 20 : ch->flags &= ~BDEV_CH_RESET_IN_PROGRESS;
7557 :
7558 20 : spdk_bdev_for_each_channel_continue(i, 0);
7559 20 : }
7560 :
7561 : static void
7562 0 : bdev_io_complete_sequence_cb(void *ctx, int status)
7563 : {
7564 0 : struct spdk_bdev_io *bdev_io = ctx;
7565 :
7566 : /* u.bdev.accel_sequence should have already been cleared at this point */
7567 0 : assert(bdev_io->u.bdev.accel_sequence == NULL);
7568 0 : assert(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
7569 0 : bdev_io->internal.f.has_accel_sequence = false;
7570 :
7571 0 : if (spdk_unlikely(status != 0)) {
7572 0 : SPDK_ERRLOG("Failed to execute accel sequence, status=%d\n", status);
7573 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
7574 0 : }
7575 :
7576 0 : bdev_io_complete(bdev_io);
7577 0 : }
7578 :
7579 : void
7580 598 : spdk_bdev_io_complete(struct spdk_bdev_io *bdev_io, enum spdk_bdev_io_status status)
7581 : {
7582 598 : struct spdk_bdev *bdev = bdev_io->bdev;
7583 598 : struct spdk_bdev_channel *bdev_ch = bdev_io->internal.ch;
7584 598 : struct spdk_bdev_shared_resource *shared_resource = bdev_ch->shared_resource;
7585 :
7586 598 : if (spdk_unlikely(bdev_io->internal.status != SPDK_BDEV_IO_STATUS_PENDING)) {
7587 0 : SPDK_ERRLOG("Unexpected completion on IO from %s module, status was %s\n",
7588 : spdk_bdev_get_module_name(bdev),
7589 : bdev_io_status_get_string(bdev_io->internal.status));
7590 0 : assert(false);
7591 : }
7592 598 : bdev_io->internal.status = status;
7593 :
7594 598 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_RESET)) {
7595 16 : assert(bdev_io == bdev->internal.reset_in_progress);
7596 16 : spdk_bdev_for_each_channel(bdev, bdev_unfreeze_channel, bdev_io,
7597 : bdev_reset_complete);
7598 16 : return;
7599 : } else {
7600 582 : bdev_io_decrement_outstanding(bdev_ch, shared_resource);
7601 582 : if (spdk_likely(status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7602 485 : if (bdev_io_needs_sequence_exec(bdev_io->internal.desc, bdev_io)) {
7603 0 : bdev_io_exec_sequence(bdev_io, bdev_io_complete_sequence_cb);
7604 0 : return;
7605 485 : } else if (spdk_unlikely(bdev_io->internal.f.has_bounce_buf &&
7606 : !bdev_io_use_accel_sequence(bdev_io))) {
7607 26 : _bdev_io_push_bounce_data_buffer(bdev_io,
7608 : _bdev_io_complete_push_bounce_done);
7609 : /* bdev IO will be completed in the callback */
7610 26 : return;
7611 : }
7612 459 : }
7613 :
7614 556 : if (spdk_unlikely(_bdev_io_handle_no_mem(bdev_io, BDEV_IO_RETRY_STATE_SUBMIT))) {
7615 5 : return;
7616 : }
7617 : }
7618 :
7619 551 : bdev_io_complete(bdev_io);
7620 598 : }
7621 :
7622 : void
7623 0 : spdk_bdev_io_complete_scsi_status(struct spdk_bdev_io *bdev_io, enum spdk_scsi_status sc,
7624 : enum spdk_scsi_sense sk, uint8_t asc, uint8_t ascq)
7625 : {
7626 : enum spdk_bdev_io_status status;
7627 :
7628 0 : if (sc == SPDK_SCSI_STATUS_GOOD) {
7629 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7630 0 : } else {
7631 0 : status = SPDK_BDEV_IO_STATUS_SCSI_ERROR;
7632 0 : bdev_io->internal.error.scsi.sc = sc;
7633 0 : bdev_io->internal.error.scsi.sk = sk;
7634 0 : bdev_io->internal.error.scsi.asc = asc;
7635 0 : bdev_io->internal.error.scsi.ascq = ascq;
7636 : }
7637 :
7638 0 : spdk_bdev_io_complete(bdev_io, status);
7639 0 : }
7640 :
7641 : void
7642 0 : spdk_bdev_io_get_scsi_status(const struct spdk_bdev_io *bdev_io,
7643 : int *sc, int *sk, int *asc, int *ascq)
7644 : {
7645 0 : assert(sc != NULL);
7646 0 : assert(sk != NULL);
7647 0 : assert(asc != NULL);
7648 0 : assert(ascq != NULL);
7649 :
7650 0 : switch (bdev_io->internal.status) {
7651 : case SPDK_BDEV_IO_STATUS_SUCCESS:
7652 0 : *sc = SPDK_SCSI_STATUS_GOOD;
7653 0 : *sk = SPDK_SCSI_SENSE_NO_SENSE;
7654 0 : *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
7655 0 : *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
7656 0 : break;
7657 : case SPDK_BDEV_IO_STATUS_NVME_ERROR:
7658 0 : spdk_scsi_nvme_translate(bdev_io, sc, sk, asc, ascq);
7659 0 : break;
7660 : case SPDK_BDEV_IO_STATUS_MISCOMPARE:
7661 0 : *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
7662 0 : *sk = SPDK_SCSI_SENSE_MISCOMPARE;
7663 0 : *asc = SPDK_SCSI_ASC_MISCOMPARE_DURING_VERIFY_OPERATION;
7664 0 : *ascq = bdev_io->internal.error.scsi.ascq;
7665 0 : break;
7666 : case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
7667 0 : *sc = bdev_io->internal.error.scsi.sc;
7668 0 : *sk = bdev_io->internal.error.scsi.sk;
7669 0 : *asc = bdev_io->internal.error.scsi.asc;
7670 0 : *ascq = bdev_io->internal.error.scsi.ascq;
7671 0 : break;
7672 : default:
7673 0 : *sc = SPDK_SCSI_STATUS_CHECK_CONDITION;
7674 0 : *sk = SPDK_SCSI_SENSE_ABORTED_COMMAND;
7675 0 : *asc = SPDK_SCSI_ASC_NO_ADDITIONAL_SENSE;
7676 0 : *ascq = SPDK_SCSI_ASCQ_CAUSE_NOT_REPORTABLE;
7677 0 : break;
7678 : }
7679 0 : }
7680 :
7681 : void
7682 0 : spdk_bdev_io_complete_aio_status(struct spdk_bdev_io *bdev_io, int aio_result)
7683 : {
7684 : enum spdk_bdev_io_status status;
7685 :
7686 0 : if (aio_result == 0) {
7687 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7688 0 : } else {
7689 0 : status = SPDK_BDEV_IO_STATUS_AIO_ERROR;
7690 : }
7691 :
7692 0 : bdev_io->internal.error.aio_result = aio_result;
7693 :
7694 0 : spdk_bdev_io_complete(bdev_io, status);
7695 0 : }
7696 :
7697 : void
7698 0 : spdk_bdev_io_get_aio_status(const struct spdk_bdev_io *bdev_io, int *aio_result)
7699 : {
7700 0 : assert(aio_result != NULL);
7701 :
7702 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_AIO_ERROR) {
7703 0 : *aio_result = bdev_io->internal.error.aio_result;
7704 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7705 0 : *aio_result = 0;
7706 0 : } else {
7707 0 : *aio_result = -EIO;
7708 : }
7709 0 : }
7710 :
7711 : void
7712 0 : spdk_bdev_io_complete_nvme_status(struct spdk_bdev_io *bdev_io, uint32_t cdw0, int sct, int sc)
7713 : {
7714 : enum spdk_bdev_io_status status;
7715 :
7716 0 : if (spdk_likely(sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_SUCCESS)) {
7717 0 : status = SPDK_BDEV_IO_STATUS_SUCCESS;
7718 0 : } else if (sct == SPDK_NVME_SCT_GENERIC && sc == SPDK_NVME_SC_ABORTED_BY_REQUEST) {
7719 0 : status = SPDK_BDEV_IO_STATUS_ABORTED;
7720 0 : } else {
7721 0 : status = SPDK_BDEV_IO_STATUS_NVME_ERROR;
7722 : }
7723 :
7724 0 : bdev_io->internal.error.nvme.cdw0 = cdw0;
7725 0 : bdev_io->internal.error.nvme.sct = sct;
7726 0 : bdev_io->internal.error.nvme.sc = sc;
7727 :
7728 0 : spdk_bdev_io_complete(bdev_io, status);
7729 0 : }
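 :
 : /* Editor's note: an illustrative module-side completion, assuming `cpl`
 :  * points to the spdk_nvme_cpl the backend received for this bdev_io:
 :  *
 :  *   spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0,
 :  *                                     cpl->status.sct, cpl->status.sc);
 :  */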
7730 :
7731 : void
7732 0 : spdk_bdev_io_get_nvme_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0, int *sct, int *sc)
7733 : {
7734 0 : assert(sct != NULL);
7735 0 : assert(sc != NULL);
7736 0 : assert(cdw0 != NULL);
7737 :
7738 0 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_ABORT)) {
7739 0 : *sct = SPDK_NVME_SCT_GENERIC;
7740 0 : *sc = SPDK_NVME_SC_SUCCESS;
7741 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7742 0 : *cdw0 = 0;
7743 0 : } else {
7744 0 : *cdw0 = 1U;
7745 : }
7746 0 : return;
7747 : }
7748 :
7749 0 : if (spdk_likely(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
7750 0 : *sct = SPDK_NVME_SCT_GENERIC;
7751 0 : *sc = SPDK_NVME_SC_SUCCESS;
7752 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
7753 0 : *sct = bdev_io->internal.error.nvme.sct;
7754 0 : *sc = bdev_io->internal.error.nvme.sc;
7755 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED) {
7756 0 : *sct = SPDK_NVME_SCT_GENERIC;
7757 0 : *sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7758 0 : } else {
7759 0 : *sct = SPDK_NVME_SCT_GENERIC;
7760 0 : *sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7761 : }
7762 :
7763 0 : *cdw0 = bdev_io->internal.error.nvme.cdw0;
7764 0 : }
7765 :
7766 : void
7767 0 : spdk_bdev_io_get_nvme_fused_status(const struct spdk_bdev_io *bdev_io, uint32_t *cdw0,
7768 : int *first_sct, int *first_sc, int *second_sct, int *second_sc)
7769 : {
7770 0 : assert(first_sct != NULL);
7771 0 : assert(first_sc != NULL);
7772 0 : assert(second_sct != NULL);
7773 0 : assert(second_sc != NULL);
7774 0 : assert(cdw0 != NULL);
7775 :
7776 0 : if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR) {
7777 0 : if (bdev_io->internal.error.nvme.sct == SPDK_NVME_SCT_MEDIA_ERROR &&
7778 0 : bdev_io->internal.error.nvme.sc == SPDK_NVME_SC_COMPARE_FAILURE) {
7779 0 : *first_sct = bdev_io->internal.error.nvme.sct;
7780 0 : *first_sc = bdev_io->internal.error.nvme.sc;
7781 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7782 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7783 0 : } else {
7784 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7785 0 : *first_sc = SPDK_NVME_SC_SUCCESS;
7786 0 : *second_sct = bdev_io->internal.error.nvme.sct;
7787 0 : *second_sc = bdev_io->internal.error.nvme.sc;
7788 : }
7789 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED) {
7790 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7791 0 : *first_sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7792 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7793 0 : *second_sc = SPDK_NVME_SC_ABORTED_BY_REQUEST;
7794 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS) {
7795 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7796 0 : *first_sc = SPDK_NVME_SC_SUCCESS;
7797 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7798 0 : *second_sc = SPDK_NVME_SC_SUCCESS;
7799 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_FIRST_FUSED_FAILED) {
7800 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7801 0 : *first_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7802 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7803 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7804 0 : } else if (bdev_io->internal.status == SPDK_BDEV_IO_STATUS_MISCOMPARE) {
7805 0 : *first_sct = SPDK_NVME_SCT_MEDIA_ERROR;
7806 0 : *first_sc = SPDK_NVME_SC_COMPARE_FAILURE;
7807 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7808 0 : *second_sc = SPDK_NVME_SC_ABORTED_FAILED_FUSED;
7809 0 : } else {
7810 0 : *first_sct = SPDK_NVME_SCT_GENERIC;
7811 0 : *first_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7812 0 : *second_sct = SPDK_NVME_SCT_GENERIC;
7813 0 : *second_sc = SPDK_NVME_SC_INTERNAL_DEVICE_ERROR;
7814 : }
7815 :
7816 0 : *cdw0 = bdev_io->internal.error.nvme.cdw0;
7817 0 : }
7818 :
7819 : void
7820 0 : spdk_bdev_io_complete_base_io_status(struct spdk_bdev_io *bdev_io,
7821 : const struct spdk_bdev_io *base_io)
7822 : {
7823 0 : switch (base_io->internal.status) {
7824 : case SPDK_BDEV_IO_STATUS_NVME_ERROR:
7825 0 : spdk_bdev_io_complete_nvme_status(bdev_io,
7826 0 : base_io->internal.error.nvme.cdw0,
7827 0 : base_io->internal.error.nvme.sct,
7828 0 : base_io->internal.error.nvme.sc);
7829 0 : break;
7830 : case SPDK_BDEV_IO_STATUS_SCSI_ERROR:
7831 0 : spdk_bdev_io_complete_scsi_status(bdev_io,
7832 0 : base_io->internal.error.scsi.sc,
7833 0 : base_io->internal.error.scsi.sk,
7834 0 : base_io->internal.error.scsi.asc,
7835 0 : base_io->internal.error.scsi.ascq);
7836 0 : break;
7837 : case SPDK_BDEV_IO_STATUS_AIO_ERROR:
7838 0 : spdk_bdev_io_complete_aio_status(bdev_io, base_io->internal.error.aio_result);
7839 0 : break;
7840 : default:
7841 0 : spdk_bdev_io_complete(bdev_io, base_io->internal.status);
7842 0 : break;
7843 : }
7844 0 : }
7845 :
7846 : struct spdk_thread *
7847 664 : spdk_bdev_io_get_thread(struct spdk_bdev_io *bdev_io)
7848 : {
7849 664 : return spdk_io_channel_get_thread(bdev_io->internal.ch->channel);
7850 : }
7851 :
7852 : struct spdk_io_channel *
7853 70 : spdk_bdev_io_get_io_channel(struct spdk_bdev_io *bdev_io)
7854 : {
7855 70 : return bdev_io->internal.ch->channel;
7856 : }
7857 :
7858 : static int
7859 131 : bdev_register(struct spdk_bdev *bdev)
7860 : {
7861 : char *bdev_name;
7862 : char uuid[SPDK_UUID_STRING_LEN];
7863 : struct spdk_iobuf_opts iobuf_opts;
7864 : int ret;
7865 :
7866 131 : assert(bdev->module != NULL);
7867 :
7868 131 : if (!bdev->name) {
7869 0 : SPDK_ERRLOG("Bdev name is NULL\n");
7870 0 : return -EINVAL;
7871 : }
7872 :
7873 131 : if (!strlen(bdev->name)) {
7874 0 : SPDK_ERRLOG("Bdev name must not be an empty string\n");
7875 0 : return -EINVAL;
7876 : }
7877 :
7878 : /* Users often register their own I/O devices using the bdev name. In
7879 : * order to avoid conflicts, prepend bdev_. */
7880 131 : bdev_name = spdk_sprintf_alloc("bdev_%s", bdev->name);
7881 131 : if (!bdev_name) {
7882 0 : SPDK_ERRLOG("Unable to allocate memory for internal bdev name.\n");
7883 0 : return -ENOMEM;
7884 : }
7885 :
7886 131 : bdev->internal.stat = bdev_alloc_io_stat(true);
7887 131 : if (!bdev->internal.stat) {
7888 0 : SPDK_ERRLOG("Unable to allocate I/O statistics structure.\n");
7889 0 : free(bdev_name);
7890 0 : return -ENOMEM;
7891 : }
7892 :
7893 131 : bdev->internal.status = SPDK_BDEV_STATUS_READY;
7894 131 : bdev->internal.measured_queue_depth = UINT64_MAX;
7895 131 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
7896 131 : memset(&bdev->internal.claim, 0, sizeof(bdev->internal.claim));
7897 131 : bdev->internal.qd_poller = NULL;
7898 131 : bdev->internal.qos = NULL;
7899 :
7900 131 : TAILQ_INIT(&bdev->internal.open_descs);
7901 131 : TAILQ_INIT(&bdev->internal.locked_ranges);
7902 131 : TAILQ_INIT(&bdev->internal.pending_locked_ranges);
7903 131 : TAILQ_INIT(&bdev->internal.queued_resets);
7904 131 : TAILQ_INIT(&bdev->aliases);
7905 :
7906 : /* UUID may be specified by the user or defined by bdev itself.
7907 : * Otherwise it will be generated here, so this field will never be empty. */
7908 131 : if (spdk_uuid_is_null(&bdev->uuid)) {
7909 43 : spdk_uuid_generate(&bdev->uuid);
7910 43 : }
7911 :
7912 : /* Add the UUID alias only if it's different than the name */
7913 131 : spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
7914 131 : if (strcmp(bdev->name, uuid) != 0) {
7915 130 : ret = spdk_bdev_alias_add(bdev, uuid);
7916 130 : if (ret != 0) {
7917 2 : SPDK_ERRLOG("Unable to add uuid:%s alias for bdev %s\n", uuid, bdev->name);
7918 2 : bdev_free_io_stat(bdev->internal.stat);
7919 2 : free(bdev_name);
7920 2 : return ret;
7921 : }
7922 128 : }
7923 :
7924 129 : spdk_iobuf_get_opts(&iobuf_opts, sizeof(iobuf_opts));
7925 129 : if (spdk_bdev_get_buf_align(bdev) > 1) {
7926 0 : bdev->max_rw_size = spdk_min(bdev->max_rw_size ? bdev->max_rw_size : UINT32_MAX,
7927 : iobuf_opts.large_bufsize / bdev->blocklen);
7928 0 : }
7929 :
7930 : /* If the user didn't specify a write unit size, set it to one. */
7931 129 : if (bdev->write_unit_size == 0) {
7932 125 : bdev->write_unit_size = 1;
7933 125 : }
7934 :
7935 : /* Set ACWU value to the write unit size if bdev module did not set it (does not support it natively) */
7936 129 : if (bdev->acwu == 0) {
7937 125 : bdev->acwu = bdev->write_unit_size;
7938 125 : }
7939 :
7940 129 : if (bdev->phys_blocklen == 0) {
7941 125 : bdev->phys_blocklen = spdk_bdev_get_data_block_size(bdev);
7942 125 : }
7943 :
7944 129 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY)) {
7945 0 : bdev->max_copy = bdev_get_max_write(bdev, iobuf_opts.large_bufsize);
7946 0 : }
7947 :
7948 129 : if (!bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_WRITE_ZEROES)) {
7949 0 : bdev->max_write_zeroes = bdev_get_max_write(bdev, ZERO_BUFFER_SIZE);
7950 0 : }
7951 :
7952 129 : bdev->internal.reset_in_progress = NULL;
7953 129 : bdev->internal.qd_poll_in_progress = false;
7954 129 : bdev->internal.period = 0;
7955 129 : bdev->internal.new_period = 0;
7956 129 : bdev->internal.trace_id = spdk_trace_register_owner(OWNER_TYPE_BDEV, bdev_name);
7957 :
7958 : /*
7959 : * Initialize spinlock before registering IO device because spinlock is used in
7960 : * bdev_channel_create
7961 : */
7962 129 : spdk_spin_init(&bdev->internal.spinlock);
7963 :
7964 258 : spdk_io_device_register(__bdev_to_io_dev(bdev),
7965 : bdev_channel_create, bdev_channel_destroy,
7966 : sizeof(struct spdk_bdev_channel),
7967 129 : bdev_name);
7968 :
7969 : /*
7970 : * Register bdev name only after the bdev object is ready.
7971 : * After bdev_name_add returns, it is possible for other threads to start using the bdev,
7972 : * create IO channels...
7973 : */
7974 129 : ret = bdev_name_add(&bdev->internal.bdev_name, bdev, bdev->name);
7975 129 : if (ret != 0) {
7976 0 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), NULL);
7977 0 : bdev_free_io_stat(bdev->internal.stat);
7978 0 : spdk_spin_destroy(&bdev->internal.spinlock);
7979 0 : free(bdev_name);
7980 0 : return ret;
7981 : }
7982 :
7983 129 : free(bdev_name);
7984 :
7985 129 : SPDK_DEBUGLOG(bdev, "Inserting bdev %s into list\n", bdev->name);
7986 129 : TAILQ_INSERT_TAIL(&g_bdev_mgr.bdevs, bdev, internal.link);
7987 :
7988 129 : return 0;
7989 131 : }
7990 :
7991 : static void
7992 130 : bdev_destroy_cb(void *io_device)
7993 : {
7994 : int rc;
7995 : struct spdk_bdev *bdev;
7996 : spdk_bdev_unregister_cb cb_fn;
7997 : void *cb_arg;
7998 :
7999 130 : bdev = __bdev_from_io_dev(io_device);
8000 :
8001 130 : if (bdev->internal.unregister_td != spdk_get_thread()) {
8002 1 : spdk_thread_send_msg(bdev->internal.unregister_td, bdev_destroy_cb, io_device);
8003 1 : return;
8004 : }
8005 :
8006 129 : cb_fn = bdev->internal.unregister_cb;
8007 129 : cb_arg = bdev->internal.unregister_ctx;
8008 :
8009 129 : spdk_spin_destroy(&bdev->internal.spinlock);
8010 129 : free(bdev->internal.qos);
8011 129 : bdev_free_io_stat(bdev->internal.stat);
8012 129 : spdk_trace_unregister_owner(bdev->internal.trace_id);
8013 :
8014 129 : rc = bdev->fn_table->destruct(bdev->ctxt);
8015 129 : if (rc < 0) {
8016 0 : SPDK_ERRLOG("destruct failed\n");
8017 0 : }
8018 129 : if (rc <= 0 && cb_fn != NULL) {
8019 10 : cb_fn(cb_arg, rc);
8020 10 : }
8021 130 : }
8022 :
8023 : void
8024 2 : spdk_bdev_destruct_done(struct spdk_bdev *bdev, int bdeverrno)
8025 : {
8026 2 : if (bdev->internal.unregister_cb != NULL) {
8027 0 : bdev->internal.unregister_cb(bdev->internal.unregister_ctx, bdeverrno);
8028 0 : }
8029 2 : }
8030 :
8031 : static void
8032 19 : _remove_notify(void *arg)
8033 : {
8034 19 : struct spdk_bdev_desc *desc = arg;
8035 :
8036 19 : _event_notify(desc, SPDK_BDEV_EVENT_REMOVE);
8037 19 : }
8038 :
8039 : /* returns: 0 - bdev removed and ready to be destructed.
8040 : * -EBUSY - bdev can't be destructed yet. */
8041 : static int
8042 144 : bdev_unregister_unsafe(struct spdk_bdev *bdev)
8043 : {
8044 : struct spdk_bdev_desc *desc, *tmp;
8045 : struct spdk_bdev_alias *alias;
8046 144 : int rc = 0;
8047 : char uuid[SPDK_UUID_STRING_LEN];
8048 :
8049 144 : assert(spdk_spin_held(&g_bdev_mgr.spinlock));
8050 144 : assert(spdk_spin_held(&bdev->internal.spinlock));
8051 :
8052 : /* Notify each descriptor about hotremoval */
8053 163 : TAILQ_FOREACH_SAFE(desc, &bdev->internal.open_descs, link, tmp) {
8054 19 : rc = -EBUSY;
8055 : /*
8056 : * Defer invocation of the event_cb to a separate message that will
8057 : * run later on its thread. This ensures this context unwinds and
8058 : * we don't recursively unregister this bdev again if the event_cb
8059 : * immediately closes its descriptor.
8060 : */
8061 19 : event_notify(desc, _remove_notify);
8062 19 : }
8063 :
8064 : /* If there are no descriptors, proceed removing the bdev */
8065 144 : if (rc == 0) {
8066 129 : bdev_examine_allowlist_remove(bdev->name);
8067 256 : TAILQ_FOREACH(alias, &bdev->aliases, tailq) {
8068 127 : bdev_examine_allowlist_remove(alias->alias.name);
8069 127 : }
8070 129 : TAILQ_REMOVE(&g_bdev_mgr.bdevs, bdev, internal.link);
8071 129 : SPDK_DEBUGLOG(bdev, "Removing bdev %s from list done\n", bdev->name);
8072 :
8073 : /* Delete the name and the UUID alias */
8074 129 : spdk_uuid_fmt_lower(uuid, sizeof(uuid), &bdev->uuid);
8075 129 : bdev_name_del_unsafe(&bdev->internal.bdev_name);
8076 129 : bdev_alias_del(bdev, uuid, bdev_name_del_unsafe);
8077 :
8078 129 : spdk_notify_send("bdev_unregister", spdk_bdev_get_name(bdev));
8079 :
8080 129 : if (bdev->internal.reset_in_progress != NULL) {
8081 : /* If reset is in progress, let the completion callback for reset
8082 : * unregister the bdev.
8083 : */
8084 1 : rc = -EBUSY;
8085 1 : }
8086 129 : }
8087 :
8088 144 : return rc;
8089 : }
8090 :
8091 : static void
8092 4 : bdev_unregister_abort_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
8093 : struct spdk_io_channel *io_ch, void *_ctx)
8094 : {
8095 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
8096 :
8097 4 : bdev_channel_abort_queued_ios(bdev_ch);
8098 4 : spdk_bdev_for_each_channel_continue(i, 0);
8099 4 : }
8100 :
8101 : static void
8102 129 : bdev_unregister(struct spdk_bdev *bdev, void *_ctx, int status)
8103 : {
8104 : int rc;
8105 :
8106 129 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8107 129 : spdk_spin_lock(&bdev->internal.spinlock);
8108 : /*
8109 : * Set the status to REMOVING after completing to abort channels. Otherwise,
8110 : * the last spdk_bdev_close() may call spdk_io_device_unregister() while
8111 : * spdk_bdev_for_each_channel() is executed and spdk_io_device_unregister()
8112 : * may fail.
8113 : */
8114 129 : bdev->internal.status = SPDK_BDEV_STATUS_REMOVING;
8115 129 : rc = bdev_unregister_unsafe(bdev);
8116 129 : spdk_spin_unlock(&bdev->internal.spinlock);
8117 129 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8118 :
8119 129 : if (rc == 0) {
8120 113 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
8121 113 : }
8122 129 : }
8123 :
8124 : void
8125 136 : spdk_bdev_unregister(struct spdk_bdev *bdev, spdk_bdev_unregister_cb cb_fn, void *cb_arg)
8126 : {
8127 : struct spdk_thread *thread;
8128 :
8129 136 : SPDK_DEBUGLOG(bdev, "Removing bdev %s from list\n", bdev->name);
8130 :
8131 136 : thread = spdk_get_thread();
8132 136 : if (!thread) {
8133 : /* The user called this from a non-SPDK thread. */
8134 0 : if (cb_fn != NULL) {
8135 0 : cb_fn(cb_arg, -ENOTSUP);
8136 0 : }
8137 0 : return;
8138 : }
8139 :
8140 136 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8141 136 : if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
8142 136 : bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
8143 7 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8144 7 : if (cb_fn) {
8145 0 : cb_fn(cb_arg, -EBUSY);
8146 0 : }
8147 7 : return;
8148 : }
8149 :
8150 129 : spdk_spin_lock(&bdev->internal.spinlock);
8151 129 : bdev->internal.status = SPDK_BDEV_STATUS_UNREGISTERING;
8152 129 : bdev->internal.unregister_cb = cb_fn;
8153 129 : bdev->internal.unregister_ctx = cb_arg;
8154 129 : bdev->internal.unregister_td = thread;
8155 129 : spdk_spin_unlock(&bdev->internal.spinlock);
8156 129 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8157 :
8158 129 : spdk_bdev_set_qd_sampling_period(bdev, 0);
8159 :
8160 129 : spdk_bdev_for_each_channel(bdev, bdev_unregister_abort_channel, bdev,
8161 : bdev_unregister);
8162 136 : }
8163 :
8164 : int
8165 4 : spdk_bdev_unregister_by_name(const char *bdev_name, struct spdk_bdev_module *module,
8166 : spdk_bdev_unregister_cb cb_fn, void *cb_arg)
8167 : {
8168 : struct spdk_bdev_desc *desc;
8169 : struct spdk_bdev *bdev;
8170 : int rc;
8171 :
8172 4 : rc = spdk_bdev_open_ext(bdev_name, false, _tmp_bdev_event_cb, NULL, &desc);
8173 4 : if (rc != 0) {
8174 1 : SPDK_ERRLOG("Failed to open bdev with name: %s\n", bdev_name);
8175 1 : return rc;
8176 : }
8177 :
8178 3 : bdev = spdk_bdev_desc_get_bdev(desc);
8179 :
8180 3 : if (bdev->module != module) {
8181 1 : spdk_bdev_close(desc);
8182 1 : SPDK_ERRLOG("Bdev %s was not registered by the specified module.\n",
8183 : bdev_name);
8184 1 : return -ENODEV;
8185 : }
8186 :
8187 2 : spdk_bdev_unregister(bdev, cb_fn, cb_arg);
8188 :
8189 2 : spdk_bdev_close(desc);
8190 :
8191 2 : return 0;
8192 4 : }
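
A minimal usage sketch of the by-name unregister path (illustrative only, not part of the measured source; the callback and module variable names are placeholders):

/* Hypothetical caller of spdk_bdev_unregister_by_name(); error handling is abbreviated. */
static void
example_unregister_done(void *cb_arg, int rc)
{
	SPDK_NOTICELOG("bdev unregister completed with rc %d\n", rc);
}

static void
example_delete_bdev(const char *name, struct spdk_bdev_module *my_module)
{
	int rc;

	/* Fails with -ENODEV if the bdev does not exist or was registered by another module. */
	rc = spdk_bdev_unregister_by_name(name, my_module, example_unregister_done, NULL);
	if (rc != 0) {
		SPDK_ERRLOG("Failed to start unregister of %s: %d\n", name, rc);
	}
}
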
8193 :
8194 : static int
8195 268 : bdev_start_qos(struct spdk_bdev *bdev)
8196 : {
8197 : struct set_qos_limit_ctx *ctx;
8198 :
8199 : /* Enable QoS */
8200 268 : if (bdev->internal.qos && bdev->internal.qos->thread == NULL) {
8201 2 : ctx = calloc(1, sizeof(*ctx));
8202 2 : if (ctx == NULL) {
8203 0 : SPDK_ERRLOG("Failed to allocate memory for QoS context\n");
8204 0 : return -ENOMEM;
8205 : }
8206 2 : ctx->bdev = bdev;
8207 2 : spdk_bdev_for_each_channel(bdev, bdev_enable_qos_msg, ctx, bdev_enable_qos_done);
8208 2 : }
8209 :
8210 268 : return 0;
8211 268 : }
8212 :
8213 : static void
8214 25 : log_already_claimed(enum spdk_log_level level, const int line, const char *func, const char *detail,
8215 : struct spdk_bdev *bdev)
8216 : {
8217 : enum spdk_bdev_claim_type type;
8218 : const char *typename, *modname;
8219 : extern struct spdk_log_flag SPDK_LOG_bdev;
8220 :
8221 25 : assert(spdk_spin_held(&bdev->internal.spinlock));
8222 :
8223 25 : if (level >= SPDK_LOG_INFO && !SPDK_LOG_bdev.enabled) {
8224 0 : return;
8225 : }
8226 :
8227 25 : type = bdev->internal.claim_type;
8228 25 : typename = spdk_bdev_claim_get_name(type);
8229 :
8230 25 : if (type == SPDK_BDEV_CLAIM_EXCL_WRITE) {
8231 6 : modname = bdev->internal.claim.v1.module->name;
8232 12 : spdk_log(level, __FILE__, line, func, "bdev %s %s: type %s by module %s\n",
8233 6 : bdev->name, detail, typename, modname);
8234 6 : return;
8235 : }
8236 :
8237 19 : if (claim_type_is_v2(type)) {
8238 : struct spdk_bdev_module_claim *claim;
8239 :
8240 38 : TAILQ_FOREACH(claim, &bdev->internal.claim.v2.claims, link) {
8241 19 : modname = claim->module->name;
8242 38 : spdk_log(level, __FILE__, line, func, "bdev %s %s: type %s by module %s\n",
8243 19 : bdev->name, detail, typename, modname);
8244 19 : }
8245 19 : return;
8246 : }
8247 :
8248 0 : assert(false);
8249 25 : }
8250 :
8251 : static int
8252 277 : bdev_open(struct spdk_bdev *bdev, bool write, struct spdk_bdev_desc *desc)
8253 : {
8254 : struct spdk_thread *thread;
8255 277 : int rc = 0;
8256 :
8257 277 : thread = spdk_get_thread();
8258 277 : if (!thread) {
8259 0 : SPDK_ERRLOG("Cannot open bdev from non-SPDK thread.\n");
8260 0 : return -ENOTSUP;
8261 : }
8262 :
8263 277 : SPDK_DEBUGLOG(bdev, "Opening descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
8264 : spdk_get_thread());
8265 :
8266 277 : desc->bdev = bdev;
8267 277 : desc->thread = thread;
8268 277 : desc->write = write;
8269 :
8270 277 : spdk_spin_lock(&bdev->internal.spinlock);
8271 277 : if (bdev->internal.status == SPDK_BDEV_STATUS_UNREGISTERING ||
8272 277 : bdev->internal.status == SPDK_BDEV_STATUS_REMOVING) {
8273 3 : spdk_spin_unlock(&bdev->internal.spinlock);
8274 3 : return -ENODEV;
8275 : }
8276 :
8277 274 : if (write && bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
8278 6 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
8279 6 : spdk_spin_unlock(&bdev->internal.spinlock);
8280 6 : return -EPERM;
8281 : }
8282 :
8283 268 : rc = bdev_start_qos(bdev);
8284 268 : if (rc != 0) {
8285 0 : SPDK_ERRLOG("Failed to start QoS on bdev %s\n", bdev->name);
8286 0 : spdk_spin_unlock(&bdev->internal.spinlock);
8287 0 : return rc;
8288 : }
8289 :
8290 268 : TAILQ_INSERT_TAIL(&bdev->internal.open_descs, desc, link);
8291 :
8292 268 : spdk_spin_unlock(&bdev->internal.spinlock);
8293 :
8294 268 : return 0;
8295 277 : }
8296 :
8297 : static void
8298 278 : bdev_open_opts_get_defaults(struct spdk_bdev_open_opts *opts, size_t opts_size)
8299 : {
8300 278 : if (!opts) {
8301 0 : SPDK_ERRLOG("opts should not be NULL.\n");
8302 0 : return;
8303 : }
8304 :
8305 278 : if (!opts_size) {
8306 0 : SPDK_ERRLOG("opts_size should not be zero.\n");
8307 0 : return;
8308 : }
8309 :
8310 278 : memset(opts, 0, opts_size);
8311 278 : opts->size = opts_size;
8312 :
8313 : #define FIELD_OK(field) \
8314 : offsetof(struct spdk_bdev_open_opts, field) + sizeof(opts->field) <= opts_size
8315 :
8316 : #define SET_FIELD(field, value) \
8317 : if (FIELD_OK(field)) { \
8318 : opts->field = value; \
8319 : } \
8320 :
8321 278 : SET_FIELD(hide_metadata, false);
8322 :
8323 : #undef FIELD_OK
8324 : #undef SET_FIELD
8325 278 : }
8326 :
8327 : static void
8328 2 : bdev_open_opts_copy(struct spdk_bdev_open_opts *opts,
8329 : const struct spdk_bdev_open_opts *opts_src, size_t opts_size)
8330 : {
8331 2 : assert(opts);
8332 2 : assert(opts_src);
8333 :
8334 : #define SET_FIELD(field) \
8335 : if (offsetof(struct spdk_bdev_open_opts, field) + sizeof(opts->field) <= opts_size) { \
8336 : opts->field = opts_src->field; \
8337 : } \
8338 :
8339 2 : SET_FIELD(hide_metadata);
8340 :
8341 2 : opts->size = opts_src->size;
8342 :
8343 : /* We should not remove this statement, but need to update the assert statement
8344 : * if we add a new field, and also add a corresponding SET_FIELD statement.
8345 : */
8346 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_opts) == 16, "Incorrect size");
8347 :
8348 : #undef SET_FIELD
8349 2 : }
8350 :
8351 : void
8352 1 : spdk_bdev_open_opts_init(struct spdk_bdev_open_opts *opts, size_t opts_size)
8353 : {
8354 : struct spdk_bdev_open_opts opts_local;
8355 :
8356 1 : bdev_open_opts_get_defaults(&opts_local, sizeof(opts_local));
8357 1 : bdev_open_opts_copy(opts, &opts_local, opts_size);
8358 1 : }
8359 :
8360 : static int
8361 277 : bdev_desc_alloc(struct spdk_bdev *bdev, spdk_bdev_event_cb_t event_cb, void *event_ctx,
8362 : struct spdk_bdev_open_opts *user_opts, struct spdk_bdev_desc **_desc)
8363 : {
8364 : struct spdk_bdev_desc *desc;
8365 : struct spdk_bdev_open_opts opts;
8366 : unsigned int i;
8367 :
8368 277 : bdev_open_opts_get_defaults(&opts, sizeof(opts));
8369 277 : if (user_opts != NULL) {
8370 1 : bdev_open_opts_copy(&opts, user_opts, user_opts->size);
8371 1 : }
8372 :
8373 277 : desc = calloc(1, sizeof(*desc));
8374 277 : if (desc == NULL) {
8375 0 : SPDK_ERRLOG("Failed to allocate memory for bdev descriptor\n");
8376 0 : return -ENOMEM;
8377 : }
8378 :
8379 277 : desc->opts = opts;
8380 :
8381 277 : TAILQ_INIT(&desc->pending_media_events);
8382 277 : TAILQ_INIT(&desc->free_media_events);
8383 :
8384 277 : desc->memory_domains_supported = spdk_bdev_get_memory_domains(bdev, NULL, 0) > 0;
8385 277 : desc->callback.event_fn = event_cb;
8386 277 : desc->callback.ctx = event_ctx;
8387 277 : spdk_spin_init(&desc->spinlock);
8388 :
8389 277 : if (desc->opts.hide_metadata) {
8390 1 : if (spdk_bdev_is_md_separate(bdev)) {
8391 0 : SPDK_ERRLOG("hide_metadata option is not supported with separate metadata.\n");
8392 0 : bdev_desc_free(desc);
8393 0 : return -EINVAL;
8394 : }
8395 1 : }
8396 :
8397 277 : if (bdev->media_events) {
8398 0 : desc->media_events_buffer = calloc(MEDIA_EVENT_POOL_SIZE,
8399 : sizeof(*desc->media_events_buffer));
8400 0 : if (desc->media_events_buffer == NULL) {
8401 0 : SPDK_ERRLOG("Failed to initialize media event pool\n");
8402 0 : bdev_desc_free(desc);
8403 0 : return -ENOMEM;
8404 : }
8405 :
8406 0 : for (i = 0; i < MEDIA_EVENT_POOL_SIZE; ++i) {
8407 0 : TAILQ_INSERT_TAIL(&desc->free_media_events,
8408 : &desc->media_events_buffer[i], tailq);
8409 0 : }
8410 0 : }
8411 :
8412 277 : if (bdev->fn_table->accel_sequence_supported != NULL) {
8413 0 : for (i = 0; i < SPDK_BDEV_NUM_IO_TYPES; ++i) {
8414 0 : desc->accel_sequence_supported[i] =
8415 0 : bdev->fn_table->accel_sequence_supported(bdev->ctxt,
8416 0 : (enum spdk_bdev_io_type)i);
8417 0 : }
8418 0 : }
8419 :
8420 277 : *_desc = desc;
8421 :
8422 277 : return 0;
8423 277 : }
8424 :
8425 : static int
8426 136 : bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8427 : void *event_ctx, struct spdk_bdev_open_opts *opts,
8428 : struct spdk_bdev_desc **_desc)
8429 : {
8430 : struct spdk_bdev_desc *desc;
8431 : struct spdk_bdev *bdev;
8432 : int rc;
8433 :
8434 136 : bdev = bdev_get_by_name(bdev_name);
8435 :
8436 136 : if (bdev == NULL) {
8437 1 : SPDK_NOTICELOG("Currently unable to find bdev with name: %s\n", bdev_name);
8438 1 : return -ENODEV;
8439 : }
8440 :
8441 135 : rc = bdev_desc_alloc(bdev, event_cb, event_ctx, opts, &desc);
8442 135 : if (rc != 0) {
8443 0 : return rc;
8444 : }
8445 :
8446 135 : rc = bdev_open(bdev, write, desc);
8447 135 : if (rc != 0) {
8448 7 : bdev_desc_free(desc);
8449 7 : desc = NULL;
8450 7 : }
8451 :
8452 135 : *_desc = desc;
8453 :
8454 135 : return rc;
8455 136 : }
8456 :
8457 : int
8458 138 : spdk_bdev_open_ext_v2(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8459 : void *event_ctx, struct spdk_bdev_open_opts *opts,
8460 : struct spdk_bdev_desc **_desc)
8461 : {
8462 : int rc;
8463 :
8464 138 : if (event_cb == NULL) {
8465 2 : SPDK_ERRLOG("Missing event callback function\n");
8466 2 : return -EINVAL;
8467 : }
8468 :
8469 136 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8470 136 : rc = bdev_open_ext(bdev_name, write, event_cb, event_ctx, opts, _desc);
8471 136 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8472 :
8473 136 : return rc;
8474 138 : }
8475 :
8476 : int
8477 136 : spdk_bdev_open_ext(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8478 : void *event_ctx, struct spdk_bdev_desc **_desc)
8479 : {
8480 136 : return spdk_bdev_open_ext_v2(bdev_name, write, event_cb, event_ctx, NULL, _desc);
8481 : }
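
A minimal open/close sketch (illustrative, not part of this file): the event callback must handle at least SPDK_BDEV_EVENT_REMOVE and close the descriptor on the thread that opened it. The names below are placeholders.

/* Illustrative caller of spdk_bdev_open_ext(); names are placeholders. */
static void
example_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	struct spdk_bdev_desc **desc = event_ctx;

	if (type == SPDK_BDEV_EVENT_REMOVE && *desc != NULL) {
		/* Hot removal: release the descriptor so the bdev can finish unregistering. */
		spdk_bdev_close(*desc);
		*desc = NULL;
	}
}

static int
example_open(const char *name, struct spdk_bdev_desc **desc)
{
	/* Open read-only; pass true to request a writable descriptor. */
	return spdk_bdev_open_ext(name, false, example_event_cb, desc, desc);
}
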
8482 :
8483 : struct spdk_bdev_open_async_ctx {
8484 : char *bdev_name;
8485 : spdk_bdev_event_cb_t event_cb;
8486 : void *event_ctx;
8487 : bool write;
8488 : int rc;
8489 : spdk_bdev_open_async_cb_t cb_fn;
8490 : void *cb_arg;
8491 : struct spdk_bdev_desc *desc;
8492 : struct spdk_bdev_open_async_opts opts;
8493 : uint64_t start_ticks;
8494 : struct spdk_thread *orig_thread;
8495 : struct spdk_poller *poller;
8496 : TAILQ_ENTRY(spdk_bdev_open_async_ctx) tailq;
8497 : };
8498 :
8499 : static void
8500 0 : bdev_open_async_done(void *arg)
8501 : {
8502 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8503 :
8504 0 : ctx->cb_fn(ctx->desc, ctx->rc, ctx->cb_arg);
8505 :
8506 0 : free(ctx->bdev_name);
8507 0 : free(ctx);
8508 0 : }
8509 :
8510 : static void
8511 0 : bdev_open_async_cancel(void *arg)
8512 : {
8513 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8514 :
8515 0 : assert(ctx->rc == -ESHUTDOWN);
8516 :
8517 0 : spdk_poller_unregister(&ctx->poller);
8518 :
8519 0 : bdev_open_async_done(ctx);
8520 0 : }
8521 :
8522 : /* This is called when the bdev library finishes at shutdown. */
8523 : static void
8524 68 : bdev_open_async_fini(void)
8525 : {
8526 : struct spdk_bdev_open_async_ctx *ctx, *tmp_ctx;
8527 :
8528 68 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8529 68 : TAILQ_FOREACH_SAFE(ctx, &g_bdev_mgr.async_bdev_opens, tailq, tmp_ctx) {
8530 0 : TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8531 : /*
8532 : * We have to move to ctx->orig_thread to unregister ctx->poller.
8533 : * However, there is a chance that ctx->poller is executed before
8534 : * the message is executed, which could result in bdev_open_async_done()
8535 : * being called twice. To avoid such a race condition, set ctx->rc to
8536 : * -ESHUTDOWN.
8537 : */
8538 0 : ctx->rc = -ESHUTDOWN;
8539 0 : spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_cancel, ctx);
8540 0 : }
8541 68 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8542 68 : }
8543 :
8544 : static int bdev_open_async(void *arg);
8545 :
8546 : static void
8547 0 : _bdev_open_async(struct spdk_bdev_open_async_ctx *ctx)
8548 : {
8549 : uint64_t timeout_ticks;
8550 :
8551 0 : if (ctx->rc == -ESHUTDOWN) {
8552 : /* This context is being canceled. Do nothing. */
8553 0 : return;
8554 : }
8555 :
8556 0 : ctx->rc = bdev_open_ext(ctx->bdev_name, ctx->write, ctx->event_cb, ctx->event_ctx,
8557 0 : NULL, &ctx->desc);
8558 0 : if (ctx->rc == 0 || ctx->opts.timeout_ms == 0) {
8559 0 : goto exit;
8560 : }
8561 :
8562 0 : timeout_ticks = ctx->start_ticks + ctx->opts.timeout_ms * spdk_get_ticks_hz() / 1000ull;
8563 0 : if (spdk_get_ticks() >= timeout_ticks) {
8564 0 : SPDK_ERRLOG("Timed out while waiting for bdev '%s' to appear\n", ctx->bdev_name);
8565 0 : ctx->rc = -ETIMEDOUT;
8566 0 : goto exit;
8567 : }
8568 :
8569 0 : return;
8570 :
8571 : exit:
8572 0 : spdk_poller_unregister(&ctx->poller);
8573 0 : TAILQ_REMOVE(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8574 :
8575 : /* Completion callback is processed after stack unwinding. */
8576 0 : spdk_thread_send_msg(ctx->orig_thread, bdev_open_async_done, ctx);
8577 0 : }
8578 :
8579 : static int
8580 0 : bdev_open_async(void *arg)
8581 : {
8582 0 : struct spdk_bdev_open_async_ctx *ctx = arg;
8583 :
8584 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8585 :
8586 0 : _bdev_open_async(ctx);
8587 :
8588 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8589 :
8590 0 : return SPDK_POLLER_BUSY;
8591 : }
8592 :
8593 : static void
8594 0 : bdev_open_async_opts_copy(struct spdk_bdev_open_async_opts *opts,
8595 : struct spdk_bdev_open_async_opts *opts_src,
8596 : size_t size)
8597 : {
8598 0 : assert(opts);
8599 0 : assert(opts_src);
8600 :
8601 0 : opts->size = size;
8602 :
8603 : #define SET_FIELD(field) \
8604 : if (offsetof(struct spdk_bdev_open_async_opts, field) + sizeof(opts->field) <= size) { \
8605 : opts->field = opts_src->field; \
8606 : } \
8607 :
8608 0 : SET_FIELD(timeout_ms);
8609 :
8610 : /* Do not remove this statement, you should always update this statement when you adding a new field,
8611 : * and do not forget to add the SET_FIELD statement for your added field. */
8612 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_open_async_opts) == 16, "Incorrect size");
8613 :
8614 : #undef SET_FIELD
8615 0 : }
8616 :
8617 : static void
8618 0 : bdev_open_async_opts_get_default(struct spdk_bdev_open_async_opts *opts, size_t size)
8619 : {
8620 0 : assert(opts);
8621 :
8622 0 : opts->size = size;
8623 :
8624 : #define SET_FIELD(field, value) \
8625 : if (offsetof(struct spdk_bdev_open_async_opts, field) + sizeof(opts->field) <= size) { \
8626 : opts->field = value; \
8627 : } \
8628 :
8629 0 : SET_FIELD(timeout_ms, 0);
8630 :
8631 : #undef SET_FIELD
8632 0 : }
8633 :
8634 : int
8635 0 : spdk_bdev_open_async(const char *bdev_name, bool write, spdk_bdev_event_cb_t event_cb,
8636 : void *event_ctx, struct spdk_bdev_open_async_opts *opts,
8637 : spdk_bdev_open_async_cb_t open_cb, void *open_cb_arg)
8638 : {
8639 : struct spdk_bdev_open_async_ctx *ctx;
8640 :
8641 0 : if (event_cb == NULL) {
8642 0 : SPDK_ERRLOG("Missing event callback function\n");
8643 0 : return -EINVAL;
8644 : }
8645 :
8646 0 : if (open_cb == NULL) {
8647 0 : SPDK_ERRLOG("Missing open callback function\n");
8648 0 : return -EINVAL;
8649 : }
8650 :
8651 0 : if (opts != NULL && opts->size == 0) {
8652 0 : SPDK_ERRLOG("size in the options structure should not be zero\n");
8653 0 : return -EINVAL;
8654 : }
8655 :
8656 0 : ctx = calloc(1, sizeof(*ctx));
8657 0 : if (ctx == NULL) {
8658 0 : SPDK_ERRLOG("Failed to allocate open context\n");
8659 0 : return -ENOMEM;
8660 : }
8661 :
8662 0 : ctx->bdev_name = strdup(bdev_name);
8663 0 : if (ctx->bdev_name == NULL) {
8664 0 : SPDK_ERRLOG("Failed to duplicate bdev_name\n");
8665 0 : free(ctx);
8666 0 : return -ENOMEM;
8667 : }
8668 :
8669 0 : ctx->poller = SPDK_POLLER_REGISTER(bdev_open_async, ctx, 100 * 1000);
8670 0 : if (ctx->poller == NULL) {
8671 0 : SPDK_ERRLOG("Failed to register bdev_open_async poller\n");
8672 0 : free(ctx->bdev_name);
8673 0 : free(ctx);
8674 0 : return -ENOMEM;
8675 : }
8676 :
8677 0 : ctx->cb_fn = open_cb;
8678 0 : ctx->cb_arg = open_cb_arg;
8679 0 : ctx->write = write;
8680 0 : ctx->event_cb = event_cb;
8681 0 : ctx->event_ctx = event_ctx;
8682 0 : ctx->orig_thread = spdk_get_thread();
8683 0 : ctx->start_ticks = spdk_get_ticks();
8684 :
8685 0 : bdev_open_async_opts_get_default(&ctx->opts, sizeof(ctx->opts));
8686 0 : if (opts != NULL) {
8687 0 : bdev_open_async_opts_copy(&ctx->opts, opts, opts->size);
8688 0 : }
8689 :
8690 0 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8691 :
8692 0 : TAILQ_INSERT_TAIL(&g_bdev_mgr.async_bdev_opens, ctx, tailq);
8693 0 : _bdev_open_async(ctx);
8694 :
8695 0 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8696 :
8697 0 : return 0;
8698 0 : }
8699 :
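
An illustrative, non-authoritative use of the asynchronous open path with a bounded wait; the 5000 ms timeout and all function names are placeholders:

/* Sketch: wait up to 5 seconds for a bdev to appear, then receive the descriptor. */
static void
example_async_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
{
	/* Handle SPDK_BDEV_EVENT_REMOVE etc.; omitted in this sketch. */
}

static void
example_open_async_done(struct spdk_bdev_desc *desc, int rc, void *cb_arg)
{
	if (rc != 0) {
		SPDK_ERRLOG("async open failed: %d\n", rc);
		return;
	}
	/* desc is now open; remember to spdk_bdev_close() it later. */
}

static int
example_open_async(const char *name)
{
	struct spdk_bdev_open_async_opts opts;

	/* Only size and timeout_ms are set; fields beyond opts.size are never read. */
	opts.size = sizeof(opts);
	opts.timeout_ms = 5000;

	return spdk_bdev_open_async(name, false, example_async_event_cb, NULL, &opts,
				    example_open_async_done, NULL);
}
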
8700 : static void
8701 268 : bdev_close(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc)
8702 : {
8703 : int rc;
8704 :
8705 268 : spdk_spin_lock(&bdev->internal.spinlock);
8706 268 : spdk_spin_lock(&desc->spinlock);
8707 :
8708 268 : TAILQ_REMOVE(&bdev->internal.open_descs, desc, link);
8709 :
8710 268 : desc->closed = true;
8711 :
8712 268 : if (desc->claim != NULL) {
8713 20 : bdev_desc_release_claims(desc);
8714 20 : }
8715 :
8716 268 : if (0 == desc->refs) {
8717 257 : spdk_spin_unlock(&desc->spinlock);
8718 257 : bdev_desc_free(desc);
8719 257 : } else {
8720 11 : spdk_spin_unlock(&desc->spinlock);
8721 : }
8722 :
8723 : /* If no more descriptors, kill QoS channel */
8724 268 : if (bdev->internal.qos && TAILQ_EMPTY(&bdev->internal.open_descs)) {
8725 7 : SPDK_DEBUGLOG(bdev, "Closed last descriptor for bdev %s on thread %p. Stopping QoS.\n",
8726 : bdev->name, spdk_get_thread());
8727 :
8728 7 : if (bdev_qos_destroy(bdev)) {
8729 : /* There isn't anything we can do to recover here. Just let the
8730 : * old QoS poller keep running. The QoS handling won't change
8731 : * cores when the user allocates a new channel, but it won't break. */
8732 0 : SPDK_ERRLOG("Unable to shut down QoS poller. It will continue running on the current thread.\n");
8733 0 : }
8734 7 : }
8735 :
8736 268 : if (bdev->internal.status == SPDK_BDEV_STATUS_REMOVING && TAILQ_EMPTY(&bdev->internal.open_descs)) {
8737 15 : rc = bdev_unregister_unsafe(bdev);
8738 15 : spdk_spin_unlock(&bdev->internal.spinlock);
8739 :
8740 15 : if (rc == 0) {
8741 15 : spdk_io_device_unregister(__bdev_to_io_dev(bdev), bdev_destroy_cb);
8742 15 : }
8743 15 : } else {
8744 253 : spdk_spin_unlock(&bdev->internal.spinlock);
8745 : }
8746 268 : }
8747 :
8748 : void
8749 128 : spdk_bdev_close(struct spdk_bdev_desc *desc)
8750 : {
8751 128 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
8752 :
8753 128 : SPDK_DEBUGLOG(bdev, "Closing descriptor %p for bdev %s on thread %p\n", desc, bdev->name,
8754 : spdk_get_thread());
8755 :
8756 128 : assert(desc->thread == spdk_get_thread());
8757 :
8758 128 : spdk_poller_unregister(&desc->io_timeout_poller);
8759 :
8760 128 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8761 :
8762 128 : bdev_close(bdev, desc);
8763 :
8764 128 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8765 128 : }
8766 :
8767 : int32_t
8768 3 : spdk_bdev_get_numa_id(struct spdk_bdev *bdev)
8769 : {
8770 3 : if (bdev->numa.id_valid) {
8771 2 : return bdev->numa.id;
8772 : } else {
8773 1 : return SPDK_ENV_NUMA_ID_ANY;
8774 : }
8775 3 : }
8776 :
8777 : static void
8778 129 : bdev_register_finished(void *arg)
8779 : {
8780 129 : struct spdk_bdev_desc *desc = arg;
8781 129 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
8782 :
8783 129 : spdk_notify_send("bdev_register", spdk_bdev_get_name(bdev));
8784 :
8785 129 : spdk_spin_lock(&g_bdev_mgr.spinlock);
8786 :
8787 129 : bdev_close(bdev, desc);
8788 :
8789 129 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
8790 129 : }
8791 :
8792 : int
8793 132 : spdk_bdev_register(struct spdk_bdev *bdev)
8794 : {
8795 : struct spdk_bdev_desc *desc;
8796 132 : struct spdk_thread *thread = spdk_get_thread();
8797 : int rc;
8798 :
8799 132 : if (spdk_unlikely(!spdk_thread_is_app_thread(NULL))) {
8800 1 : SPDK_ERRLOG("Cannot register bdev %s on thread %p (%s)\n", bdev->name, thread,
8801 : thread ? spdk_thread_get_name(thread) : "null");
8802 1 : return -EINVAL;
8803 : }
8804 :
8805 131 : rc = bdev_register(bdev);
8806 131 : if (rc != 0) {
8807 2 : return rc;
8808 : }
8809 :
8810 : /* A descriptor is opened to prevent bdev deletion during examination */
8811 129 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
8812 129 : if (rc != 0) {
8813 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8814 0 : return rc;
8815 : }
8816 :
8817 129 : rc = bdev_open(bdev, false, desc);
8818 129 : if (rc != 0) {
8819 0 : bdev_desc_free(desc);
8820 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8821 0 : return rc;
8822 : }
8823 :
8824 : /* Examine configuration before initializing I/O */
8825 129 : bdev_examine(bdev);
8826 :
8827 129 : rc = spdk_bdev_wait_for_examine(bdev_register_finished, desc);
8828 129 : if (rc != 0) {
8829 0 : bdev_close(bdev, desc);
8830 0 : spdk_bdev_unregister(bdev, NULL, NULL);
8831 0 : }
8832 :
8833 129 : return rc;
8834 132 : }
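
For the module side, a rough registration sketch; it assumes the module already defines its fn_table (destruct, submit_request, io_type_supported, get_io_channel, ...) and module object, and the field values are placeholders:

/* Hypothetical module code registering one bdev. */
static int
example_register(struct spdk_bdev *bdev, void *ctxt,
		 const struct spdk_bdev_fn_table *fn_table, struct spdk_bdev_module *module)
{
	bdev->name = strdup("Example0");    /* the name is typically owned (and freed) by the module */
	if (bdev->name == NULL) {
		return -ENOMEM;
	}
	bdev->product_name = "example disk";
	bdev->blocklen = 512;               /* logical block size in bytes */
	bdev->blockcnt = 1024 * 1024;       /* number of blocks */
	bdev->ctxt = ctxt;
	bdev->fn_table = fn_table;
	bdev->module = module;

	/* Must be called on the app thread; triggers examine callbacks of other modules. */
	return spdk_bdev_register(bdev);
}
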
8835 :
8836 : int
8837 26 : spdk_bdev_module_claim_bdev(struct spdk_bdev *bdev, struct spdk_bdev_desc *desc,
8838 : struct spdk_bdev_module *module)
8839 : {
8840 26 : spdk_spin_lock(&bdev->internal.spinlock);
8841 :
8842 26 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
8843 6 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
8844 6 : spdk_spin_unlock(&bdev->internal.spinlock);
8845 6 : return -EPERM;
8846 : }
8847 :
8848 20 : if (desc && !desc->write) {
8849 5 : desc->write = true;
8850 5 : }
8851 :
8852 20 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_EXCL_WRITE;
8853 20 : bdev->internal.claim.v1.module = module;
8854 :
8855 20 : spdk_spin_unlock(&bdev->internal.spinlock);
8856 20 : return 0;
8857 26 : }
8858 :
8859 : void
8860 8 : spdk_bdev_module_release_bdev(struct spdk_bdev *bdev)
8861 : {
8862 8 : spdk_spin_lock(&bdev->internal.spinlock);
8863 :
8864 8 : assert(bdev->internal.claim.v1.module != NULL);
8865 8 : assert(bdev->internal.claim_type == SPDK_BDEV_CLAIM_EXCL_WRITE);
8866 8 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
8867 8 : bdev->internal.claim.v1.module = NULL;
8868 :
8869 8 : spdk_spin_unlock(&bdev->internal.spinlock);
8870 8 : }
8871 :
8872 : /*
8873 : * Start claims v2
8874 : */
8875 :
8876 : const char *
8877 25 : spdk_bdev_claim_get_name(enum spdk_bdev_claim_type type)
8878 : {
8879 25 : switch (type) {
8880 : case SPDK_BDEV_CLAIM_NONE:
8881 0 : return "not_claimed";
8882 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
8883 6 : return "exclusive_write";
8884 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8885 8 : return "read_many_write_one";
8886 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
8887 5 : return "read_many_write_none";
8888 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8889 6 : return "read_many_write_many";
8890 : default:
8891 0 : break;
8892 : }
8893 0 : return "invalid_claim";
8894 25 : }
8895 :
8896 : static bool
8897 115 : claim_type_is_v2(enum spdk_bdev_claim_type type)
8898 : {
8899 115 : switch (type) {
8900 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8901 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
8902 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8903 115 : return true;
8904 : default:
8905 0 : break;
8906 : }
8907 0 : return false;
8908 115 : }
8909 :
8910 : /* Returns true if taking a claim with desc->write == false should make the descriptor writable. */
8911 : static bool
8912 17 : claim_type_promotes_to_write(enum spdk_bdev_claim_type type)
8913 : {
8914 17 : switch (type) {
8915 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
8916 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
8917 6 : return true;
8918 : default:
8919 11 : break;
8920 : }
8921 11 : return false;
8922 17 : }
8923 :
8924 : void
8925 57 : spdk_bdev_claim_opts_init(struct spdk_bdev_claim_opts *opts, size_t size)
8926 : {
8927 57 : if (opts == NULL) {
8928 0 : SPDK_ERRLOG("opts should not be NULL\n");
8929 0 : assert(opts != NULL);
8930 0 : return;
8931 : }
8932 57 : if (size == 0) {
8933 0 : SPDK_ERRLOG("size should not be zero\n");
8934 0 : assert(size != 0);
8935 0 : return;
8936 : }
8937 :
8938 57 : memset(opts, 0, size);
8939 57 : opts->opts_size = size;
8940 :
8941 : #define FIELD_OK(field) \
8942 : offsetof(struct spdk_bdev_claim_opts, field) + sizeof(opts->field) <= size
8943 :
8944 : #define SET_FIELD(field, value) \
8945 : if (FIELD_OK(field)) { \
8946 : opts->field = value; \
8947 : } \
8948 :
8949 57 : SET_FIELD(shared_claim_key, 0);
8950 :
8951 : #undef FIELD_OK
8952 : #undef SET_FIELD
8953 57 : }
8954 :
8955 : static int
8956 22 : claim_opts_copy(struct spdk_bdev_claim_opts *src, struct spdk_bdev_claim_opts *dst)
8957 : {
8958 22 : if (src->opts_size == 0) {
8959 0 : SPDK_ERRLOG("size should not be zero\n");
8960 0 : return -1;
8961 : }
8962 :
8963 22 : memset(dst, 0, sizeof(*dst));
8964 22 : dst->opts_size = src->opts_size;
8965 :
8966 : #define FIELD_OK(field) \
8967 : offsetof(struct spdk_bdev_claim_opts, field) + sizeof(src->field) <= src->opts_size
8968 :
8969 : #define SET_FIELD(field) \
8970 : if (FIELD_OK(field)) { \
8971 : dst->field = src->field; \
8972 : } \
8973 :
8974 22 : if (FIELD_OK(name)) {
8975 22 : snprintf(dst->name, sizeof(dst->name), "%s", src->name);
8976 22 : }
8977 :
8978 22 : SET_FIELD(shared_claim_key);
8979 :
8980 : /* You should not remove this statement, but need to update the assert statement
8981 : * if you add a new field, and also add a corresponding SET_FIELD statement */
8982 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_claim_opts) == 48, "Incorrect size");
8983 :
8984 : #undef FIELD_OK
8985 : #undef SET_FIELD
8986 22 : return 0;
8987 22 : }
8988 :
8989 : /* Returns 0 if a read-write-once claim can be taken. */
8990 : static int
8991 10 : claim_verify_rwo(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
8992 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
8993 : {
8994 10 : struct spdk_bdev *bdev = desc->bdev;
8995 : struct spdk_bdev_desc *open_desc;
8996 :
8997 10 : assert(spdk_spin_held(&bdev->internal.spinlock));
8998 10 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE);
8999 :
9000 10 : if (opts->shared_claim_key != 0) {
9001 1 : SPDK_ERRLOG("%s: key option not supported with read-write-once claims\n",
9002 : bdev->name);
9003 1 : return -EINVAL;
9004 : }
9005 9 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE) {
9006 1 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
9007 1 : return -EPERM;
9008 : }
9009 8 : if (desc->claim != NULL) {
9010 0 : SPDK_NOTICELOG("%s: descriptor already claimed bdev with module %s\n",
9011 : bdev->name, desc->claim->module->name);
9012 0 : return -EPERM;
9013 : }
9014 16 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
9015 10 : if (desc != open_desc && open_desc->write) {
9016 2 : SPDK_NOTICELOG("%s: Cannot obtain read-write-once claim while "
9017 : "another descriptor is open for writing\n",
9018 : bdev->name);
9019 2 : return -EPERM;
9020 : }
9021 8 : }
9022 :
9023 6 : return 0;
9024 10 : }
9025 :
9026 : /* Returns 0 if a read-only-many claim can be taken. */
9027 : static int
9028 15 : claim_verify_rom(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9029 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9030 : {
9031 15 : struct spdk_bdev *bdev = desc->bdev;
9032 : struct spdk_bdev_desc *open_desc;
9033 :
9034 15 : assert(spdk_spin_held(&bdev->internal.spinlock));
9035 15 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE);
9036 15 : assert(desc->claim == NULL);
9037 :
9038 15 : if (desc->write) {
9039 3 : SPDK_ERRLOG("%s: Cannot obtain read-only-many claim with writable descriptor\n",
9040 : bdev->name);
9041 3 : return -EINVAL;
9042 : }
9043 12 : if (opts->shared_claim_key != 0) {
9044 1 : SPDK_ERRLOG("%s: key option not supported with read-only-many claims\n", bdev->name);
9045 1 : return -EINVAL;
9046 : }
9047 11 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
9048 19 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
9049 11 : if (open_desc->write) {
9050 0 : SPDK_NOTICELOG("%s: Cannot obtain read-only-many claim while "
9051 : "another descriptor is open for writing\n",
9052 : bdev->name);
9053 0 : return -EPERM;
9054 : }
9055 11 : }
9056 8 : }
9057 :
9058 11 : return 0;
9059 15 : }
9060 :
9061 : /* Returns 0 if a read-write-many claim can be taken. */
9062 : static int
9063 8 : claim_verify_rwm(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9064 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9065 : {
9066 8 : struct spdk_bdev *bdev = desc->bdev;
9067 : struct spdk_bdev_desc *open_desc;
9068 :
9069 8 : assert(spdk_spin_held(&bdev->internal.spinlock));
9070 8 : assert(type == SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED);
9071 8 : assert(desc->claim == NULL);
9072 :
9073 8 : if (opts->shared_claim_key == 0) {
9074 2 : SPDK_ERRLOG("%s: shared_claim_key option required with read-write-many claims\n",
9075 : bdev->name);
9076 2 : return -EINVAL;
9077 : }
9078 6 : switch (bdev->internal.claim_type) {
9079 : case SPDK_BDEV_CLAIM_NONE:
9080 7 : TAILQ_FOREACH(open_desc, &bdev->internal.open_descs, link) {
9081 5 : if (open_desc == desc) {
9082 3 : continue;
9083 : }
9084 2 : if (open_desc->write) {
9085 2 : SPDK_NOTICELOG("%s: Cannot obtain read-write-many claim while "
9086 : "another descriptor is open for writing without a "
9087 : "claim\n", bdev->name);
9088 2 : return -EPERM;
9089 : }
9090 0 : }
9091 2 : break;
9092 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
9093 2 : if (opts->shared_claim_key != bdev->internal.claim.v2.key) {
9094 1 : LOG_ALREADY_CLAIMED_ERROR("already claimed with another key", bdev);
9095 1 : return -EPERM;
9096 : }
9097 1 : break;
9098 : default:
9099 0 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
9100 0 : return -EBUSY;
9101 : }
9102 :
9103 3 : return 0;
9104 8 : }
9105 :
9106 : /* Updates desc and its bdev with a v2 claim. */
9107 : static int
9108 20 : claim_bdev(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9109 : struct spdk_bdev_claim_opts *opts, struct spdk_bdev_module *module)
9110 : {
9111 20 : struct spdk_bdev *bdev = desc->bdev;
9112 : struct spdk_bdev_module_claim *claim;
9113 :
9114 20 : assert(spdk_spin_held(&bdev->internal.spinlock));
9115 20 : assert(claim_type_is_v2(type));
9116 20 : assert(desc->claim == NULL);
9117 :
9118 20 : claim = calloc(1, sizeof(*desc->claim));
9119 20 : if (claim == NULL) {
9120 0 : SPDK_ERRLOG("%s: out of memory while allocating claim\n", bdev->name);
9121 0 : return -ENOMEM;
9122 : }
9123 20 : claim->module = module;
9124 20 : claim->desc = desc;
9125 : SPDK_STATIC_ASSERT(sizeof(claim->name) == sizeof(opts->name), "sizes must match");
9126 20 : memcpy(claim->name, opts->name, sizeof(claim->name));
9127 20 : desc->claim = claim;
9128 :
9129 20 : if (bdev->internal.claim_type == SPDK_BDEV_CLAIM_NONE) {
9130 16 : bdev->internal.claim_type = type;
9131 16 : TAILQ_INIT(&bdev->internal.claim.v2.claims);
9132 16 : bdev->internal.claim.v2.key = opts->shared_claim_key;
9133 16 : }
9134 20 : assert(type == bdev->internal.claim_type);
9135 :
9136 20 : TAILQ_INSERT_TAIL(&bdev->internal.claim.v2.claims, claim, link);
9137 :
9138 20 : if (!desc->write && claim_type_promotes_to_write(type)) {
9139 6 : desc->write = true;
9140 6 : }
9141 :
9142 20 : return 0;
9143 20 : }
9144 :
9145 : int
9146 44 : spdk_bdev_module_claim_bdev_desc(struct spdk_bdev_desc *desc, enum spdk_bdev_claim_type type,
9147 : struct spdk_bdev_claim_opts *_opts,
9148 : struct spdk_bdev_module *module)
9149 : {
9150 : struct spdk_bdev *bdev;
9151 : struct spdk_bdev_claim_opts opts;
9152 44 : int rc = 0;
9153 :
9154 44 : if (desc == NULL) {
9155 0 : SPDK_ERRLOG("descriptor must not be NULL\n");
9156 0 : return -EINVAL;
9157 : }
9158 :
9159 44 : bdev = desc->bdev;
9160 :
9161 44 : if (_opts == NULL) {
9162 22 : spdk_bdev_claim_opts_init(&opts, sizeof(opts));
9163 44 : } else if (claim_opts_copy(_opts, &opts) != 0) {
9164 0 : return -EINVAL;
9165 : }
9166 :
9167 44 : spdk_spin_lock(&bdev->internal.spinlock);
9168 :
9169 44 : if (bdev->internal.claim_type != SPDK_BDEV_CLAIM_NONE &&
9170 17 : bdev->internal.claim_type != type) {
9171 11 : LOG_ALREADY_CLAIMED_ERROR("already claimed", bdev);
9172 11 : spdk_spin_unlock(&bdev->internal.spinlock);
9173 11 : return -EPERM;
9174 : }
9175 :
9176 33 : if (claim_type_is_v2(type) && desc->claim != NULL) {
9177 0 : SPDK_ERRLOG("%s: descriptor already has %s claim with name '%s'\n",
9178 : bdev->name, spdk_bdev_claim_get_name(type), desc->claim->name);
9179 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9180 0 : return -EPERM;
9181 : }
9182 :
9183 33 : switch (type) {
9184 : case SPDK_BDEV_CLAIM_EXCL_WRITE:
9185 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9186 0 : return spdk_bdev_module_claim_bdev(bdev, desc, module);
9187 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE:
9188 10 : rc = claim_verify_rwo(desc, type, &opts, module);
9189 10 : break;
9190 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_NONE:
9191 15 : rc = claim_verify_rom(desc, type, &opts, module);
9192 15 : break;
9193 : case SPDK_BDEV_CLAIM_READ_MANY_WRITE_SHARED:
9194 8 : rc = claim_verify_rwm(desc, type, &opts, module);
9195 8 : break;
9196 : default:
9197 0 : SPDK_ERRLOG("%s: claim type %d not supported\n", bdev->name, type);
9198 0 : rc = -ENOTSUP;
9199 0 : }
9200 :
9201 33 : if (rc == 0) {
9202 20 : rc = claim_bdev(desc, type, &opts, module);
9203 20 : }
9204 :
9205 33 : spdk_spin_unlock(&bdev->internal.spinlock);
9206 33 : return rc;
9207 44 : }
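
A hedged sketch of taking a v2 claim through an already-open descriptor (the claim name and module argument are placeholders):

/* Placeholder illustration: take a read-many-write-one claim on an open descriptor. */
static int
example_claim(struct spdk_bdev_desc *desc, struct spdk_bdev_module *my_module)
{
	struct spdk_bdev_claim_opts opts;
	int rc;

	spdk_bdev_claim_opts_init(&opts, sizeof(opts));
	snprintf(opts.name, sizeof(opts.name), "example-claim");

	rc = spdk_bdev_module_claim_bdev_desc(desc, SPDK_BDEV_CLAIM_READ_MANY_WRITE_ONE,
					      &opts, my_module);
	if (rc != 0) {
		SPDK_ERRLOG("claim failed: %d\n", rc);
	}
	/* On success the claim is released automatically when desc is closed. */
	return rc;
}
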
9208 :
9209 : static void
9210 16 : claim_reset(struct spdk_bdev *bdev)
9211 : {
9212 16 : assert(spdk_spin_held(&bdev->internal.spinlock));
9213 16 : assert(claim_type_is_v2(bdev->internal.claim_type));
9214 16 : assert(TAILQ_EMPTY(&bdev->internal.claim.v2.claims));
9215 :
9216 16 : memset(&bdev->internal.claim, 0, sizeof(bdev->internal.claim));
9217 16 : bdev->internal.claim_type = SPDK_BDEV_CLAIM_NONE;
9218 16 : }
9219 :
9220 : static void
9221 20 : bdev_desc_release_claims(struct spdk_bdev_desc *desc)
9222 : {
9223 20 : struct spdk_bdev *bdev = desc->bdev;
9224 :
9225 20 : assert(spdk_spin_held(&bdev->internal.spinlock));
9226 20 : assert(claim_type_is_v2(bdev->internal.claim_type));
9227 :
9228 20 : if (bdev->internal.examine_in_progress == 0) {
9229 20 : TAILQ_REMOVE(&bdev->internal.claim.v2.claims, desc->claim, link);
9230 20 : free(desc->claim);
9231 20 : if (TAILQ_EMPTY(&bdev->internal.claim.v2.claims)) {
9232 16 : claim_reset(bdev);
9233 16 : }
9234 20 : } else {
9235 : /* This is a dead claim that will be cleaned up when bdev_examine() is done. */
9236 0 : desc->claim->module = NULL;
9237 0 : desc->claim->desc = NULL;
9238 : }
9239 20 : desc->claim = NULL;
9240 20 : }
9241 :
9242 : /*
9243 : * End claims v2
9244 : */
9245 :
9246 : struct spdk_bdev *
9247 1553 : spdk_bdev_desc_get_bdev(struct spdk_bdev_desc *desc)
9248 : {
9249 1553 : assert(desc != NULL);
9250 1553 : return desc->bdev;
9251 : }
9252 :
9253 : int
9254 1 : spdk_for_each_bdev(void *ctx, spdk_for_each_bdev_fn fn)
9255 : {
9256 : struct spdk_bdev *bdev, *tmp;
9257 : struct spdk_bdev_desc *desc;
9258 1 : int rc = 0;
9259 :
9260 1 : assert(fn != NULL);
9261 :
9262 1 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9263 1 : bdev = spdk_bdev_first();
9264 9 : while (bdev != NULL) {
9265 8 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
9266 8 : if (rc != 0) {
9267 0 : break;
9268 : }
9269 8 : rc = bdev_open(bdev, false, desc);
9270 8 : if (rc != 0) {
9271 1 : bdev_desc_free(desc);
9272 1 : if (rc == -ENODEV) {
9273 : /* Ignore the error and move to the next bdev. */
9274 1 : rc = 0;
9275 1 : bdev = spdk_bdev_next(bdev);
9276 1 : continue;
9277 : }
9278 0 : break;
9279 : }
9280 7 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9281 :
9282 7 : rc = fn(ctx, bdev);
9283 :
9284 7 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9285 7 : tmp = spdk_bdev_next(bdev);
9286 7 : bdev_close(bdev, desc);
9287 7 : if (rc != 0) {
9288 0 : break;
9289 : }
9290 7 : bdev = tmp;
9291 : }
9292 1 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9293 :
9294 1 : return rc;
9295 : }
9296 :
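
Illustrative iteration over all registered bdevs (callback names are placeholders); returning non-zero from the callback stops the walk:

/* Placeholder callback: print each bdev name; return non-zero to stop iterating. */
static int
example_dump_bdev(void *ctx, struct spdk_bdev *bdev)
{
	SPDK_NOTICELOG("bdev: %s, block size %u\n",
		       spdk_bdev_get_name(bdev), spdk_bdev_get_block_size(bdev));
	return 0;
}

static int
example_dump_all(void)
{
	/* Each bdev is temporarily opened read-only while the callback runs. */
	return spdk_for_each_bdev(NULL, example_dump_bdev);
}
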
9297 : int
9298 1 : spdk_for_each_bdev_leaf(void *ctx, spdk_for_each_bdev_fn fn)
9299 : {
9300 : struct spdk_bdev *bdev, *tmp;
9301 : struct spdk_bdev_desc *desc;
9302 1 : int rc = 0;
9303 :
9304 1 : assert(fn != NULL);
9305 :
9306 1 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9307 1 : bdev = spdk_bdev_first_leaf();
9308 6 : while (bdev != NULL) {
9309 5 : rc = bdev_desc_alloc(bdev, _tmp_bdev_event_cb, NULL, NULL, &desc);
9310 5 : if (rc != 0) {
9311 0 : break;
9312 : }
9313 5 : rc = bdev_open(bdev, false, desc);
9314 5 : if (rc != 0) {
9315 1 : bdev_desc_free(desc);
9316 1 : if (rc == -ENODEV) {
9317 : /* Ignore the error and move to the next bdev. */
9318 1 : rc = 0;
9319 1 : bdev = spdk_bdev_next_leaf(bdev);
9320 1 : continue;
9321 : }
9322 0 : break;
9323 : }
9324 4 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9325 :
9326 4 : rc = fn(ctx, bdev);
9327 :
9328 4 : spdk_spin_lock(&g_bdev_mgr.spinlock);
9329 4 : tmp = spdk_bdev_next_leaf(bdev);
9330 4 : bdev_close(bdev, desc);
9331 4 : if (rc != 0) {
9332 0 : break;
9333 : }
9334 4 : bdev = tmp;
9335 : }
9336 1 : spdk_spin_unlock(&g_bdev_mgr.spinlock);
9337 :
9338 1 : return rc;
9339 : }
9340 :
9341 : void
9342 0 : spdk_bdev_io_get_iovec(struct spdk_bdev_io *bdev_io, struct iovec **iovp, int *iovcntp)
9343 : {
9344 : struct iovec *iovs;
9345 : int iovcnt;
9346 :
9347 0 : if (bdev_io == NULL) {
9348 0 : return;
9349 : }
9350 :
9351 0 : switch (bdev_io->type) {
9352 : case SPDK_BDEV_IO_TYPE_READ:
9353 : case SPDK_BDEV_IO_TYPE_WRITE:
9354 : case SPDK_BDEV_IO_TYPE_ZCOPY:
9355 0 : iovs = bdev_io->u.bdev.iovs;
9356 0 : iovcnt = bdev_io->u.bdev.iovcnt;
9357 0 : break;
9358 : default:
9359 0 : iovs = NULL;
9360 0 : iovcnt = 0;
9361 0 : break;
9362 : }
9363 :
9364 0 : if (iovp) {
9365 0 : *iovp = iovs;
9366 0 : }
9367 0 : if (iovcntp) {
9368 0 : *iovcntp = iovcnt;
9369 0 : }
9370 0 : }
9371 :
9372 : void *
9373 0 : spdk_bdev_io_get_md_buf(struct spdk_bdev_io *bdev_io)
9374 : {
9375 0 : if (bdev_io == NULL) {
9376 0 : return NULL;
9377 : }
9378 :
9379 0 : if (!spdk_bdev_is_md_separate(bdev_io->bdev)) {
9380 0 : return NULL;
9381 : }
9382 :
9383 0 : if (bdev_io->type == SPDK_BDEV_IO_TYPE_READ ||
9384 0 : bdev_io->type == SPDK_BDEV_IO_TYPE_WRITE) {
9385 0 : return bdev_io->u.bdev.md_buf;
9386 : }
9387 :
9388 0 : return NULL;
9389 0 : }
9390 :
9391 : void *
9392 0 : spdk_bdev_io_get_cb_arg(struct spdk_bdev_io *bdev_io)
9393 : {
9394 0 : if (bdev_io == NULL) {
9395 0 : assert(false);
9396 : return NULL;
9397 : }
9398 :
9399 0 : return bdev_io->internal.caller_ctx;
9400 : }
9401 :
9402 : void
9403 7 : spdk_bdev_module_list_add(struct spdk_bdev_module *bdev_module)
9404 : {
9405 :
9406 7 : if (spdk_bdev_module_list_find(bdev_module->name)) {
9407 0 : SPDK_ERRLOG("ERROR: module '%s' already registered.\n", bdev_module->name);
9408 0 : assert(false);
9409 : }
9410 :
9411 7 : spdk_spin_init(&bdev_module->internal.spinlock);
9412 7 : TAILQ_INIT(&bdev_module->internal.quiesced_ranges);
9413 :
9414 : /*
9415 : * Modules with examine callbacks must be initialized first, so they are
9416 : * ready to handle examine callbacks from later modules that will
9417 : * register physical bdevs.
9418 : */
9419 7 : if (bdev_module->examine_config != NULL || bdev_module->examine_disk != NULL) {
9420 4 : TAILQ_INSERT_HEAD(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
9421 4 : } else {
9422 3 : TAILQ_INSERT_TAIL(&g_bdev_mgr.bdev_modules, bdev_module, internal.tailq);
9423 : }
9424 7 : }
9425 :
9426 : struct spdk_bdev_module *
9427 7 : spdk_bdev_module_list_find(const char *name)
9428 : {
9429 : struct spdk_bdev_module *bdev_module;
9430 :
9431 14 : TAILQ_FOREACH(bdev_module, &g_bdev_mgr.bdev_modules, internal.tailq) {
9432 7 : if (strcmp(name, bdev_module->name) == 0) {
9433 0 : break;
9434 : }
9435 7 : }
9436 :
9437 7 : return bdev_module;
9438 : }
9439 :
9440 : static int
9441 6 : bdev_write_zero_buffer(struct spdk_bdev_io *bdev_io)
9442 : {
9443 : uint64_t num_blocks;
9444 6 : void *md_buf = NULL;
9445 :
9446 6 : num_blocks = bdev_io->u.bdev.num_blocks;
9447 :
9448 6 : if (spdk_bdev_is_md_separate(bdev_io->bdev)) {
9449 4 : md_buf = (char *)g_bdev_mgr.zero_buffer +
9450 2 : spdk_bdev_get_block_size(bdev_io->bdev) * num_blocks;
9451 2 : }
9452 :
9453 12 : return bdev_write_blocks_with_md(bdev_io->internal.desc,
9454 6 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
9455 6 : g_bdev_mgr.zero_buffer, md_buf,
9456 6 : bdev_io->u.bdev.offset_blocks, num_blocks,
9457 6 : bdev_write_zero_buffer_done, bdev_io);
9458 : }
9459 :
9460 : static void
9461 6 : bdev_write_zero_buffer_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
9462 : {
9463 6 : struct spdk_bdev_io *parent_io = cb_arg;
9464 :
9465 6 : spdk_bdev_free_io(bdev_io);
9466 :
9467 6 : parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
9468 6 : parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
9469 6 : }
9470 :
9471 : static void
9472 10 : bdev_set_qos_limit_done(struct set_qos_limit_ctx *ctx, int status)
9473 : {
9474 10 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9475 10 : ctx->bdev->internal.qos_mod_in_progress = false;
9476 10 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9477 :
9478 10 : if (ctx->cb_fn) {
9479 8 : ctx->cb_fn(ctx->cb_arg, status);
9480 8 : }
9481 10 : free(ctx);
9482 10 : }
9483 :
9484 : static void
9485 2 : bdev_disable_qos_done(void *cb_arg)
9486 : {
9487 2 : struct set_qos_limit_ctx *ctx = cb_arg;
9488 2 : struct spdk_bdev *bdev = ctx->bdev;
9489 : struct spdk_bdev_qos *qos;
9490 :
9491 2 : spdk_spin_lock(&bdev->internal.spinlock);
9492 2 : qos = bdev->internal.qos;
9493 2 : bdev->internal.qos = NULL;
9494 2 : spdk_spin_unlock(&bdev->internal.spinlock);
9495 :
9496 2 : if (qos->thread != NULL) {
9497 2 : spdk_put_io_channel(spdk_io_channel_from_ctx(qos->ch));
9498 2 : spdk_poller_unregister(&qos->poller);
9499 2 : }
9500 :
9501 2 : free(qos);
9502 :
9503 2 : bdev_set_qos_limit_done(ctx, 0);
9504 2 : }
9505 :
9506 : static void
9507 2 : bdev_disable_qos_msg_done(struct spdk_bdev *bdev, void *_ctx, int status)
9508 : {
9509 2 : struct set_qos_limit_ctx *ctx = _ctx;
9510 : struct spdk_thread *thread;
9511 :
9512 2 : spdk_spin_lock(&bdev->internal.spinlock);
9513 2 : thread = bdev->internal.qos->thread;
9514 2 : spdk_spin_unlock(&bdev->internal.spinlock);
9515 :
9516 2 : if (thread != NULL) {
9517 2 : spdk_thread_send_msg(thread, bdev_disable_qos_done, ctx);
9518 2 : } else {
9519 0 : bdev_disable_qos_done(ctx);
9520 : }
9521 2 : }
9522 :
9523 : static void
9524 4 : bdev_disable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9525 : struct spdk_io_channel *ch, void *_ctx)
9526 : {
9527 4 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9528 : struct spdk_bdev_io *bdev_io;
9529 :
9530 4 : bdev_ch->flags &= ~BDEV_CH_QOS_ENABLED;
9531 :
9532 6 : while (!TAILQ_EMPTY(&bdev_ch->qos_queued_io)) {
9533 : /* Re-submit the queued I/O. */
9534 2 : bdev_io = TAILQ_FIRST(&bdev_ch->qos_queued_io);
9535 2 : TAILQ_REMOVE(&bdev_ch->qos_queued_io, bdev_io, internal.link);
9536 2 : _bdev_io_submit(bdev_io);
9537 : }
9538 :
9539 4 : spdk_bdev_for_each_channel_continue(i, 0);
9540 4 : }
9541 :
9542 : static void
9543 1 : bdev_update_qos_rate_limit_msg(void *cb_arg)
9544 : {
9545 1 : struct set_qos_limit_ctx *ctx = cb_arg;
9546 1 : struct spdk_bdev *bdev = ctx->bdev;
9547 :
9548 1 : spdk_spin_lock(&bdev->internal.spinlock);
9549 1 : bdev_qos_update_max_quota_per_timeslice(bdev->internal.qos);
9550 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9551 :
9552 1 : bdev_set_qos_limit_done(ctx, 0);
9553 1 : }
9554 :
9555 : static void
9556 9 : bdev_enable_qos_msg(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9557 : struct spdk_io_channel *ch, void *_ctx)
9558 : {
9559 9 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9560 :
9561 9 : spdk_spin_lock(&bdev->internal.spinlock);
9562 9 : bdev_enable_qos(bdev, bdev_ch);
9563 9 : spdk_spin_unlock(&bdev->internal.spinlock);
9564 9 : spdk_bdev_for_each_channel_continue(i, 0);
9565 9 : }
9566 :
9567 : static void
9568 6 : bdev_enable_qos_done(struct spdk_bdev *bdev, void *_ctx, int status)
9569 : {
9570 6 : struct set_qos_limit_ctx *ctx = _ctx;
9571 :
9572 6 : bdev_set_qos_limit_done(ctx, status);
9573 6 : }
9574 :
9575 : static void
9576 7 : bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits)
9577 : {
9578 : int i;
9579 :
9580 7 : assert(bdev->internal.qos != NULL);
9581 :
9582 35 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9583 28 : if (limits[i] != SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
9584 28 : bdev->internal.qos->rate_limits[i].limit = limits[i];
9585 :
9586 28 : if (limits[i] == 0) {
9587 19 : bdev->internal.qos->rate_limits[i].limit =
9588 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED;
9589 19 : }
9590 28 : }
9591 28 : }
9592 7 : }
9593 :
9594 : void
9595 9 : spdk_bdev_set_qos_rate_limits(struct spdk_bdev *bdev, uint64_t *limits,
9596 : void (*cb_fn)(void *cb_arg, int status), void *cb_arg)
9597 : {
9598 : struct set_qos_limit_ctx *ctx;
9599 : uint32_t limit_set_complement;
9600 : uint64_t min_limit_per_sec;
9601 : int i;
9602 9 : bool disable_rate_limit = true;
9603 :
9604 45 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9605 36 : if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED) {
9606 0 : continue;
9607 : }
9608 :
9609 36 : if (limits[i] > 0) {
9610 10 : disable_rate_limit = false;
9611 10 : }
9612 :
9613 36 : if (bdev_qos_is_iops_rate_limit(i) == true) {
9614 9 : min_limit_per_sec = SPDK_BDEV_QOS_MIN_IOS_PER_SEC;
9615 9 : } else {
9616 27 : if (limits[i] > SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC) {
9617 0 : SPDK_WARNLOG("Requested rate limit %" PRIu64 " will result in uint64_t overflow, "
9618 : "reset to %" PRIu64 "\n", limits[i], SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC);
9619 0 : limits[i] = SPDK_BDEV_QOS_MAX_MBYTES_PER_SEC;
9620 0 : }
9621 : /* Change from megabyte to byte rate limit */
9622 27 : limits[i] = limits[i] * 1024 * 1024;
9623 27 : min_limit_per_sec = SPDK_BDEV_QOS_MIN_BYTES_PER_SEC;
9624 : }
9625 :
9626 36 : limit_set_complement = limits[i] % min_limit_per_sec;
9627 36 : if (limit_set_complement) {
9628 0 : SPDK_ERRLOG("Requested rate limit %" PRIu64 " is not a multiple of %" PRIu64 "\n",
9629 : limits[i], min_limit_per_sec);
9630 0 : limits[i] += min_limit_per_sec - limit_set_complement;
9631 0 : SPDK_ERRLOG("Round up the rate limit to %" PRIu64 "\n", limits[i]);
9632 0 : }
9633 36 : }
9634 :
9635 9 : ctx = calloc(1, sizeof(*ctx));
9636 9 : if (ctx == NULL) {
9637 0 : cb_fn(cb_arg, -ENOMEM);
9638 0 : return;
9639 : }
9640 :
9641 9 : ctx->cb_fn = cb_fn;
9642 9 : ctx->cb_arg = cb_arg;
9643 9 : ctx->bdev = bdev;
9644 :
9645 9 : spdk_spin_lock(&bdev->internal.spinlock);
9646 9 : if (bdev->internal.qos_mod_in_progress) {
9647 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9648 1 : free(ctx);
9649 1 : cb_fn(cb_arg, -EAGAIN);
9650 1 : return;
9651 : }
9652 8 : bdev->internal.qos_mod_in_progress = true;
9653 :
9654 8 : if (disable_rate_limit == true && bdev->internal.qos) {
9655 10 : for (i = 0; i < SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES; i++) {
9656 8 : if (limits[i] == SPDK_BDEV_QOS_LIMIT_NOT_DEFINED &&
9657 0 : (bdev->internal.qos->rate_limits[i].limit > 0 &&
9658 0 : bdev->internal.qos->rate_limits[i].limit !=
9659 : SPDK_BDEV_QOS_LIMIT_NOT_DEFINED)) {
9660 0 : disable_rate_limit = false;
9661 0 : break;
9662 : }
9663 8 : }
9664 2 : }
9665 :
9666 8 : if (disable_rate_limit == false) {
9667 5 : if (bdev->internal.qos == NULL) {
9668 4 : bdev->internal.qos = calloc(1, sizeof(*bdev->internal.qos));
9669 4 : if (!bdev->internal.qos) {
9670 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9671 0 : SPDK_ERRLOG("Unable to allocate memory for QoS tracking\n");
9672 0 : bdev_set_qos_limit_done(ctx, -ENOMEM);
9673 0 : return;
9674 : }
9675 4 : }
9676 :
9677 5 : if (bdev->internal.qos->thread == NULL) {
9678 : /* Enabling */
9679 4 : bdev_set_qos_rate_limits(bdev, limits);
9680 :
9681 4 : spdk_bdev_for_each_channel(bdev, bdev_enable_qos_msg, ctx,
9682 : bdev_enable_qos_done);
9683 4 : } else {
9684 : /* Updating */
9685 1 : bdev_set_qos_rate_limits(bdev, limits);
9686 :
9687 2 : spdk_thread_send_msg(bdev->internal.qos->thread,
9688 1 : bdev_update_qos_rate_limit_msg, ctx);
9689 : }
9690 5 : } else {
9691 3 : if (bdev->internal.qos != NULL) {
9692 2 : bdev_set_qos_rate_limits(bdev, limits);
9693 :
9694 : /* Disabling */
9695 2 : spdk_bdev_for_each_channel(bdev, bdev_disable_qos_msg, ctx,
9696 : bdev_disable_qos_msg_done);
9697 2 : } else {
9698 1 : spdk_spin_unlock(&bdev->internal.spinlock);
9699 1 : bdev_set_qos_limit_done(ctx, 0);
9700 1 : return;
9701 : }
9702 : }
9703 :
9704 7 : spdk_spin_unlock(&bdev->internal.spinlock);
9705 9 : }
9706 :
9707 : struct spdk_bdev_histogram_ctx {
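
Illustrative only: the limits array has SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES entries, a zero entry leaves that type unlimited, and the IOPS/MB per second values below are arbitrary. The rate-limit type enumerators are assumed to be the standard ones from spdk/bdev.h.

/* Sketch (not from this file): cap a bdev at 10k IOPS and 100 MB/s. */
static void
example_qos_done(void *cb_arg, int status)
{
	SPDK_NOTICELOG("QoS update finished: %d\n", status);
}

static void
example_set_qos(struct spdk_bdev *bdev)
{
	uint64_t limits[SPDK_BDEV_QOS_NUM_RATE_LIMIT_TYPES] = {0};

	limits[SPDK_BDEV_QOS_RW_IOPS_RATE_LIMIT] = 10000;  /* must be a multiple of 1000 IO/s */
	limits[SPDK_BDEV_QOS_RW_BPS_RATE_LIMIT] = 100;     /* interpreted as MB/s by this API */

	spdk_bdev_set_qos_rate_limits(bdev, limits, example_qos_done, NULL);
}
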
9708 : spdk_bdev_histogram_status_cb cb_fn;
9709 : void *cb_arg;
9710 : struct spdk_bdev *bdev;
9711 : int status;
9712 : };
9713 :
9714 : static void
9715 2 : bdev_histogram_disable_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9716 : {
9717 2 : struct spdk_bdev_histogram_ctx *ctx = _ctx;
9718 :
9719 2 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9720 2 : ctx->bdev->internal.histogram_in_progress = false;
9721 2 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9722 2 : ctx->cb_fn(ctx->cb_arg, ctx->status);
9723 2 : free(ctx);
9724 2 : }
9725 :
9726 : static void
9727 3 : bdev_histogram_disable_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9728 : struct spdk_io_channel *_ch, void *_ctx)
9729 : {
9730 3 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9731 :
9732 3 : if (ch->histogram != NULL) {
9733 3 : spdk_histogram_data_free(ch->histogram);
9734 3 : ch->histogram = NULL;
9735 3 : }
9736 3 : spdk_bdev_for_each_channel_continue(i, 0);
9737 3 : }
9738 :
9739 : static void
9740 2 : bdev_histogram_enable_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9741 : {
9742 2 : struct spdk_bdev_histogram_ctx *ctx = _ctx;
9743 :
9744 2 : if (status != 0) {
9745 0 : ctx->status = status;
9746 0 : ctx->bdev->internal.histogram_enabled = false;
9747 0 : spdk_bdev_for_each_channel(ctx->bdev, bdev_histogram_disable_channel, ctx,
9748 : bdev_histogram_disable_channel_cb);
9749 0 : } else {
9750 2 : spdk_spin_lock(&ctx->bdev->internal.spinlock);
9751 2 : ctx->bdev->internal.histogram_in_progress = false;
9752 2 : spdk_spin_unlock(&ctx->bdev->internal.spinlock);
9753 2 : ctx->cb_fn(ctx->cb_arg, ctx->status);
9754 2 : free(ctx);
9755 : }
9756 2 : }
9757 :
9758 : static void
9759 3 : bdev_histogram_enable_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9760 : struct spdk_io_channel *_ch, void *_ctx)
9761 : {
9762 3 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9763 3 : int status = 0;
9764 :
9765 3 : if (ch->histogram == NULL) {
9766 3 : ch->histogram = spdk_histogram_data_alloc();
9767 3 : if (ch->histogram == NULL) {
9768 0 : status = -ENOMEM;
9769 0 : }
9770 3 : }
9771 :
9772 3 : spdk_bdev_for_each_channel_continue(i, status);
9773 3 : }
9774 :
9775 : void
9776 4 : spdk_bdev_histogram_enable_ext(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
9777 : void *cb_arg, bool enable, struct spdk_bdev_enable_histogram_opts *opts)
9778 : {
9779 : struct spdk_bdev_histogram_ctx *ctx;
9780 :
9781 4 : ctx = calloc(1, sizeof(struct spdk_bdev_histogram_ctx));
9782 4 : if (ctx == NULL) {
9783 0 : cb_fn(cb_arg, -ENOMEM);
9784 0 : return;
9785 : }
9786 :
9787 4 : ctx->bdev = bdev;
9788 4 : ctx->status = 0;
9789 4 : ctx->cb_fn = cb_fn;
9790 4 : ctx->cb_arg = cb_arg;
9791 :
9792 4 : spdk_spin_lock(&bdev->internal.spinlock);
9793 4 : if (bdev->internal.histogram_in_progress) {
9794 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9795 0 : free(ctx);
9796 0 : cb_fn(cb_arg, -EAGAIN);
9797 0 : return;
9798 : }
9799 :
9800 4 : bdev->internal.histogram_in_progress = true;
9801 4 : spdk_spin_unlock(&bdev->internal.spinlock);
9802 :
9803 4 : bdev->internal.histogram_enabled = enable;
9804 4 : bdev->internal.histogram_io_type = opts->io_type;
9805 :
9806 4 : if (enable) {
9807 : /* Allocate histogram for each channel */
9808 2 : spdk_bdev_for_each_channel(bdev, bdev_histogram_enable_channel, ctx,
9809 : bdev_histogram_enable_channel_cb);
9810 2 : } else {
9811 2 : spdk_bdev_for_each_channel(bdev, bdev_histogram_disable_channel, ctx,
9812 : bdev_histogram_disable_channel_cb);
9813 : }
9814 4 : }
9815 :
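 : /* spdk_bdev_enable_histogram_opts is a size-versioned options struct: the caller
 :  * passes sizeof(opts), and this helper zeroes the struct and only fills in fields
 :  * that fit within that size (via FIELD_OK/SET_FIELD below), so callers built against
 :  * a different version of the struct are still handled safely. */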
9816 : void
9817 4 : spdk_bdev_enable_histogram_opts_init(struct spdk_bdev_enable_histogram_opts *opts, size_t size)
9818 : {
9819 4 : if (opts == NULL) {
9820 0 : SPDK_ERRLOG("opts should not be NULL\n");
9821 0 : assert(opts != NULL);
9822 0 : return;
9823 : }
9824 4 : if (size == 0) {
9825 0 : SPDK_ERRLOG("size should not be zero\n");
9826 0 : assert(size != 0);
9827 0 : return;
9828 : }
9829 :
9830 4 : memset(opts, 0, size);
9831 4 : opts->size = size;
9832 :
9833 : #define FIELD_OK(field) \
9834 : offsetof(struct spdk_bdev_enable_histogram_opts, field) + sizeof(opts->field) <= size
9835 :
9836 : #define SET_FIELD(field, value) \
9837 : if (FIELD_OK(field)) { \
9838 : opts->field = value; \
9839 : } \
9840 :
9841 4 : SET_FIELD(io_type, 0);
9842 :
 9843 :  /* Do not remove this assert. When a new field is added, update the size it
 9844 :   * checks and add a corresponding SET_FIELD statement above. */
9845 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bdev_enable_histogram_opts) == 9, "Incorrect size");
9846 :
9847 : #undef FIELD_OK
9848 : #undef SET_FIELD
9849 4 : }
9850 :
9851 : void
9852 4 : spdk_bdev_histogram_enable(struct spdk_bdev *bdev, spdk_bdev_histogram_status_cb cb_fn,
9853 : void *cb_arg, bool enable)
9854 : {
9855 : struct spdk_bdev_enable_histogram_opts opts;
9856 :
9857 4 : spdk_bdev_enable_histogram_opts_init(&opts, sizeof(opts));
9858 4 : spdk_bdev_histogram_enable_ext(bdev, cb_fn, cb_arg, enable, &opts);
9859 4 : }
9860 :
9861 : struct spdk_bdev_histogram_data_ctx {
9862 : spdk_bdev_histogram_data_cb cb_fn;
9863 : void *cb_arg;
9864 : struct spdk_bdev *bdev;
9865 : /** merged histogram data from all channels */
9866 : struct spdk_histogram_data *histogram;
9867 : };
9868 :
9869 : static void
9870 5 : bdev_histogram_get_channel_cb(struct spdk_bdev *bdev, void *_ctx, int status)
9871 : {
9872 5 : struct spdk_bdev_histogram_data_ctx *ctx = _ctx;
9873 :
9874 5 : ctx->cb_fn(ctx->cb_arg, status, ctx->histogram);
9875 5 : free(ctx);
9876 5 : }
9877 :
9878 : static void
9879 7 : bdev_histogram_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
9880 : struct spdk_io_channel *_ch, void *_ctx)
9881 : {
9882 7 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
9883 7 : struct spdk_bdev_histogram_data_ctx *ctx = _ctx;
9884 7 : int status = 0;
9885 :
9886 7 : if (ch->histogram == NULL) {
9887 1 : status = -EFAULT;
9888 1 : } else {
9889 6 : spdk_histogram_data_merge(ctx->histogram, ch->histogram);
9890 : }
9891 :
9892 7 : spdk_bdev_for_each_channel_continue(i, status);
9893 7 : }
9894 :
9895 : void
9896 5 : spdk_bdev_histogram_get(struct spdk_bdev *bdev, struct spdk_histogram_data *histogram,
9897 : spdk_bdev_histogram_data_cb cb_fn,
9898 : void *cb_arg)
9899 : {
9900 : struct spdk_bdev_histogram_data_ctx *ctx;
9901 :
9902 5 : ctx = calloc(1, sizeof(struct spdk_bdev_histogram_data_ctx));
9903 5 : if (ctx == NULL) {
9904 0 : cb_fn(cb_arg, -ENOMEM, NULL);
9905 0 : return;
9906 : }
9907 :
9908 5 : ctx->bdev = bdev;
9909 5 : ctx->cb_fn = cb_fn;
9910 5 : ctx->cb_arg = cb_arg;
9911 :
9912 5 : ctx->histogram = histogram;
9913 :
9914 5 : spdk_bdev_for_each_channel(bdev, bdev_histogram_get_channel, ctx,
9915 : bdev_histogram_get_channel_cb);
9916 5 : }
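 :
 : /* Usage sketch for the histogram API above (illustrative only; enable_done and
 :  * histogram_done are placeholder callback names, not part of this file):
 :  *
 :  *   // Start collecting per-channel histogram data for the bdev's I/O.
 :  *   spdk_bdev_histogram_enable(bdev, enable_done, NULL, true);
 :  *
 :  *   // Later, merge the per-channel histograms into one caller-owned histogram.
 :  *   struct spdk_histogram_data *h = spdk_histogram_data_alloc();
 :  *   spdk_bdev_histogram_get(bdev, h, histogram_done, NULL);
 :  *
 :  * enable_done(void *cb_arg, int status) and
 :  * histogram_done(void *cb_arg, int status, struct spdk_histogram_data *h) run once
 :  * every channel has been visited; the caller frees h with spdk_histogram_data_free().
 :  */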
9917 :
9918 : void
9919 2 : spdk_bdev_channel_get_histogram(struct spdk_io_channel *ch, spdk_bdev_histogram_data_cb cb_fn,
9920 : void *cb_arg)
9921 : {
9922 2 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(ch);
9923 2 : int status = 0;
9924 :
9925 2 : assert(cb_fn != NULL);
9926 :
9927 2 : if (bdev_ch->histogram == NULL) {
9928 1 : status = -EFAULT;
9929 1 : }
9930 2 : cb_fn(cb_arg, status, bdev_ch->histogram);
9931 2 : }
9932 :
9933 : size_t
9934 0 : spdk_bdev_get_media_events(struct spdk_bdev_desc *desc, struct spdk_bdev_media_event *events,
9935 : size_t max_events)
9936 : {
9937 : struct media_event_entry *entry;
9938 0 : size_t num_events = 0;
9939 :
9940 0 : for (; num_events < max_events; ++num_events) {
9941 0 : entry = TAILQ_FIRST(&desc->pending_media_events);
9942 0 : if (entry == NULL) {
9943 0 : break;
9944 : }
9945 :
9946 0 : events[num_events] = entry->event;
9947 0 : TAILQ_REMOVE(&desc->pending_media_events, entry, tailq);
9948 0 : TAILQ_INSERT_TAIL(&desc->free_media_events, entry, tailq);
9949 0 : }
9950 :
9951 0 : return num_events;
9952 : }
9953 :
9954 : int
9955 0 : spdk_bdev_push_media_events(struct spdk_bdev *bdev, const struct spdk_bdev_media_event *events,
9956 : size_t num_events)
9957 : {
9958 : struct spdk_bdev_desc *desc;
9959 : struct media_event_entry *entry;
9960 : size_t event_id;
9961 0 : int rc = 0;
9962 :
9963 0 : assert(bdev->media_events);
9964 :
9965 0 : spdk_spin_lock(&bdev->internal.spinlock);
9966 0 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
9967 0 : if (desc->write) {
9968 0 : break;
9969 : }
9970 0 : }
9971 :
9972 0 : if (desc == NULL || desc->media_events_buffer == NULL) {
9973 0 : rc = -ENODEV;
9974 0 : goto out;
9975 : }
9976 :
9977 0 : for (event_id = 0; event_id < num_events; ++event_id) {
9978 0 : entry = TAILQ_FIRST(&desc->free_media_events);
9979 0 : if (entry == NULL) {
9980 0 : break;
9981 : }
9982 :
9983 0 : TAILQ_REMOVE(&desc->free_media_events, entry, tailq);
9984 0 : TAILQ_INSERT_TAIL(&desc->pending_media_events, entry, tailq);
9985 0 : entry->event = events[event_id];
9986 0 : }
9987 :
9988 0 : rc = event_id;
9989 : out:
9990 0 : spdk_spin_unlock(&bdev->internal.spinlock);
9991 0 : return rc;
9992 : }
9993 :
9994 : static void
9995 0 : _media_management_notify(void *arg)
9996 : {
9997 0 : struct spdk_bdev_desc *desc = arg;
9998 :
9999 0 : _event_notify(desc, SPDK_BDEV_EVENT_MEDIA_MANAGEMENT);
10000 0 : }
10001 :
10002 : void
10003 0 : spdk_bdev_notify_media_management(struct spdk_bdev *bdev)
10004 : {
10005 : struct spdk_bdev_desc *desc;
10006 :
10007 0 : spdk_spin_lock(&bdev->internal.spinlock);
10008 0 : TAILQ_FOREACH(desc, &bdev->internal.open_descs, link) {
10009 0 : if (!TAILQ_EMPTY(&desc->pending_media_events)) {
10010 0 : event_notify(desc, _media_management_notify);
10011 0 : }
10012 0 : }
10013 0 : spdk_spin_unlock(&bdev->internal.spinlock);
10014 0 : }
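 :
 : /* Usage sketch for the media-event helpers above (illustrative; variable names are
 :  * placeholders). A bdev module that generates media events pushes them and then
 :  * notifies open descriptors:
 :  *
 :  *   rc = spdk_bdev_push_media_events(bdev, events, num_events);
 :  *   spdk_bdev_notify_media_management(bdev);
 :  *
 :  * A consumer that receives SPDK_BDEV_EVENT_MEDIA_MANAGEMENT on its descriptor then
 :  * drains the queue:
 :  *
 :  *   struct spdk_bdev_media_event ev[16];
 :  *   size_t n = spdk_bdev_get_media_events(desc, ev, SPDK_COUNTOF(ev));
 :  */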
10015 :
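 : /* LBA range locking, used by the quiesce API below and by bdev_lock_lba_range()
 :  * callers that need exclusive access to a block range. _bdev_lock_lba_range()
 :  * records the range on the bdev (or parks it on pending_locked_ranges if it overlaps
 :  * an existing lock), adds a copy of the range to every channel, and polls until no
 :  * submitted I/O overlaps it; only then does the lock callback fire. Unlocking removes
 :  * the range from the bdev and from every channel and resubmits any I/O that was
 :  * queued on ch->io_locked while the range was held. */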
10016 : struct locked_lba_range_ctx {
10017 : struct lba_range range;
10018 : struct lba_range *current_range;
10019 : struct lba_range *owner_range;
10020 : struct spdk_poller *poller;
10021 : lock_range_cb cb_fn;
10022 : void *cb_arg;
10023 : };
10024 :
10025 : static void
10026 0 : bdev_lock_error_cleanup_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10027 : {
10028 0 : struct locked_lba_range_ctx *ctx = _ctx;
10029 :
10030 0 : ctx->cb_fn(&ctx->range, ctx->cb_arg, -ENOMEM);
10031 0 : free(ctx);
10032 0 : }
10033 :
10034 : static void bdev_unlock_lba_range_get_channel(struct spdk_bdev_channel_iter *i,
10035 : struct spdk_bdev *bdev, struct spdk_io_channel *ch, void *_ctx);
10036 :
10037 : static void
10038 14 : bdev_lock_lba_range_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10039 : {
10040 14 : struct locked_lba_range_ctx *ctx = _ctx;
10041 :
10042 14 : if (status == -ENOMEM) {
10043 : /* One of the channels could not allocate a range object.
10044 : * So we have to go back and clean up any ranges that were
10045 : * allocated successfully before we return error status to
10046 : * the caller. We can reuse the unlock function to do that
10047 : * clean up.
10048 : */
10049 0 : spdk_bdev_for_each_channel(bdev, bdev_unlock_lba_range_get_channel, ctx,
10050 : bdev_lock_error_cleanup_cb);
10051 0 : return;
10052 : }
10053 :
10054 : /* All channels have locked this range and no I/O overlapping the range
10055 :          * is outstanding! Set the owner_ch for the range object for the
10056 : * locking channel, so that this channel will know that it is allowed
10057 : * to write to this range.
10058 : */
10059 14 : if (ctx->owner_range != NULL) {
10060 10 : ctx->owner_range->owner_ch = ctx->range.owner_ch;
10061 10 : }
10062 :
10063 14 : ctx->cb_fn(&ctx->range, ctx->cb_arg, status);
10064 :
10065 : /* Don't free the ctx here. Its range is in the bdev's global list of
10066 : * locked ranges still, and will be removed and freed when this range
10067 : * is later unlocked.
10068 : */
10069 14 : }
10070 :
10071 : static int
10072 17 : bdev_lock_lba_range_check_io(void *_i)
10073 : {
10074 17 : struct spdk_bdev_channel_iter *i = _i;
10075 17 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i->i);
10076 17 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10077 17 : struct locked_lba_range_ctx *ctx = i->ctx;
10078 17 : struct lba_range *range = ctx->current_range;
10079 : struct spdk_bdev_io *bdev_io;
10080 :
10081 17 : spdk_poller_unregister(&ctx->poller);
10082 :
10083 : /* The range is now in the locked_ranges, so no new IO can be submitted to this
10084 : * range. But we need to wait until any outstanding IO overlapping with this range
10085 :          * is completed.
10086 : */
10087 18 : TAILQ_FOREACH(bdev_io, &ch->io_submitted, internal.ch_link) {
10088 3 : if (bdev_io_range_is_locked(bdev_io, range)) {
10089 2 : ctx->poller = SPDK_POLLER_REGISTER(bdev_lock_lba_range_check_io, i, 100);
10090 2 : return SPDK_POLLER_BUSY;
10091 : }
10092 1 : }
10093 :
10094 15 : spdk_bdev_for_each_channel_continue(i, 0);
10095 15 : return SPDK_POLLER_BUSY;
10096 17 : }
10097 :
10098 : static void
10099 15 : bdev_lock_lba_range_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10100 : struct spdk_io_channel *_ch, void *_ctx)
10101 : {
10102 15 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10103 15 : struct locked_lba_range_ctx *ctx = _ctx;
10104 : struct lba_range *range;
10105 :
10106 16 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10107 1 : if (range->length == ctx->range.length &&
10108 0 : range->offset == ctx->range.offset &&
10109 0 : range->locked_ctx == ctx->range.locked_ctx) {
10110 : /* This range already exists on this channel, so don't add
10111 : * it again. This can happen when a new channel is created
10112 : * while the for_each_channel operation is in progress.
10113 : * Do not check for outstanding I/O in that case, since the
10114 : * range was locked before any I/O could be submitted to the
10115 : * new channel.
10116 : */
10117 0 : spdk_bdev_for_each_channel_continue(i, 0);
10118 0 : return;
10119 : }
10120 1 : }
10121 :
10122 15 : range = calloc(1, sizeof(*range));
10123 15 : if (range == NULL) {
10124 0 : spdk_bdev_for_each_channel_continue(i, -ENOMEM);
10125 0 : return;
10126 : }
10127 :
10128 15 : range->length = ctx->range.length;
10129 15 : range->offset = ctx->range.offset;
10130 15 : range->locked_ctx = ctx->range.locked_ctx;
10131 15 : range->quiesce = ctx->range.quiesce;
10132 15 : ctx->current_range = range;
10133 15 : if (ctx->range.owner_ch == ch) {
10134 : /* This is the range object for the channel that will hold
10135 : * the lock. Store it in the ctx object so that we can easily
10136 : * set its owner_ch after the lock is finally acquired.
10137 : */
10138 10 : ctx->owner_range = range;
10139 10 : }
10140 15 : TAILQ_INSERT_TAIL(&ch->locked_ranges, range, tailq);
10141 15 : bdev_lock_lba_range_check_io(i);
10142 15 : }
10143 :
10144 : static void
10145 14 : bdev_lock_lba_range_ctx(struct spdk_bdev *bdev, struct locked_lba_range_ctx *ctx)
10146 : {
10147 14 : assert(spdk_get_thread() == ctx->range.owner_thread);
10148 14 : assert(ctx->range.owner_ch == NULL ||
10149 : spdk_io_channel_get_thread(ctx->range.owner_ch->channel) == ctx->range.owner_thread);
10150 :
10151 : /* We will add a copy of this range to each channel now. */
10152 14 : spdk_bdev_for_each_channel(bdev, bdev_lock_lba_range_get_channel, ctx,
10153 : bdev_lock_lba_range_cb);
10154 14 : }
10155 :
10156 : static bool
10157 17 : bdev_lba_range_overlaps_tailq(struct lba_range *range, lba_range_tailq_t *tailq)
10158 : {
10159 : struct lba_range *r;
10160 :
10161 18 : TAILQ_FOREACH(r, tailq, tailq) {
10162 4 : if (bdev_lba_range_overlapped(range, r)) {
10163 3 : return true;
10164 : }
10165 1 : }
10166 14 : return false;
10167 17 : }
10168 :
10169 : static void bdev_quiesce_range_locked(struct lba_range *range, void *ctx, int status);
10170 :
10171 : static int
10172 14 : _bdev_lock_lba_range(struct spdk_bdev *bdev, struct spdk_bdev_channel *ch,
10173 : uint64_t offset, uint64_t length,
10174 : lock_range_cb cb_fn, void *cb_arg)
10175 : {
10176 : struct locked_lba_range_ctx *ctx;
10177 :
10178 14 : ctx = calloc(1, sizeof(*ctx));
10179 14 : if (ctx == NULL) {
10180 0 : return -ENOMEM;
10181 : }
10182 :
10183 14 : ctx->range.offset = offset;
10184 14 : ctx->range.length = length;
10185 14 : ctx->range.owner_thread = spdk_get_thread();
10186 14 : ctx->range.owner_ch = ch;
10187 14 : ctx->range.locked_ctx = cb_arg;
10188 14 : ctx->range.bdev = bdev;
10189 14 : ctx->range.quiesce = (cb_fn == bdev_quiesce_range_locked);
10190 14 : ctx->cb_fn = cb_fn;
10191 14 : ctx->cb_arg = cb_arg;
10192 :
10193 14 : spdk_spin_lock(&bdev->internal.spinlock);
10194 14 : if (bdev_lba_range_overlaps_tailq(&ctx->range, &bdev->internal.locked_ranges)) {
10195 : /* There is an active lock overlapping with this range.
10196 : * Put it on the pending list until this range no
10197 : * longer overlaps with another.
10198 : */
10199 2 : TAILQ_INSERT_TAIL(&bdev->internal.pending_locked_ranges, &ctx->range, tailq);
10200 2 : } else {
10201 12 : TAILQ_INSERT_TAIL(&bdev->internal.locked_ranges, &ctx->range, tailq);
10202 12 : bdev_lock_lba_range_ctx(bdev, ctx);
10203 : }
10204 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10205 14 : return 0;
10206 14 : }
10207 :
10208 : static int
10209 10 : bdev_lock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
10210 : uint64_t offset, uint64_t length,
10211 : lock_range_cb cb_fn, void *cb_arg)
10212 : {
10213 10 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10214 10 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10215 :
10216 10 : if (cb_arg == NULL) {
10217 0 : SPDK_ERRLOG("cb_arg must not be NULL\n");
10218 0 : return -EINVAL;
10219 : }
10220 :
10221 10 : return _bdev_lock_lba_range(bdev, ch, offset, length, cb_fn, cb_arg);
10222 10 : }
10223 :
10224 : static void
10225 2 : bdev_lock_lba_range_ctx_msg(void *_ctx)
10226 : {
10227 2 : struct locked_lba_range_ctx *ctx = _ctx;
10228 :
10229 2 : bdev_lock_lba_range_ctx(ctx->range.bdev, ctx);
10230 2 : }
10231 :
10232 : static void
10233 14 : bdev_unlock_lba_range_cb(struct spdk_bdev *bdev, void *_ctx, int status)
10234 : {
10235 14 : struct locked_lba_range_ctx *ctx = _ctx;
10236 : struct locked_lba_range_ctx *pending_ctx;
10237 : struct lba_range *range, *tmp;
10238 :
10239 14 : spdk_spin_lock(&bdev->internal.spinlock);
10240 :         /* Check whether any pending locked ranges overlap with the range that was
10241 :          * just unlocked. For each one that does, make sure it no longer overlaps any
10242 :          * other locked range before calling bdev_lock_lba_range_ctx, which starts
10243 :          * the lock process.
10244 : */
10245 17 : TAILQ_FOREACH_SAFE(range, &bdev->internal.pending_locked_ranges, tailq, tmp) {
10246 3 : if (bdev_lba_range_overlapped(range, &ctx->range) &&
10247 3 : !bdev_lba_range_overlaps_tailq(range, &bdev->internal.locked_ranges)) {
10248 2 : TAILQ_REMOVE(&bdev->internal.pending_locked_ranges, range, tailq);
10249 2 : pending_ctx = SPDK_CONTAINEROF(range, struct locked_lba_range_ctx, range);
10250 2 : TAILQ_INSERT_TAIL(&bdev->internal.locked_ranges, range, tailq);
10251 4 : spdk_thread_send_msg(pending_ctx->range.owner_thread,
10252 2 : bdev_lock_lba_range_ctx_msg, pending_ctx);
10253 2 : }
10254 3 : }
10255 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10256 :
10257 14 : ctx->cb_fn(&ctx->range, ctx->cb_arg, status);
10258 14 : free(ctx);
10259 14 : }
10260 :
10261 : static void
10262 16 : bdev_unlock_lba_range_get_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10263 : struct spdk_io_channel *_ch, void *_ctx)
10264 : {
10265 16 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10266 16 : struct locked_lba_range_ctx *ctx = _ctx;
10267 : TAILQ_HEAD(, spdk_bdev_io) io_locked;
10268 : struct spdk_bdev_io *bdev_io;
10269 : struct lba_range *range;
10270 :
10271 16 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10272 32 : if (ctx->range.offset == range->offset &&
10273 16 : ctx->range.length == range->length &&
10274 16 : ctx->range.locked_ctx == range->locked_ctx) {
10275 16 : TAILQ_REMOVE(&ch->locked_ranges, range, tailq);
10276 16 : free(range);
10277 16 : break;
10278 : }
10279 0 : }
10280 :
10281 : /* Note: we should almost always be able to assert that the range specified
10282 : * was found. But there are some very rare corner cases where a new channel
10283 : * gets created simultaneously with a range unlock, where this function
10284 : * would execute on that new channel and wouldn't have the range.
10285 : * We also use this to clean up range allocations when a later allocation
10286 : * fails in the locking path.
10287 : * So we can't actually assert() here.
10288 : */
10289 :
10290 : /* Swap the locked IO into a temporary list, and then try to submit them again.
10291 : * We could hyper-optimize this to only resubmit locked I/O that overlap
10292 : * with the range that was just unlocked, but this isn't a performance path so
10293 : * we go for simplicity here.
10294 : */
10295 16 : TAILQ_INIT(&io_locked);
10296 16 : TAILQ_SWAP(&ch->io_locked, &io_locked, spdk_bdev_io, internal.ch_link);
10297 19 : while (!TAILQ_EMPTY(&io_locked)) {
10298 3 : bdev_io = TAILQ_FIRST(&io_locked);
10299 3 : TAILQ_REMOVE(&io_locked, bdev_io, internal.ch_link);
10300 3 : bdev_io_submit(bdev_io);
10301 : }
10302 :
10303 16 : spdk_bdev_for_each_channel_continue(i, 0);
10304 16 : }
10305 :
10306 : static int
10307 14 : _bdev_unlock_lba_range(struct spdk_bdev *bdev, uint64_t offset, uint64_t length,
10308 : lock_range_cb cb_fn, void *cb_arg)
10309 : {
10310 : struct locked_lba_range_ctx *ctx;
10311 : struct lba_range *range;
10312 :
10313 14 : spdk_spin_lock(&bdev->internal.spinlock);
10314 :         /* To start the unlock process, we find the range in the bdev's locked_ranges
10315 : * and remove it. This ensures new channels don't inherit the locked range.
10316 : * Then we will send a message to each channel to remove the range from its
10317 : * per-channel list.
10318 : */
10319 14 : TAILQ_FOREACH(range, &bdev->internal.locked_ranges, tailq) {
10320 24 : if (range->offset == offset && range->length == length &&
10321 14 : (range->owner_ch == NULL || range->locked_ctx == cb_arg)) {
10322 14 : break;
10323 : }
10324 0 : }
10325 14 : if (range == NULL) {
10326 0 : assert(false);
10327 : spdk_spin_unlock(&bdev->internal.spinlock);
10328 : return -EINVAL;
10329 : }
10330 14 : TAILQ_REMOVE(&bdev->internal.locked_ranges, range, tailq);
10331 14 : ctx = SPDK_CONTAINEROF(range, struct locked_lba_range_ctx, range);
10332 14 : spdk_spin_unlock(&bdev->internal.spinlock);
10333 :
10334 14 : ctx->cb_fn = cb_fn;
10335 14 : ctx->cb_arg = cb_arg;
10336 :
10337 14 : spdk_bdev_for_each_channel(bdev, bdev_unlock_lba_range_get_channel, ctx,
10338 : bdev_unlock_lba_range_cb);
10339 14 : return 0;
10340 : }
10341 :
10342 : static int
10343 12 : bdev_unlock_lba_range(struct spdk_bdev_desc *desc, struct spdk_io_channel *_ch,
10344 : uint64_t offset, uint64_t length,
10345 : lock_range_cb cb_fn, void *cb_arg)
10346 : {
10347 12 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10348 12 : struct spdk_bdev_channel *ch = __io_ch_to_bdev_ch(_ch);
10349 : struct lba_range *range;
10350 12 : bool range_found = false;
10351 :
10352 : /* Let's make sure the specified channel actually has a lock on
10353 : * the specified range. Note that the range must match exactly.
10354 : */
10355 14 : TAILQ_FOREACH(range, &ch->locked_ranges, tailq) {
10356 22 : if (range->offset == offset && range->length == length &&
10357 11 : range->owner_ch == ch && range->locked_ctx == cb_arg) {
10358 10 : range_found = true;
10359 10 : break;
10360 : }
10361 2 : }
10362 :
10363 12 : if (!range_found) {
10364 2 : return -EINVAL;
10365 : }
10366 :
10367 10 : return _bdev_unlock_lba_range(bdev, offset, length, cb_fn, cb_arg);
10368 12 : }
10369 :
10370 : struct bdev_quiesce_ctx {
10371 : spdk_bdev_quiesce_cb cb_fn;
10372 : void *cb_arg;
10373 : };
10374 :
10375 : static void
10376 4 : bdev_unquiesce_range_unlocked(struct lba_range *range, void *ctx, int status)
10377 : {
10378 4 : struct bdev_quiesce_ctx *quiesce_ctx = ctx;
10379 :
10380 4 : if (quiesce_ctx->cb_fn != NULL) {
10381 4 : quiesce_ctx->cb_fn(quiesce_ctx->cb_arg, status);
10382 4 : }
10383 :
10384 4 : free(quiesce_ctx);
10385 4 : }
10386 :
10387 : static void
10388 4 : bdev_quiesce_range_locked(struct lba_range *range, void *ctx, int status)
10389 : {
10390 4 : struct bdev_quiesce_ctx *quiesce_ctx = ctx;
10391 4 : struct spdk_bdev_module *module = range->bdev->module;
10392 :
10393 4 : if (status != 0) {
10394 0 : if (quiesce_ctx->cb_fn != NULL) {
10395 0 : quiesce_ctx->cb_fn(quiesce_ctx->cb_arg, status);
10396 0 : }
10397 0 : free(quiesce_ctx);
10398 0 : return;
10399 : }
10400 :
10401 4 : spdk_spin_lock(&module->internal.spinlock);
10402 4 : TAILQ_INSERT_TAIL(&module->internal.quiesced_ranges, range, tailq_module);
10403 4 : spdk_spin_unlock(&module->internal.spinlock);
10404 :
10405 4 : if (quiesce_ctx->cb_fn != NULL) {
10406 : /* copy the context in case the range is unlocked by the callback */
10407 4 : struct bdev_quiesce_ctx tmp = *quiesce_ctx;
10408 :
10409 4 : quiesce_ctx->cb_fn = NULL;
10410 4 : quiesce_ctx->cb_arg = NULL;
10411 :
10412 4 : tmp.cb_fn(tmp.cb_arg, status);
10413 4 : }
10414 : /* quiesce_ctx will be freed on unquiesce */
10415 4 : }
10416 :
10417 : static int
10418 9 : _spdk_bdev_quiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10419 : uint64_t offset, uint64_t length,
10420 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg,
10421 : bool unquiesce)
10422 : {
10423 : struct bdev_quiesce_ctx *quiesce_ctx;
10424 : int rc;
10425 :
10426 9 : if (module != bdev->module) {
10427 0 : SPDK_ERRLOG("Bdev does not belong to specified module.\n");
10428 0 : return -EINVAL;
10429 : }
10430 :
10431 9 : if (!bdev_io_valid_blocks(bdev, offset, length)) {
10432 0 : return -EINVAL;
10433 : }
10434 :
10435 9 : if (unquiesce) {
10436 : struct lba_range *range;
10437 :
10438 : /* Make sure the specified range is actually quiesced in the specified module and
10439 : * then remove it from the list. Note that the range must match exactly.
10440 : */
10441 5 : spdk_spin_lock(&module->internal.spinlock);
10442 6 : TAILQ_FOREACH(range, &module->internal.quiesced_ranges, tailq_module) {
10443 5 : if (range->bdev == bdev && range->offset == offset && range->length == length) {
10444 4 : TAILQ_REMOVE(&module->internal.quiesced_ranges, range, tailq_module);
10445 4 : break;
10446 : }
10447 1 : }
10448 5 : spdk_spin_unlock(&module->internal.spinlock);
10449 :
10450 5 : if (range == NULL) {
10451 1 : SPDK_ERRLOG("The range to unquiesce was not found.\n");
10452 1 : return -EINVAL;
10453 : }
10454 :
10455 4 : quiesce_ctx = range->locked_ctx;
10456 4 : quiesce_ctx->cb_fn = cb_fn;
10457 4 : quiesce_ctx->cb_arg = cb_arg;
10458 :
10459 4 : rc = _bdev_unlock_lba_range(bdev, offset, length, bdev_unquiesce_range_unlocked, quiesce_ctx);
10460 4 : } else {
10461 4 : quiesce_ctx = malloc(sizeof(*quiesce_ctx));
10462 4 : if (quiesce_ctx == NULL) {
10463 0 : return -ENOMEM;
10464 : }
10465 :
10466 4 : quiesce_ctx->cb_fn = cb_fn;
10467 4 : quiesce_ctx->cb_arg = cb_arg;
10468 :
10469 4 : rc = _bdev_lock_lba_range(bdev, NULL, offset, length, bdev_quiesce_range_locked, quiesce_ctx);
10470 4 : if (rc != 0) {
10471 0 : free(quiesce_ctx);
10472 0 : }
10473 : }
10474 :
10475 8 : return rc;
10476 9 : }
10477 :
10478 : int
10479 3 : spdk_bdev_quiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10480 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10481 : {
10482 3 : return _spdk_bdev_quiesce(bdev, module, 0, bdev->blockcnt, cb_fn, cb_arg, false);
10483 : }
10484 :
10485 : int
10486 3 : spdk_bdev_unquiesce(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10487 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10488 : {
10489 3 : return _spdk_bdev_quiesce(bdev, module, 0, bdev->blockcnt, cb_fn, cb_arg, true);
10490 : }
10491 :
10492 : int
10493 1 : spdk_bdev_quiesce_range(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10494 : uint64_t offset, uint64_t length,
10495 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10496 : {
10497 1 : return _spdk_bdev_quiesce(bdev, module, offset, length, cb_fn, cb_arg, false);
10498 : }
10499 :
10500 : int
10501 2 : spdk_bdev_unquiesce_range(struct spdk_bdev *bdev, struct spdk_bdev_module *module,
10502 : uint64_t offset, uint64_t length,
10503 : spdk_bdev_quiesce_cb cb_fn, void *cb_arg)
10504 : {
10505 2 : return _spdk_bdev_quiesce(bdev, module, offset, length, cb_fn, cb_arg, true);
10506 : }
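 :
 : /* Usage sketch for the quiesce API above (illustrative; quiesced_cb, unquiesced_cb
 :  * and my_module are placeholder names). A bdev module can drain and pause I/O to a
 :  * bdev it owns, do its work, and then resume:
 :  *
 :  *   spdk_bdev_quiesce(bdev, &my_module, quiesced_cb, NULL);
 :  *
 :  *   // quiesced_cb(void *ctx, int status): outstanding I/O has drained and new I/O
 :  *   // is held back; do the work, then:
 :  *   spdk_bdev_unquiesce(bdev, &my_module, unquiesced_cb, NULL);
 :  *
 :  * The module argument must be the module that owns the bdev; a sub-range can be
 :  * quiesced with spdk_bdev_quiesce_range()/spdk_bdev_unquiesce_range(). */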
10507 :
10508 : int
10509 282 : spdk_bdev_get_memory_domains(struct spdk_bdev *bdev, struct spdk_memory_domain **domains,
10510 : int array_size)
10511 : {
10512 282 : if (!bdev) {
10513 1 : return -EINVAL;
10514 : }
10515 :
10516 281 : if (bdev->fn_table->get_memory_domains) {
10517 3 : return bdev->fn_table->get_memory_domains(bdev->ctxt, domains, array_size);
10518 : }
10519 :
10520 278 : return 0;
10521 282 : }
10522 :
10523 : struct spdk_bdev_for_each_io_ctx {
10524 : void *ctx;
10525 : spdk_bdev_io_fn fn;
10526 : spdk_bdev_for_each_io_cb cb;
10527 : };
10528 :
10529 : static void
10530 0 : bdev_channel_for_each_io(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
10531 : struct spdk_io_channel *io_ch, void *_ctx)
10532 : {
10533 0 : struct spdk_bdev_for_each_io_ctx *ctx = _ctx;
10534 0 : struct spdk_bdev_channel *bdev_ch = __io_ch_to_bdev_ch(io_ch);
10535 : struct spdk_bdev_io *bdev_io;
10536 0 : int rc = 0;
10537 :
10538 0 : TAILQ_FOREACH(bdev_io, &bdev_ch->io_submitted, internal.ch_link) {
10539 0 : rc = ctx->fn(ctx->ctx, bdev_io);
10540 0 : if (rc != 0) {
10541 0 : break;
10542 : }
10543 0 : }
10544 :
10545 0 : spdk_bdev_for_each_channel_continue(i, rc);
10546 0 : }
10547 :
10548 : static void
10549 0 : bdev_for_each_io_done(struct spdk_bdev *bdev, void *_ctx, int status)
10550 : {
10551 0 : struct spdk_bdev_for_each_io_ctx *ctx = _ctx;
10552 :
10553 0 : ctx->cb(ctx->ctx, status);
10554 :
10555 0 : free(ctx);
10556 0 : }
10557 :
10558 : void
10559 0 : spdk_bdev_for_each_bdev_io(struct spdk_bdev *bdev, void *_ctx, spdk_bdev_io_fn fn,
10560 : spdk_bdev_for_each_io_cb cb)
10561 : {
10562 : struct spdk_bdev_for_each_io_ctx *ctx;
10563 :
10564 0 : assert(fn != NULL && cb != NULL);
10565 :
10566 0 : ctx = calloc(1, sizeof(*ctx));
10567 0 : if (ctx == NULL) {
10568 0 : SPDK_ERRLOG("Failed to allocate context.\n");
10569 0 : cb(_ctx, -ENOMEM);
10570 0 : return;
10571 : }
10572 :
10573 0 : ctx->ctx = _ctx;
10574 0 : ctx->fn = fn;
10575 0 : ctx->cb = cb;
10576 :
10577 0 : spdk_bdev_for_each_channel(bdev, bdev_channel_for_each_io, ctx,
10578 : bdev_for_each_io_done);
10579 0 : }
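 :
 : /* Usage sketch (illustrative; count_io, count_done and outstanding are placeholder
 :  * names):
 :  *
 :  *   static int count_io(void *ctx, struct spdk_bdev_io *bdev_io) {
 :  *           (*(uint32_t *)ctx)++;
 :  *           return 0;        // returning non-zero stops the iteration early
 :  *   }
 :  *   static void count_done(void *ctx, int status) { ... }
 :  *
 :  *   spdk_bdev_for_each_bdev_io(bdev, &outstanding, count_io, count_done);
 :  *
 :  * fn runs on each channel's thread for every I/O on that channel's io_submitted
 :  * list; cb runs once after all channels have been visited. */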
10580 :
10581 : void
10582 135 : spdk_bdev_for_each_channel_continue(struct spdk_bdev_channel_iter *iter, int status)
10583 : {
10584 135 : spdk_for_each_channel_continue(iter->i, status);
10585 135 : }
10586 :
10587 : static struct spdk_bdev *
10588 370 : io_channel_iter_get_bdev(struct spdk_io_channel_iter *i)
10589 : {
10590 370 : void *io_device = spdk_io_channel_iter_get_io_device(i);
10591 :
10592 370 : return __bdev_from_io_dev(io_device);
10593 : }
10594 :
10595 : static void
10596 135 : bdev_each_channel_msg(struct spdk_io_channel_iter *i)
10597 : {
10598 135 : struct spdk_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
10599 135 : struct spdk_bdev *bdev = io_channel_iter_get_bdev(i);
10600 135 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
10601 :
10602 135 : iter->i = i;
10603 135 : iter->fn(iter, bdev, ch, iter->ctx);
10604 135 : }
10605 :
10606 : static void
10607 235 : bdev_each_channel_cpl(struct spdk_io_channel_iter *i, int status)
10608 : {
10609 235 : struct spdk_bdev_channel_iter *iter = spdk_io_channel_iter_get_ctx(i);
10610 235 : struct spdk_bdev *bdev = io_channel_iter_get_bdev(i);
10611 :
10612 235 : iter->i = i;
10613 235 : iter->cpl(bdev, iter->ctx, status);
10614 :
10615 235 : free(iter);
10616 235 : }
10617 :
10618 : void
10619 235 : spdk_bdev_for_each_channel(struct spdk_bdev *bdev, spdk_bdev_for_each_channel_msg fn,
10620 : void *ctx, spdk_bdev_for_each_channel_done cpl)
10621 : {
10622 : struct spdk_bdev_channel_iter *iter;
10623 :
10624 235 : assert(bdev != NULL && fn != NULL && ctx != NULL);
10625 :
10626 235 : iter = calloc(1, sizeof(struct spdk_bdev_channel_iter));
10627 235 : if (iter == NULL) {
10628 0 : SPDK_ERRLOG("Unable to allocate iterator\n");
10629 0 : assert(false);
10630 : return;
10631 : }
10632 :
10633 235 : iter->fn = fn;
10634 235 : iter->cpl = cpl;
10635 235 : iter->ctx = ctx;
10636 :
10637 470 : spdk_for_each_channel(__bdev_to_io_dev(bdev), bdev_each_channel_msg,
10638 235 : iter, bdev_each_channel_cpl);
10639 235 : }
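 :
 : /* Usage sketch (illustrative; per_channel and done are placeholder names). This is
 :  * the pattern used throughout this file: fn runs once on each channel's thread and
 :  * must eventually call spdk_bdev_for_each_channel_continue(); cpl runs on the
 :  * starting thread after every channel has been visited or an error status was
 :  * reported:
 :  *
 :  *   static void per_channel(struct spdk_bdev_channel_iter *i, struct spdk_bdev *bdev,
 :  *                           struct spdk_io_channel *ch, void *ctx) {
 :  *           // ...update per-channel state...
 :  *           spdk_bdev_for_each_channel_continue(i, 0);
 :  *   }
 :  *
 :  *   static void done(struct spdk_bdev *bdev, void *ctx, int status) { ... }
 :  *
 :  *   spdk_bdev_for_each_channel(bdev, per_channel, ctx, done);
 :  */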
10640 :
10641 : static void
10642 3 : bdev_copy_do_write_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
10643 : {
10644 3 : struct spdk_bdev_io *parent_io = cb_arg;
10645 :
10646 3 : spdk_bdev_free_io(bdev_io);
10647 :
10648 : /* Check return status of write */
10649 3 : parent_io->internal.status = success ? SPDK_BDEV_IO_STATUS_SUCCESS : SPDK_BDEV_IO_STATUS_FAILED;
10650 3 : parent_io->internal.cb(parent_io, success, parent_io->internal.caller_ctx);
10651 3 : }
10652 :
10653 : static void
10654 3 : bdev_copy_do_write(void *_bdev_io)
10655 : {
10656 3 : struct spdk_bdev_io *bdev_io = _bdev_io;
10657 : int rc;
10658 :
10659 : /* Write blocks */
10660 6 : rc = spdk_bdev_write_blocks_with_md(bdev_io->internal.desc,
10661 3 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
10662 3 : bdev_io->u.bdev.iovs[0].iov_base,
10663 3 : bdev_io->u.bdev.md_buf, bdev_io->u.bdev.offset_blocks,
10664 3 : bdev_io->u.bdev.num_blocks, bdev_copy_do_write_done, bdev_io);
10665 :
10666 3 : if (rc == -ENOMEM) {
10667 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_copy_do_write);
10668 3 : } else if (rc != 0) {
10669 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10670 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10671 0 : }
10672 3 : }
10673 :
10674 : static void
10675 3 : bdev_copy_do_read_done(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
10676 : {
10677 3 : struct spdk_bdev_io *parent_io = cb_arg;
10678 :
10679 3 : spdk_bdev_free_io(bdev_io);
10680 :
10681 : /* Check return status of read */
10682 3 : if (!success) {
10683 0 : parent_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10684 0 : parent_io->internal.cb(parent_io, false, parent_io->internal.caller_ctx);
10685 0 : return;
10686 : }
10687 :
10688 : /* Do write */
10689 3 : bdev_copy_do_write(parent_io);
10690 3 : }
10691 :
10692 : static void
10693 3 : bdev_copy_do_read(void *_bdev_io)
10694 : {
10695 3 : struct spdk_bdev_io *bdev_io = _bdev_io;
10696 : int rc;
10697 :
10698 : /* Read blocks */
10699 6 : rc = spdk_bdev_read_blocks_with_md(bdev_io->internal.desc,
10700 3 : spdk_io_channel_from_ctx(bdev_io->internal.ch),
10701 3 : bdev_io->u.bdev.iovs[0].iov_base,
10702 3 : bdev_io->u.bdev.md_buf, bdev_io->u.bdev.copy.src_offset_blocks,
10703 3 : bdev_io->u.bdev.num_blocks, bdev_copy_do_read_done, bdev_io);
10704 :
10705 3 : if (rc == -ENOMEM) {
10706 0 : bdev_queue_io_wait_with_cb(bdev_io, bdev_copy_do_read);
10707 3 : } else if (rc != 0) {
10708 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10709 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10710 0 : }
10711 3 : }
10712 :
10713 : static void
10714 3 : bdev_copy_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io, bool success)
10715 : {
10716 3 : if (!success) {
10717 0 : bdev_io->internal.status = SPDK_BDEV_IO_STATUS_FAILED;
10718 0 : bdev_io->internal.cb(bdev_io, false, bdev_io->internal.caller_ctx);
10719 0 : return;
10720 : }
10721 :
10722 3 : bdev_copy_do_read(bdev_io);
10723 3 : }
10724 :
10725 : int
10726 27 : spdk_bdev_copy_blocks(struct spdk_bdev_desc *desc, struct spdk_io_channel *ch,
10727 : uint64_t dst_offset_blocks, uint64_t src_offset_blocks, uint64_t num_blocks,
10728 : spdk_bdev_io_completion_cb cb, void *cb_arg)
10729 : {
10730 27 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(desc);
10731 : struct spdk_bdev_io *bdev_io;
10732 27 : struct spdk_bdev_channel *channel = spdk_io_channel_get_ctx(ch);
10733 :
10734 27 : if (!desc->write) {
10735 0 : return -EBADF;
10736 : }
10737 :
10738 27 : if (!bdev_io_valid_blocks(bdev, dst_offset_blocks, num_blocks) ||
10739 27 : !bdev_io_valid_blocks(bdev, src_offset_blocks, num_blocks)) {
10740 0 : SPDK_DEBUGLOG(bdev,
10741 : "Invalid offset or number of blocks: dst %lu, src %lu, count %lu\n",
10742 : dst_offset_blocks, src_offset_blocks, num_blocks);
10743 0 : return -EINVAL;
10744 : }
10745 :
10746 27 : bdev_io = bdev_channel_get_io(channel);
10747 27 : if (!bdev_io) {
10748 0 : return -ENOMEM;
10749 : }
10750 :
10751 27 : bdev_io->internal.ch = channel;
10752 27 : bdev_io->internal.desc = desc;
10753 27 : bdev_io->type = SPDK_BDEV_IO_TYPE_COPY;
10754 :
10755 27 : bdev_io->u.bdev.offset_blocks = dst_offset_blocks;
10756 27 : bdev_io->u.bdev.copy.src_offset_blocks = src_offset_blocks;
10757 27 : bdev_io->u.bdev.num_blocks = num_blocks;
10758 27 : bdev_io->u.bdev.memory_domain = NULL;
10759 27 : bdev_io->u.bdev.memory_domain_ctx = NULL;
10760 27 : bdev_io->u.bdev.iovs = NULL;
10761 27 : bdev_io->u.bdev.iovcnt = 0;
10762 27 : bdev_io->u.bdev.md_buf = NULL;
10763 27 : bdev_io->u.bdev.accel_sequence = NULL;
10764 27 : bdev_io_init(bdev_io, bdev, cb_arg, cb);
10765 :
10766 27 : if (dst_offset_blocks == src_offset_blocks || num_blocks == 0) {
10767 0 : spdk_thread_send_msg(spdk_get_thread(), bdev_io_complete_cb, bdev_io);
10768 0 : return 0;
10769 : }
10770 :
10771 :
10772 : /* If the copy size is large and should be split, use the generic split logic
10773 : * regardless of whether SPDK_BDEV_IO_TYPE_COPY is supported or not.
10774 : *
10775 : * Then, send the copy request if SPDK_BDEV_IO_TYPE_COPY is supported or
10776 : * emulate it using regular read and write requests otherwise.
10777 : */
10778 27 : if (spdk_bdev_io_type_supported(bdev, SPDK_BDEV_IO_TYPE_COPY) ||
10779 4 : bdev_io->internal.f.split) {
10780 24 : bdev_io_submit(bdev_io);
10781 24 : return 0;
10782 : }
10783 :
10784 3 : spdk_bdev_io_get_buf(bdev_io, bdev_copy_get_buf_cb, num_blocks * spdk_bdev_get_block_size(bdev));
10785 :
10786 3 : return 0;
10787 27 : }
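 :
 : /* Usage sketch (illustrative; copy_done is a placeholder name). Copy nblocks blocks
 :  * within one bdev from src_lba to dst_lba:
 :  *
 :  *   rc = spdk_bdev_copy_blocks(desc, io_ch, dst_lba, src_lba, nblocks, copy_done, NULL);
 :  *
 :  * The descriptor must be open for writing. If the bdev does not support
 :  * SPDK_BDEV_IO_TYPE_COPY and the request does not need splitting, the copy is
 :  * emulated with the read/write helpers above using a buffer from
 :  * spdk_bdev_io_get_buf(). */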
10788 :
10789 3 : SPDK_LOG_REGISTER_COMPONENT(bdev)
10790 :
10791 : static void
10792 0 : bdev_trace(void)
10793 : {
10794 0 : struct spdk_trace_tpoint_opts opts[] = {
10795 : {
10796 : "BDEV_IO_START", TRACE_BDEV_IO_START,
10797 : OWNER_TYPE_BDEV, OBJECT_BDEV_IO, 1,
10798 : {
10799 : { "type", SPDK_TRACE_ARG_TYPE_INT, 8 },
10800 : { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
10801 : { "offset", SPDK_TRACE_ARG_TYPE_INT, 8 },
10802 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
10803 : }
10804 : },
10805 : {
10806 : "BDEV_IO_DONE", TRACE_BDEV_IO_DONE,
10807 : OWNER_TYPE_BDEV, OBJECT_BDEV_IO, 0,
10808 : {
10809 : { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 },
10810 : { "qd", SPDK_TRACE_ARG_TYPE_INT, 4 }
10811 : }
10812 : },
10813 : {
10814 : "BDEV_IOCH_CREATE", TRACE_BDEV_IOCH_CREATE,
10815 : OWNER_TYPE_BDEV, OBJECT_NONE, 0,
10816 : {
10817 : { "tid", SPDK_TRACE_ARG_TYPE_INT, 8 }
10818 : }
10819 : },
10820 : {
10821 : "BDEV_IOCH_DESTROY", TRACE_BDEV_IOCH_DESTROY,
10822 : OWNER_TYPE_BDEV, OBJECT_NONE, 0,
10823 : {
10824 : { "tid", SPDK_TRACE_ARG_TYPE_INT, 8 }
10825 : }
10826 : },
10827 : };
10828 :
10829 :
10830 0 : spdk_trace_register_owner_type(OWNER_TYPE_BDEV, 'b');
10831 0 : spdk_trace_register_object(OBJECT_BDEV_IO, 'i');
10832 0 : spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
10833 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_NVME_IO_START, OBJECT_BDEV_IO, 0);
10834 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_NVME_IO_DONE, OBJECT_BDEV_IO, 0);
10835 0 : spdk_trace_tpoint_register_relation(TRACE_BLOB_REQ_SET_START, OBJECT_BDEV_IO, 0);
10836 0 : spdk_trace_tpoint_register_relation(TRACE_BLOB_REQ_SET_COMPLETE, OBJECT_BDEV_IO, 0);
10837 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_RAID_IO_START, OBJECT_BDEV_IO, 0);
10838 0 : spdk_trace_tpoint_register_relation(TRACE_BDEV_RAID_IO_DONE, OBJECT_BDEV_IO, 0);
10839 0 : }
10840 3 : SPDK_TRACE_REGISTER_FN(bdev_trace, "bdev", TRACE_GROUP_BDEV)
|