Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2018 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "bdev_raid.h"
8 : #include "spdk/env.h"
9 : #include "spdk/thread.h"
10 : #include "spdk/log.h"
11 : #include "spdk/string.h"
12 : #include "spdk/util.h"
13 : #include "spdk/json.h"
14 : #include "spdk/likely.h"
15 :
/* Sentinel block offset meaning "not set": used for the per-channel process offset
 * and for the split offset of a raid_io when no process / no split is active. */
#define RAID_OFFSET_BLOCKS_INVALID UINT64_MAX
/* NOTE(review): presumably the maximum queue depth of a background process - not
 * referenced in this part of the file, confirm against its users. */
#define RAID_BDEV_PROCESS_MAX_QD 16

/* Default for spdk_raid_bdev_opts.process_window_size_kb (see g_opts). */
#define RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT 1024
20 :
21 : static bool g_shutdown_started = false;
22 :
23 : /* List of all raid bdevs */
24 : struct raid_all_tailq g_raid_bdev_list = TAILQ_HEAD_INITIALIZER(g_raid_bdev_list);
25 :
26 : static TAILQ_HEAD(, raid_bdev_module) g_raid_modules = TAILQ_HEAD_INITIALIZER(g_raid_modules);
27 :
/*
 * raid_bdev_io_channel is the per-channel context of a raid bdev. It ties the
 * raid bdev io channel to the io channels of its base bdevs.
 */
struct raid_bdev_io_channel {
	/* Array of IO channels of base bdevs, indexed by base bdev slot.
	 * Entries of missing base bdevs and of the process target stay NULL. */
	struct spdk_io_channel **base_channel;

	/* Private raid module IO channel (NULL if the module provides none) */
	struct spdk_io_channel *module_channel;

	/* Background process data */
	struct {
		/* Process window offset seen by this channel, or RAID_OFFSET_BLOCKS_INVALID
		 * when no process is set up. I/O ending at or below it uses ch_processed. */
		uint64_t offset;
		/* IO channel of the process target base bdev */
		struct spdk_io_channel *target_ch;
		/* Heap-allocated shadow channel in which the target's slot points to target_ch;
		 * used for I/O to the already processed range. */
		struct raid_bdev_io_channel *ch_processed;
	} process;
};
46 :
/* Lifecycle states of a background process (see struct raid_bdev_process.state).
 * NOTE(review): the transitions are implemented outside this part of the file. */
enum raid_bdev_process_state {
	RAID_PROCESS_STATE_INIT,
	RAID_PROCESS_STATE_RUNNING,
	RAID_PROCESS_STATE_STOPPING,
	RAID_PROCESS_STATE_STOPPED,
};
53 :
54 : struct raid_bdev_process {
55 : struct raid_bdev *raid_bdev;
56 : enum raid_process_type type;
57 : enum raid_bdev_process_state state;
58 : struct spdk_thread *thread;
59 : struct raid_bdev_io_channel *raid_ch;
60 : TAILQ_HEAD(, raid_bdev_process_request) requests;
61 : uint64_t max_window_size;
62 : uint64_t window_size;
63 : uint64_t window_remaining;
64 : int window_status;
65 : uint64_t window_offset;
66 : bool window_range_locked;
67 : struct raid_base_bdev_info *target;
68 : int status;
69 : TAILQ_HEAD(, raid_process_finish_action) finish_actions;
70 : };
71 :
72 : struct raid_process_finish_action {
73 : spdk_msg_fn cb;
74 : void *cb_ctx;
75 : TAILQ_ENTRY(raid_process_finish_action) link;
76 : };
77 :
78 : static struct spdk_raid_bdev_opts g_opts = {
79 : .process_window_size_kb = RAID_BDEV_PROCESS_WINDOW_SIZE_KB_DEFAULT,
80 : };
81 :
82 : void
83 0 : raid_bdev_get_opts(struct spdk_raid_bdev_opts *opts)
84 : {
85 0 : *opts = g_opts;
86 0 : }
87 :
88 : int
89 0 : raid_bdev_set_opts(const struct spdk_raid_bdev_opts *opts)
90 : {
91 0 : if (opts->process_window_size_kb == 0) {
92 0 : return -EINVAL;
93 : }
94 :
95 0 : g_opts = *opts;
96 :
97 0 : return 0;
98 : }
99 :
100 : static struct raid_bdev_module *
101 18 : raid_bdev_module_find(enum raid_level level)
102 : {
103 : struct raid_bdev_module *raid_module;
104 :
105 19 : TAILQ_FOREACH(raid_module, &g_raid_modules, link) {
106 17 : if (raid_module->level == level) {
107 16 : return raid_module;
108 : }
109 : }
110 :
111 2 : return NULL;
112 : }
113 :
114 : void
115 1 : raid_bdev_module_list_add(struct raid_bdev_module *raid_module)
116 : {
117 1 : if (raid_bdev_module_find(raid_module->level) != NULL) {
118 0 : SPDK_ERRLOG("module for raid level '%s' already registered.\n",
119 : raid_bdev_level_to_str(raid_module->level));
120 0 : assert(false);
121 : } else {
122 1 : TAILQ_INSERT_TAIL(&g_raid_modules, raid_module, link);
123 : }
124 1 : }
125 :
126 : struct spdk_io_channel *
127 0 : raid_bdev_channel_get_base_channel(struct raid_bdev_io_channel *raid_ch, uint8_t idx)
128 : {
129 0 : return raid_ch->base_channel[idx];
130 : }
131 :
132 : void *
133 0 : raid_bdev_channel_get_module_ctx(struct raid_bdev_io_channel *raid_ch)
134 : {
135 0 : assert(raid_ch->module_channel != NULL);
136 :
137 0 : return spdk_io_channel_get_ctx(raid_ch->module_channel);
138 : }
139 :
140 : struct raid_base_bdev_info *
141 0 : raid_bdev_channel_get_base_info(struct raid_bdev_io_channel *raid_ch, struct spdk_bdev *base_bdev)
142 : {
143 0 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
144 0 : struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
145 : uint8_t i;
146 :
147 0 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
148 0 : struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[i];
149 :
150 0 : if (base_info->is_configured &&
151 0 : spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) {
152 0 : return base_info;
153 : }
154 : }
155 :
156 0 : return NULL;
157 : }
158 :
159 : /* Function declarations */
160 : static void raid_bdev_examine(struct spdk_bdev *bdev);
161 : static int raid_bdev_init(void);
162 : static void raid_bdev_deconfigure(struct raid_bdev *raid_bdev,
163 : raid_bdev_destruct_cb cb_fn, void *cb_arg);
164 :
165 : static void
166 5 : raid_bdev_ch_process_cleanup(struct raid_bdev_io_channel *raid_ch)
167 : {
168 5 : raid_ch->process.offset = RAID_OFFSET_BLOCKS_INVALID;
169 :
170 5 : if (raid_ch->process.target_ch != NULL) {
171 1 : spdk_put_io_channel(raid_ch->process.target_ch);
172 1 : raid_ch->process.target_ch = NULL;
173 : }
174 :
175 5 : if (raid_ch->process.ch_processed != NULL) {
176 2 : free(raid_ch->process.ch_processed->base_channel);
177 2 : free(raid_ch->process.ch_processed);
178 2 : raid_ch->process.ch_processed = NULL;
179 : }
180 5 : }
181 :
182 : static int
183 2 : raid_bdev_ch_process_setup(struct raid_bdev_io_channel *raid_ch, struct raid_bdev_process *process)
184 : {
185 2 : struct raid_bdev *raid_bdev = process->raid_bdev;
186 : struct raid_bdev_io_channel *raid_ch_processed;
187 : struct raid_base_bdev_info *base_info;
188 :
189 2 : raid_ch->process.offset = process->window_offset;
190 :
191 : /* In the future we may have other types of processes which don't use a target bdev,
192 : * like data scrubbing or strip size migration. Until then, expect that there always is
193 : * a process target. */
194 2 : assert(process->target != NULL);
195 :
196 2 : raid_ch->process.target_ch = spdk_bdev_get_io_channel(process->target->desc);
197 2 : if (raid_ch->process.target_ch == NULL) {
198 0 : goto err;
199 : }
200 :
201 2 : raid_ch_processed = calloc(1, sizeof(*raid_ch_processed));
202 2 : if (raid_ch_processed == NULL) {
203 0 : goto err;
204 : }
205 2 : raid_ch->process.ch_processed = raid_ch_processed;
206 :
207 2 : raid_ch_processed->base_channel = calloc(raid_bdev->num_base_bdevs,
208 : sizeof(*raid_ch_processed->base_channel));
209 2 : if (raid_ch_processed->base_channel == NULL) {
210 0 : goto err;
211 : }
212 :
213 66 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
214 64 : uint8_t slot = raid_bdev_base_bdev_slot(base_info);
215 :
216 64 : if (base_info != process->target) {
217 62 : raid_ch_processed->base_channel[slot] = raid_ch->base_channel[slot];
218 : } else {
219 2 : raid_ch_processed->base_channel[slot] = raid_ch->process.target_ch;
220 : }
221 : }
222 :
223 2 : raid_ch_processed->module_channel = raid_ch->module_channel;
224 2 : raid_ch_processed->process.offset = RAID_OFFSET_BLOCKS_INVALID;
225 :
226 2 : return 0;
227 0 : err:
228 0 : raid_bdev_ch_process_cleanup(raid_ch);
229 0 : return -ENOMEM;
230 : }
231 :
232 : /*
233 : * brief:
234 : * raid_bdev_create_cb function is a cb function for raid bdev which creates the
235 : * hierarchy from raid bdev to base bdev io channels. It will be called per core
236 : * params:
237 : * io_device - pointer to raid bdev io device represented by raid_bdev
238 : * ctx_buf - pointer to context buffer for raid bdev io channel
239 : * returns:
240 : * 0 - success
241 : * non zero - failure
242 : */
243 : static int
244 4 : raid_bdev_create_cb(void *io_device, void *ctx_buf)
245 : {
246 4 : struct raid_bdev *raid_bdev = io_device;
247 4 : struct raid_bdev_io_channel *raid_ch = ctx_buf;
248 : uint8_t i;
249 4 : int ret = -ENOMEM;
250 :
251 4 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_create_cb, %p\n", raid_ch);
252 :
253 4 : assert(raid_bdev != NULL);
254 4 : assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
255 :
256 4 : raid_ch->base_channel = calloc(raid_bdev->num_base_bdevs, sizeof(struct spdk_io_channel *));
257 4 : if (!raid_ch->base_channel) {
258 0 : SPDK_ERRLOG("Unable to allocate base bdevs io channel\n");
259 0 : return -ENOMEM;
260 : }
261 :
262 132 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
263 : /*
264 : * Get the spdk_io_channel for all the base bdevs. This is used during
265 : * split logic to send the respective child bdev ios to respective base
266 : * bdev io channel.
267 : * Skip missing base bdevs and the process target, which should also be treated as
268 : * missing until the process completes.
269 : */
270 128 : if (raid_bdev->base_bdev_info[i].is_configured == false ||
271 128 : raid_bdev->base_bdev_info[i].is_process_target == true) {
272 0 : continue;
273 : }
274 128 : raid_ch->base_channel[i] = spdk_bdev_get_io_channel(
275 128 : raid_bdev->base_bdev_info[i].desc);
276 128 : if (!raid_ch->base_channel[i]) {
277 0 : SPDK_ERRLOG("Unable to create io channel for base bdev\n");
278 0 : goto err;
279 : }
280 : }
281 :
282 4 : if (raid_bdev->module->get_io_channel) {
283 0 : raid_ch->module_channel = raid_bdev->module->get_io_channel(raid_bdev);
284 0 : if (!raid_ch->module_channel) {
285 0 : SPDK_ERRLOG("Unable to create io channel for raid module\n");
286 0 : goto err;
287 : }
288 : }
289 :
290 4 : if (raid_bdev->process != NULL) {
291 2 : ret = raid_bdev_ch_process_setup(raid_ch, raid_bdev->process);
292 2 : if (ret != 0) {
293 0 : SPDK_ERRLOG("Failed to setup process io channel\n");
294 0 : goto err;
295 : }
296 : } else {
297 2 : raid_ch->process.offset = RAID_OFFSET_BLOCKS_INVALID;
298 : }
299 :
300 4 : return 0;
301 0 : err:
302 0 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
303 0 : if (raid_ch->base_channel[i] != NULL) {
304 0 : spdk_put_io_channel(raid_ch->base_channel[i]);
305 : }
306 : }
307 0 : free(raid_ch->base_channel);
308 :
309 0 : raid_bdev_ch_process_cleanup(raid_ch);
310 :
311 0 : return ret;
312 : }
313 :
314 : /*
315 : * brief:
316 : * raid_bdev_destroy_cb function is a cb function for raid bdev which deletes the
317 : * hierarchy from raid bdev to base bdev io channels. It will be called per core
318 : * params:
319 : * io_device - pointer to raid bdev io device represented by raid_bdev
320 : * ctx_buf - pointer to context buffer for raid bdev io channel
321 : * returns:
322 : * none
323 : */
324 : static void
325 4 : raid_bdev_destroy_cb(void *io_device, void *ctx_buf)
326 : {
327 4 : struct raid_bdev *raid_bdev = io_device;
328 4 : struct raid_bdev_io_channel *raid_ch = ctx_buf;
329 : uint8_t i;
330 :
331 4 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destroy_cb\n");
332 :
333 4 : assert(raid_ch != NULL);
334 4 : assert(raid_ch->base_channel);
335 :
336 4 : if (raid_ch->module_channel) {
337 0 : spdk_put_io_channel(raid_ch->module_channel);
338 : }
339 :
340 132 : for (i = 0; i < raid_bdev->num_base_bdevs; i++) {
341 : /* Free base bdev channels */
342 128 : if (raid_ch->base_channel[i] != NULL) {
343 128 : spdk_put_io_channel(raid_ch->base_channel[i]);
344 : }
345 : }
346 4 : free(raid_ch->base_channel);
347 4 : raid_ch->base_channel = NULL;
348 :
349 4 : raid_bdev_ch_process_cleanup(raid_ch);
350 4 : }
351 :
352 : /*
353 : * brief:
354 : * raid_bdev_cleanup is used to cleanup raid_bdev related data
355 : * structures.
356 : * params:
357 : * raid_bdev - pointer to raid_bdev
358 : * returns:
359 : * none
360 : */
361 : static void
362 16 : raid_bdev_cleanup(struct raid_bdev *raid_bdev)
363 : {
364 : struct raid_base_bdev_info *base_info;
365 :
366 16 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_cleanup, %p name %s, state %s\n",
367 : raid_bdev, raid_bdev->bdev.name, raid_bdev_state_to_str(raid_bdev->state));
368 16 : assert(raid_bdev->state != RAID_BDEV_STATE_ONLINE);
369 16 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
370 :
371 528 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
372 512 : assert(base_info->desc == NULL);
373 512 : free(base_info->name);
374 : }
375 :
376 16 : TAILQ_REMOVE(&g_raid_bdev_list, raid_bdev, global_link);
377 16 : }
378 :
379 : static void
380 16 : raid_bdev_free(struct raid_bdev *raid_bdev)
381 : {
382 16 : raid_bdev_free_superblock(raid_bdev);
383 16 : free(raid_bdev->base_bdev_info);
384 16 : free(raid_bdev->bdev.name);
385 16 : free(raid_bdev);
386 16 : }
387 :
/*
 * brief:
 * raid_bdev_cleanup_and_free runs raid_bdev_cleanup() followed by raid_bdev_free().
 * params:
 * raid_bdev - pointer to raid_bdev; must not be used after this call
 * returns:
 * none
 */
static void
raid_bdev_cleanup_and_free(struct raid_bdev *raid_bdev)
{
	/* Cleanup first: it still reads fields that raid_bdev_free() releases. */
	raid_bdev_cleanup(raid_bdev);
	raid_bdev_free(raid_bdev);
}
394 :
395 : static void
396 478 : raid_bdev_deconfigure_base_bdev(struct raid_base_bdev_info *base_info)
397 : {
398 478 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
399 :
400 478 : assert(base_info->is_configured);
401 478 : assert(raid_bdev->num_base_bdevs_discovered);
402 478 : raid_bdev->num_base_bdevs_discovered--;
403 478 : base_info->is_configured = false;
404 478 : base_info->is_process_target = false;
405 478 : }
406 :
407 : /*
408 : * brief:
409 : * free resource of base bdev for raid bdev
410 : * params:
411 : * base_info - raid base bdev info
412 : * returns:
413 : * none
414 : */
415 : static void
416 511 : raid_bdev_free_base_bdev_resource(struct raid_base_bdev_info *base_info)
417 : {
418 511 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
419 :
420 511 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
421 :
422 511 : free(base_info->name);
423 511 : base_info->name = NULL;
424 511 : if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) {
425 416 : spdk_uuid_set_null(&base_info->uuid);
426 : }
427 511 : base_info->is_failed = false;
428 :
429 511 : if (base_info->desc == NULL) {
430 33 : return;
431 : }
432 :
433 478 : spdk_bdev_module_release_bdev(spdk_bdev_desc_get_bdev(base_info->desc));
434 478 : spdk_bdev_close(base_info->desc);
435 478 : base_info->desc = NULL;
436 478 : spdk_put_io_channel(base_info->app_thread_ch);
437 478 : base_info->app_thread_ch = NULL;
438 :
439 478 : if (base_info->is_configured) {
440 478 : raid_bdev_deconfigure_base_bdev(base_info);
441 : }
442 : }
443 :
444 : static void
445 13 : raid_bdev_io_device_unregister_cb(void *io_device)
446 : {
447 13 : struct raid_bdev *raid_bdev = io_device;
448 :
449 13 : if (raid_bdev->num_base_bdevs_discovered == 0) {
450 : /* Free raid_bdev when there are no base bdevs left */
451 13 : SPDK_DEBUGLOG(bdev_raid, "raid bdev base bdevs is 0, going to free all in destruct\n");
452 13 : raid_bdev_cleanup(raid_bdev);
453 13 : spdk_bdev_destruct_done(&raid_bdev->bdev, 0);
454 13 : raid_bdev_free(raid_bdev);
455 : } else {
456 0 : spdk_bdev_destruct_done(&raid_bdev->bdev, 0);
457 : }
458 13 : }
459 :
460 : void
461 13 : raid_bdev_module_stop_done(struct raid_bdev *raid_bdev)
462 : {
463 13 : if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) {
464 13 : spdk_io_device_unregister(raid_bdev, raid_bdev_io_device_unregister_cb);
465 : }
466 13 : }
467 :
468 : static void
469 13 : _raid_bdev_destruct(void *ctxt)
470 : {
471 13 : struct raid_bdev *raid_bdev = ctxt;
472 : struct raid_base_bdev_info *base_info;
473 :
474 13 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_destruct\n");
475 :
476 13 : assert(raid_bdev->process == NULL);
477 :
478 429 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
479 : /*
480 : * Close all base bdev descriptors for which call has come from below
481 : * layers. Also close the descriptors if we have started shutdown.
482 : */
483 416 : if (g_shutdown_started || base_info->remove_scheduled == true) {
484 416 : raid_bdev_free_base_bdev_resource(base_info);
485 : }
486 : }
487 :
488 13 : if (g_shutdown_started) {
489 0 : raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
490 : }
491 :
492 13 : if (raid_bdev->module->stop != NULL) {
493 0 : if (raid_bdev->module->stop(raid_bdev) == false) {
494 0 : return;
495 : }
496 : }
497 :
498 13 : raid_bdev_module_stop_done(raid_bdev);
499 : }
500 :
501 : static int
502 13 : raid_bdev_destruct(void *ctx)
503 : {
504 13 : spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_destruct, ctx);
505 :
506 13 : return 1;
507 : }
508 :
509 : int
510 0 : raid_bdev_remap_dix_reftag(void *md_buf, uint64_t num_blocks,
511 : struct spdk_bdev *bdev, uint32_t remapped_offset)
512 : {
513 0 : struct spdk_dif_ctx dif_ctx;
514 0 : struct spdk_dif_error err_blk = {};
515 : int rc;
516 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
517 0 : struct iovec md_iov = {
518 : .iov_base = md_buf,
519 0 : .iov_len = num_blocks * bdev->md_len,
520 : };
521 :
522 0 : if (md_buf == NULL) {
523 0 : return 0;
524 : }
525 :
526 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
527 0 : dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
528 0 : rc = spdk_dif_ctx_init(&dif_ctx,
529 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
530 0 : bdev->dif_is_head_of_md, bdev->dif_type,
531 : SPDK_DIF_FLAGS_REFTAG_CHECK,
532 : 0, 0, 0, 0, 0, &dif_opts);
533 0 : if (rc != 0) {
534 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
535 0 : return rc;
536 : }
537 :
538 0 : spdk_dif_ctx_set_remapped_init_ref_tag(&dif_ctx, remapped_offset);
539 :
540 0 : rc = spdk_dix_remap_ref_tag(&md_iov, num_blocks, &dif_ctx, &err_blk, false);
541 0 : if (rc != 0) {
542 0 : SPDK_ERRLOG("Remapping reference tag failed. type=%d, offset=%d"
543 : PRIu32 "\n", err_blk.err_type, err_blk.err_offset);
544 : }
545 :
546 0 : return rc;
547 : }
548 :
549 : int
550 0 : raid_bdev_verify_dix_reftag(struct iovec *iovs, int iovcnt, void *md_buf,
551 : uint64_t num_blocks, struct spdk_bdev *bdev, uint32_t offset_blocks)
552 : {
553 0 : struct spdk_dif_ctx dif_ctx;
554 0 : struct spdk_dif_error err_blk = {};
555 : int rc;
556 0 : struct spdk_dif_ctx_init_ext_opts dif_opts;
557 0 : struct iovec md_iov = {
558 : .iov_base = md_buf,
559 0 : .iov_len = num_blocks * bdev->md_len,
560 : };
561 :
562 0 : if (md_buf == NULL) {
563 0 : return 0;
564 : }
565 :
566 0 : dif_opts.size = SPDK_SIZEOF(&dif_opts, dif_pi_format);
567 0 : dif_opts.dif_pi_format = SPDK_DIF_PI_FORMAT_16;
568 0 : rc = spdk_dif_ctx_init(&dif_ctx,
569 0 : bdev->blocklen, bdev->md_len, bdev->md_interleave,
570 0 : bdev->dif_is_head_of_md, bdev->dif_type,
571 : SPDK_DIF_FLAGS_REFTAG_CHECK,
572 : offset_blocks, 0, 0, 0, 0, &dif_opts);
573 0 : if (rc != 0) {
574 0 : SPDK_ERRLOG("Initialization of DIF context failed\n");
575 0 : return rc;
576 : }
577 :
578 0 : rc = spdk_dix_verify(iovs, iovcnt, &md_iov, num_blocks, &dif_ctx, &err_blk);
579 0 : if (rc != 0) {
580 0 : SPDK_ERRLOG("Reference tag check failed. type=%d, offset=%d"
581 : PRIu32 "\n", err_blk.err_type, err_blk.err_offset);
582 : }
583 :
584 0 : return rc;
585 : }
586 :
587 : void
588 11 : raid_bdev_io_complete(struct raid_bdev_io *raid_io, enum spdk_bdev_io_status status)
589 : {
590 11 : struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(raid_io);
591 : int rc;
592 :
593 11 : if (raid_io->split.offset != RAID_OFFSET_BLOCKS_INVALID) {
594 10 : struct iovec *split_iov = raid_io->split.iov;
595 10 : const struct iovec *split_iov_orig = &raid_io->split.iov_copy;
596 :
597 : /*
598 : * Non-zero offset here means that this is the completion of the first part of the
599 : * split I/O (the higher LBAs). Then, we submit the second part and set offset to 0.
600 : */
601 10 : if (raid_io->split.offset != 0) {
602 5 : raid_io->offset_blocks = bdev_io->u.bdev.offset_blocks;
603 5 : raid_io->md_buf = bdev_io->u.bdev.md_buf;
604 :
605 5 : if (status == SPDK_BDEV_IO_STATUS_SUCCESS) {
606 5 : raid_io->num_blocks = raid_io->split.offset;
607 5 : raid_io->iovcnt = raid_io->iovs - bdev_io->u.bdev.iovs;
608 5 : raid_io->iovs = bdev_io->u.bdev.iovs;
609 5 : if (split_iov != NULL) {
610 4 : raid_io->iovcnt++;
611 4 : split_iov->iov_len = split_iov->iov_base - split_iov_orig->iov_base;
612 4 : split_iov->iov_base = split_iov_orig->iov_base;
613 : }
614 :
615 5 : raid_io->split.offset = 0;
616 5 : raid_io->base_bdev_io_submitted = 0;
617 5 : raid_io->raid_ch = raid_io->raid_ch->process.ch_processed;
618 :
619 5 : raid_io->raid_bdev->module->submit_rw_request(raid_io);
620 5 : return;
621 : }
622 : }
623 :
624 5 : raid_io->num_blocks = bdev_io->u.bdev.num_blocks;
625 5 : raid_io->iovcnt = bdev_io->u.bdev.iovcnt;
626 5 : raid_io->iovs = bdev_io->u.bdev.iovs;
627 5 : if (split_iov != NULL) {
628 4 : *split_iov = *split_iov_orig;
629 : }
630 : }
631 :
632 6 : if (spdk_unlikely(raid_io->completion_cb != NULL)) {
633 0 : raid_io->completion_cb(raid_io, status);
634 : } else {
635 6 : if (spdk_unlikely(bdev_io->type == SPDK_BDEV_IO_TYPE_READ &&
636 : spdk_bdev_get_dif_type(bdev_io->bdev) != SPDK_DIF_DISABLE &&
637 : bdev_io->bdev->dif_check_flags & SPDK_DIF_FLAGS_REFTAG_CHECK &&
638 : status == SPDK_BDEV_IO_STATUS_SUCCESS)) {
639 :
640 0 : rc = raid_bdev_remap_dix_reftag(bdev_io->u.bdev.md_buf,
641 : bdev_io->u.bdev.num_blocks, bdev_io->bdev,
642 0 : bdev_io->u.bdev.offset_blocks);
643 0 : if (rc != 0) {
644 0 : status = SPDK_BDEV_IO_STATUS_FAILED;
645 : }
646 : }
647 6 : spdk_bdev_io_complete(bdev_io, status);
648 : }
649 : }
650 :
651 : /*
652 : * brief:
653 : * raid_bdev_io_complete_part - signal the completion of a part of the expected
654 : * base bdev IOs and complete the raid_io if this is the final expected IO.
655 : * The caller should first set raid_io->base_bdev_io_remaining. This function
656 : * will decrement this counter by the value of the 'completed' parameter and
657 : * complete the raid_io if the counter reaches 0. The caller is free to
658 : * interpret the 'base_bdev_io_remaining' and 'completed' values as needed,
659 : * it can represent e.g. blocks or IOs.
660 : * params:
661 : * raid_io - pointer to raid_bdev_io
662 : * completed - the part of the raid_io that has been completed
663 : * status - status of the base IO
664 : * returns:
665 : * true - if the raid_io is completed
666 : * false - otherwise
667 : */
668 : bool
669 32 : raid_bdev_io_complete_part(struct raid_bdev_io *raid_io, uint64_t completed,
670 : enum spdk_bdev_io_status status)
671 : {
672 32 : assert(raid_io->base_bdev_io_remaining >= completed);
673 32 : raid_io->base_bdev_io_remaining -= completed;
674 :
675 32 : if (status != raid_io->base_bdev_io_status_default) {
676 0 : raid_io->base_bdev_io_status = status;
677 : }
678 :
679 32 : if (raid_io->base_bdev_io_remaining == 0) {
680 1 : raid_bdev_io_complete(raid_io, raid_io->base_bdev_io_status);
681 1 : return true;
682 : } else {
683 31 : return false;
684 : }
685 : }
686 :
687 : /*
688 : * brief:
689 : * raid_bdev_queue_io_wait function processes the IO which failed to submit.
690 : * It will try to queue the IOs after storing the context to bdev wait queue logic.
691 : * params:
692 : * raid_io - pointer to raid_bdev_io
693 : * bdev - the block device that the IO is submitted to
694 : * ch - io channel
695 : * cb_fn - callback when the spdk_bdev_io for bdev becomes available
696 : * returns:
697 : * none
698 : */
699 : void
700 0 : raid_bdev_queue_io_wait(struct raid_bdev_io *raid_io, struct spdk_bdev *bdev,
701 : struct spdk_io_channel *ch, spdk_bdev_io_wait_cb cb_fn)
702 : {
703 0 : raid_io->waitq_entry.bdev = bdev;
704 0 : raid_io->waitq_entry.cb_fn = cb_fn;
705 0 : raid_io->waitq_entry.cb_arg = raid_io;
706 0 : spdk_bdev_queue_io_wait(bdev, ch, &raid_io->waitq_entry);
707 0 : }
708 :
709 : static void
710 32 : raid_base_bdev_reset_complete(struct spdk_bdev_io *bdev_io, bool success, void *cb_arg)
711 : {
712 32 : struct raid_bdev_io *raid_io = cb_arg;
713 :
714 32 : spdk_bdev_free_io(bdev_io);
715 :
716 32 : raid_bdev_io_complete_part(raid_io, 1, success ?
717 : SPDK_BDEV_IO_STATUS_SUCCESS :
718 : SPDK_BDEV_IO_STATUS_FAILED);
719 32 : }
720 :
static void raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io);

/*
 * brief:
 * _raid_bdev_submit_reset_request adapts raid_bdev_submit_reset_request to the
 * void pointer callback signature used when a reset is retried from the io wait queue.
 * params:
 * _raid_io - pointer to raid_bdev_io
 * returns:
 * none
 */
static void
_raid_bdev_submit_reset_request(void *_raid_io)
{
	raid_bdev_submit_reset_request((struct raid_bdev_io *)_raid_io);
}
730 :
731 : /*
732 : * brief:
733 : * raid_bdev_submit_reset_request function submits reset requests
734 : * to member disks; it will submit as many as possible unless a reset fails with -ENOMEM, in
735 : * which case it will queue it for later submission
736 : * params:
737 : * raid_io
738 : * returns:
739 : * none
740 : */
741 : static void
742 1 : raid_bdev_submit_reset_request(struct raid_bdev_io *raid_io)
743 : {
744 : struct raid_bdev *raid_bdev;
745 : int ret;
746 : uint8_t i;
747 : struct raid_base_bdev_info *base_info;
748 : struct spdk_io_channel *base_ch;
749 :
750 1 : raid_bdev = raid_io->raid_bdev;
751 :
752 1 : if (raid_io->base_bdev_io_remaining == 0) {
753 1 : raid_io->base_bdev_io_remaining = raid_bdev->num_base_bdevs;
754 : }
755 :
756 33 : for (i = raid_io->base_bdev_io_submitted; i < raid_bdev->num_base_bdevs; i++) {
757 32 : base_info = &raid_bdev->base_bdev_info[i];
758 32 : base_ch = raid_io->raid_ch->base_channel[i];
759 32 : if (base_ch == NULL) {
760 0 : raid_io->base_bdev_io_submitted++;
761 0 : raid_bdev_io_complete_part(raid_io, 1, SPDK_BDEV_IO_STATUS_SUCCESS);
762 0 : continue;
763 : }
764 32 : ret = spdk_bdev_reset(base_info->desc, base_ch,
765 : raid_base_bdev_reset_complete, raid_io);
766 32 : if (ret == 0) {
767 32 : raid_io->base_bdev_io_submitted++;
768 0 : } else if (ret == -ENOMEM) {
769 0 : raid_bdev_queue_io_wait(raid_io, spdk_bdev_desc_get_bdev(base_info->desc),
770 : base_ch, _raid_bdev_submit_reset_request);
771 0 : return;
772 : } else {
773 0 : SPDK_ERRLOG("bdev io submit error not due to ENOMEM, it should not happen\n");
774 0 : assert(false);
775 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
776 : return;
777 : }
778 : }
779 : }
780 :
781 : static void
782 5 : raid_bdev_io_split(struct raid_bdev_io *raid_io, uint64_t split_offset)
783 : {
784 5 : struct raid_bdev *raid_bdev = raid_io->raid_bdev;
785 5 : size_t iov_offset = split_offset * raid_bdev->bdev.blocklen;
786 : int i;
787 :
788 5 : assert(split_offset != 0);
789 5 : assert(raid_io->split.offset == RAID_OFFSET_BLOCKS_INVALID);
790 5 : raid_io->split.offset = split_offset;
791 :
792 5 : raid_io->offset_blocks += split_offset;
793 5 : raid_io->num_blocks -= split_offset;
794 5 : if (raid_io->md_buf != NULL) {
795 5 : raid_io->md_buf += (split_offset * raid_bdev->bdev.md_len);
796 : }
797 :
798 12 : for (i = 0; i < raid_io->iovcnt; i++) {
799 12 : struct iovec *iov = &raid_io->iovs[i];
800 :
801 12 : if (iov_offset < iov->iov_len) {
802 5 : if (iov_offset == 0) {
803 1 : raid_io->split.iov = NULL;
804 : } else {
805 4 : raid_io->split.iov = iov;
806 4 : raid_io->split.iov_copy = *iov;
807 4 : iov->iov_base += iov_offset;
808 4 : iov->iov_len -= iov_offset;
809 : }
810 5 : raid_io->iovs += i;
811 5 : raid_io->iovcnt -= i;
812 5 : break;
813 : }
814 :
815 7 : iov_offset -= iov->iov_len;
816 : }
817 5 : }
818 :
819 : static void
820 5 : raid_bdev_submit_rw_request(struct raid_bdev_io *raid_io)
821 : {
822 5 : struct raid_bdev_io_channel *raid_ch = raid_io->raid_ch;
823 :
824 5 : if (raid_ch->process.offset != RAID_OFFSET_BLOCKS_INVALID) {
825 5 : uint64_t offset_begin = raid_io->offset_blocks;
826 5 : uint64_t offset_end = offset_begin + raid_io->num_blocks;
827 :
828 5 : if (offset_end > raid_ch->process.offset) {
829 5 : if (offset_begin < raid_ch->process.offset) {
830 : /*
831 : * If the I/O spans both the processed and unprocessed ranges,
832 : * split it and first handle the unprocessed part. After it
833 : * completes, the rest will be handled.
834 : * This situation occurs when the process thread is not active
835 : * or is waiting for the process window range to be locked
836 : * (quiesced). When a window is being processed, such I/Os will be
837 : * deferred by the bdev layer until the window is unlocked.
838 : */
839 5 : SPDK_DEBUGLOG(bdev_raid, "split: process_offset: %lu offset_begin: %lu offset_end: %lu\n",
840 : raid_ch->process.offset, offset_begin, offset_end);
841 5 : raid_bdev_io_split(raid_io, raid_ch->process.offset - offset_begin);
842 : }
843 : } else {
844 : /* Use the child channel, which corresponds to the already processed range */
845 0 : raid_io->raid_ch = raid_ch->process.ch_processed;
846 : }
847 : }
848 :
849 5 : raid_io->raid_bdev->module->submit_rw_request(raid_io);
850 5 : }
851 :
852 : /*
853 : * brief:
854 : * Callback function to spdk_bdev_io_get_buf.
855 : * params:
856 : * ch - pointer to raid bdev io channel
857 : * bdev_io - pointer to parent bdev_io on raid bdev device
858 : * success - True if buffer is allocated or false otherwise.
859 : * returns:
860 : * none
861 : */
862 : static void
863 0 : raid_bdev_get_buf_cb(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io,
864 : bool success)
865 : {
866 0 : struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
867 :
868 0 : if (!success) {
869 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
870 0 : return;
871 : }
872 :
873 0 : raid_bdev_submit_rw_request(raid_io);
874 : }
875 :
876 : void
877 6 : raid_bdev_io_init(struct raid_bdev_io *raid_io, struct raid_bdev_io_channel *raid_ch,
878 : enum spdk_bdev_io_type type, uint64_t offset_blocks,
879 : uint64_t num_blocks, struct iovec *iovs, int iovcnt, void *md_buf,
880 : struct spdk_memory_domain *memory_domain, void *memory_domain_ctx)
881 : {
882 6 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(raid_ch);
883 6 : struct raid_bdev *raid_bdev = spdk_io_channel_get_io_device(ch);
884 :
885 6 : raid_io->type = type;
886 6 : raid_io->offset_blocks = offset_blocks;
887 6 : raid_io->num_blocks = num_blocks;
888 6 : raid_io->iovs = iovs;
889 6 : raid_io->iovcnt = iovcnt;
890 6 : raid_io->memory_domain = memory_domain;
891 6 : raid_io->memory_domain_ctx = memory_domain_ctx;
892 6 : raid_io->md_buf = md_buf;
893 :
894 6 : raid_io->raid_bdev = raid_bdev;
895 6 : raid_io->raid_ch = raid_ch;
896 6 : raid_io->base_bdev_io_remaining = 0;
897 6 : raid_io->base_bdev_io_submitted = 0;
898 6 : raid_io->completion_cb = NULL;
899 6 : raid_io->split.offset = RAID_OFFSET_BLOCKS_INVALID;
900 :
901 6 : raid_bdev_io_set_default_status(raid_io, SPDK_BDEV_IO_STATUS_SUCCESS);
902 6 : }
903 :
904 : /*
905 : * brief:
906 : * raid_bdev_submit_request function is the submit_request function pointer of
907 : * raid bdev function table. This is used to submit the io on raid_bdev to below
908 : * layers.
909 : * params:
910 : * ch - pointer to raid bdev io channel
911 : * bdev_io - pointer to parent bdev_io on raid bdev device
912 : * returns:
913 : * none
914 : */
915 : static void
916 6 : raid_bdev_submit_request(struct spdk_io_channel *ch, struct spdk_bdev_io *bdev_io)
917 : {
918 6 : struct raid_bdev_io *raid_io = (struct raid_bdev_io *)bdev_io->driver_ctx;
919 :
920 6 : raid_bdev_io_init(raid_io, spdk_io_channel_get_ctx(ch), bdev_io->type,
921 : bdev_io->u.bdev.offset_blocks, bdev_io->u.bdev.num_blocks,
922 : bdev_io->u.bdev.iovs, bdev_io->u.bdev.iovcnt, bdev_io->u.bdev.md_buf,
923 : bdev_io->u.bdev.memory_domain, bdev_io->u.bdev.memory_domain_ctx);
924 :
925 6 : switch (bdev_io->type) {
926 0 : case SPDK_BDEV_IO_TYPE_READ:
927 0 : spdk_bdev_io_get_buf(bdev_io, raid_bdev_get_buf_cb,
928 0 : bdev_io->u.bdev.num_blocks * bdev_io->bdev->blocklen);
929 0 : break;
930 5 : case SPDK_BDEV_IO_TYPE_WRITE:
931 5 : raid_bdev_submit_rw_request(raid_io);
932 5 : break;
933 :
934 1 : case SPDK_BDEV_IO_TYPE_RESET:
935 1 : raid_bdev_submit_reset_request(raid_io);
936 1 : break;
937 :
938 0 : case SPDK_BDEV_IO_TYPE_FLUSH:
939 : case SPDK_BDEV_IO_TYPE_UNMAP:
940 0 : if (raid_io->raid_bdev->process != NULL) {
941 : /* TODO: rebuild support */
942 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
943 0 : return;
944 : }
945 0 : raid_io->raid_bdev->module->submit_null_payload_request(raid_io);
946 0 : break;
947 :
948 0 : default:
949 0 : SPDK_ERRLOG("submit request, invalid io type %u\n", bdev_io->type);
950 0 : raid_bdev_io_complete(raid_io, SPDK_BDEV_IO_STATUS_FAILED);
951 0 : break;
952 : }
953 : }
954 :
955 : /*
956 : * brief:
957 : * _raid_bdev_io_type_supported checks whether io_type is supported in
958 : * all base bdev modules of raid bdev module. If anyone among the base_bdevs
959 : * doesn't support, the raid device doesn't supports.
960 : *
961 : * params:
962 : * raid_bdev - pointer to raid bdev context
963 : * io_type - io type
964 : * returns:
965 : * true - io_type is supported
966 : * false - io_type is not supported
967 : */
968 : inline static bool
969 1 : _raid_bdev_io_type_supported(struct raid_bdev *raid_bdev, enum spdk_bdev_io_type io_type)
970 : {
971 : struct raid_base_bdev_info *base_info;
972 :
973 1 : if (io_type == SPDK_BDEV_IO_TYPE_FLUSH ||
974 : io_type == SPDK_BDEV_IO_TYPE_UNMAP) {
975 0 : if (raid_bdev->module->submit_null_payload_request == NULL) {
976 0 : return false;
977 : }
978 : }
979 :
980 33 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
981 32 : if (base_info->desc == NULL) {
982 0 : continue;
983 : }
984 :
985 32 : if (spdk_bdev_io_type_supported(spdk_bdev_desc_get_bdev(base_info->desc), io_type) == false) {
986 0 : return false;
987 : }
988 : }
989 :
990 1 : return true;
991 : }
992 :
993 : /*
994 : * brief:
995 : * raid_bdev_io_type_supported is the io_supported function for bdev function
996 : * table which returns whether the particular io type is supported or not by
997 : * raid bdev module
998 : * params:
999 : * ctx - pointer to raid bdev context
1000 : * type - io type
1001 : * returns:
1002 : * true - io_type is supported
1003 : * false - io_type is not supported
1004 : */
1005 : static bool
1006 4 : raid_bdev_io_type_supported(void *ctx, enum spdk_bdev_io_type io_type)
1007 : {
1008 4 : switch (io_type) {
1009 2 : case SPDK_BDEV_IO_TYPE_READ:
1010 : case SPDK_BDEV_IO_TYPE_WRITE:
1011 2 : return true;
1012 :
1013 1 : case SPDK_BDEV_IO_TYPE_FLUSH:
1014 : case SPDK_BDEV_IO_TYPE_RESET:
1015 : case SPDK_BDEV_IO_TYPE_UNMAP:
1016 1 : return _raid_bdev_io_type_supported(ctx, io_type);
1017 :
1018 1 : default:
1019 1 : return false;
1020 : }
1021 :
1022 : return false;
1023 : }
1024 :
/*
 * brief:
 * raid_bdev_get_io_channel is the get_io_channel entry of the raid bdev
 * function table. The raid_bdev pointer itself is used as the io device
 * handle passed to spdk_get_io_channel.
 * params:
 * ctxt - pointer to raid_bdev
 * returns:
 * pointer to io channel for raid bdev
 */
static struct spdk_io_channel *
raid_bdev_get_io_channel(void *ctxt)
{
	struct raid_bdev *io_device = ctxt;

	return spdk_get_io_channel(io_device);
}
1041 :
1042 : void
1043 7 : raid_bdev_write_info_json(struct raid_bdev *raid_bdev, struct spdk_json_write_ctx *w)
1044 : {
1045 : struct raid_base_bdev_info *base_info;
1046 :
1047 7 : assert(raid_bdev != NULL);
1048 7 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
1049 :
1050 7 : spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid);
1051 7 : spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
1052 7 : spdk_json_write_named_string(w, "state", raid_bdev_state_to_str(raid_bdev->state));
1053 7 : spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
1054 7 : spdk_json_write_named_bool(w, "superblock", raid_bdev->superblock_enabled);
1055 7 : spdk_json_write_named_uint32(w, "num_base_bdevs", raid_bdev->num_base_bdevs);
1056 7 : spdk_json_write_named_uint32(w, "num_base_bdevs_discovered", raid_bdev->num_base_bdevs_discovered);
1057 7 : spdk_json_write_named_uint32(w, "num_base_bdevs_operational",
1058 7 : raid_bdev->num_base_bdevs_operational);
1059 7 : if (raid_bdev->process) {
1060 0 : struct raid_bdev_process *process = raid_bdev->process;
1061 0 : uint64_t offset = process->window_offset;
1062 :
1063 0 : spdk_json_write_named_object_begin(w, "process");
1064 0 : spdk_json_write_name(w, "type");
1065 0 : spdk_json_write_string(w, raid_bdev_process_to_str(process->type));
1066 0 : spdk_json_write_named_string(w, "target", process->target->name);
1067 0 : spdk_json_write_named_object_begin(w, "progress");
1068 0 : spdk_json_write_named_uint64(w, "blocks", offset);
1069 0 : spdk_json_write_named_uint32(w, "percent", offset * 100.0 / raid_bdev->bdev.blockcnt);
1070 0 : spdk_json_write_object_end(w);
1071 0 : spdk_json_write_object_end(w);
1072 : }
1073 7 : spdk_json_write_name(w, "base_bdevs_list");
1074 7 : spdk_json_write_array_begin(w);
1075 231 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1076 224 : spdk_json_write_object_begin(w);
1077 224 : spdk_json_write_name(w, "name");
1078 224 : if (base_info->name) {
1079 224 : spdk_json_write_string(w, base_info->name);
1080 : } else {
1081 0 : spdk_json_write_null(w);
1082 : }
1083 224 : spdk_json_write_named_uuid(w, "uuid", &base_info->uuid);
1084 224 : spdk_json_write_named_bool(w, "is_configured", base_info->is_configured);
1085 224 : spdk_json_write_named_uint64(w, "data_offset", base_info->data_offset);
1086 224 : spdk_json_write_named_uint64(w, "data_size", base_info->data_size);
1087 224 : spdk_json_write_object_end(w);
1088 : }
1089 7 : spdk_json_write_array_end(w);
1090 7 : }
1091 :
1092 : /*
1093 : * brief:
1094 : * raid_bdev_dump_info_json is the function table pointer for raid bdev
1095 : * params:
1096 : * ctx - pointer to raid_bdev
1097 : * w - pointer to json context
1098 : * returns:
1099 : * 0 - success
1100 : * non zero - failure
1101 : */
1102 : static int
1103 1 : raid_bdev_dump_info_json(void *ctx, struct spdk_json_write_ctx *w)
1104 : {
1105 1 : struct raid_bdev *raid_bdev = ctx;
1106 :
1107 1 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_dump_config_json\n");
1108 :
1109 : /* Dump the raid bdev configuration related information */
1110 1 : spdk_json_write_named_object_begin(w, "raid");
1111 1 : raid_bdev_write_info_json(raid_bdev, w);
1112 1 : spdk_json_write_object_end(w);
1113 :
1114 1 : return 0;
1115 : }
1116 :
1117 : /*
1118 : * brief:
1119 : * raid_bdev_write_config_json is the function table pointer for raid bdev
1120 : * params:
1121 : * bdev - pointer to spdk_bdev
1122 : * w - pointer to json context
1123 : * returns:
1124 : * none
1125 : */
1126 : static void
1127 0 : raid_bdev_write_config_json(struct spdk_bdev *bdev, struct spdk_json_write_ctx *w)
1128 : {
1129 0 : struct raid_bdev *raid_bdev = bdev->ctxt;
1130 : struct raid_base_bdev_info *base_info;
1131 :
1132 0 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
1133 :
1134 0 : if (raid_bdev->superblock_enabled) {
1135 : /* raid bdev configuration is stored in the superblock */
1136 0 : return;
1137 : }
1138 :
1139 0 : spdk_json_write_object_begin(w);
1140 :
1141 0 : spdk_json_write_named_string(w, "method", "bdev_raid_create");
1142 :
1143 0 : spdk_json_write_named_object_begin(w, "params");
1144 0 : spdk_json_write_named_string(w, "name", bdev->name);
1145 0 : spdk_json_write_named_uuid(w, "uuid", &raid_bdev->bdev.uuid);
1146 0 : if (raid_bdev->strip_size_kb != 0) {
1147 0 : spdk_json_write_named_uint32(w, "strip_size_kb", raid_bdev->strip_size_kb);
1148 : }
1149 0 : spdk_json_write_named_string(w, "raid_level", raid_bdev_level_to_str(raid_bdev->level));
1150 :
1151 0 : spdk_json_write_named_array_begin(w, "base_bdevs");
1152 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1153 0 : if (base_info->name) {
1154 0 : spdk_json_write_string(w, base_info->name);
1155 : } else {
1156 0 : char str[32];
1157 :
1158 0 : snprintf(str, sizeof(str), "removed_base_bdev_%u", raid_bdev_base_bdev_slot(base_info));
1159 0 : spdk_json_write_string(w, str);
1160 : }
1161 : }
1162 0 : spdk_json_write_array_end(w);
1163 0 : spdk_json_write_object_end(w);
1164 :
1165 0 : spdk_json_write_object_end(w);
1166 : }
1167 :
1168 : static int
1169 0 : raid_bdev_get_memory_domains(void *ctx, struct spdk_memory_domain **domains, int array_size)
1170 : {
1171 0 : struct raid_bdev *raid_bdev = ctx;
1172 : struct raid_base_bdev_info *base_info;
1173 0 : int domains_count = 0, rc = 0;
1174 :
1175 0 : if (raid_bdev->module->memory_domains_supported == false) {
1176 0 : return 0;
1177 : }
1178 :
1179 : /* First loop to get the number of memory domains */
1180 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1181 0 : if (base_info->is_configured == false) {
1182 0 : continue;
1183 : }
1184 0 : rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), NULL, 0);
1185 0 : if (rc < 0) {
1186 0 : return rc;
1187 : }
1188 0 : domains_count += rc;
1189 : }
1190 :
1191 0 : if (!domains || array_size < domains_count) {
1192 0 : return domains_count;
1193 : }
1194 :
1195 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1196 0 : if (base_info->is_configured == false) {
1197 0 : continue;
1198 : }
1199 0 : rc = spdk_bdev_get_memory_domains(spdk_bdev_desc_get_bdev(base_info->desc), domains, array_size);
1200 0 : if (rc < 0) {
1201 0 : return rc;
1202 : }
1203 0 : domains += rc;
1204 0 : array_size -= rc;
1205 : }
1206 :
1207 0 : return domains_count;
1208 : }
1209 :
1210 : /* g_raid_bdev_fn_table is the function table for raid bdev */
1211 : static const struct spdk_bdev_fn_table g_raid_bdev_fn_table = {
1212 : .destruct = raid_bdev_destruct,
1213 : .submit_request = raid_bdev_submit_request,
1214 : .io_type_supported = raid_bdev_io_type_supported,
1215 : .get_io_channel = raid_bdev_get_io_channel,
1216 : .dump_info_json = raid_bdev_dump_info_json,
1217 : .write_config_json = raid_bdev_write_config_json,
1218 : .get_memory_domains = raid_bdev_get_memory_domains,
1219 : };
1220 :
1221 : struct raid_bdev *
1222 34 : raid_bdev_find_by_name(const char *name)
1223 : {
1224 : struct raid_bdev *raid_bdev;
1225 :
1226 41 : TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1227 22 : if (strcmp(raid_bdev->bdev.name, name) == 0) {
1228 15 : return raid_bdev;
1229 : }
1230 : }
1231 :
1232 19 : return NULL;
1233 : }
1234 :
1235 : static struct raid_bdev *
1236 0 : raid_bdev_find_by_uuid(const struct spdk_uuid *uuid)
1237 : {
1238 : struct raid_bdev *raid_bdev;
1239 :
1240 0 : TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1241 0 : if (spdk_uuid_compare(&raid_bdev->bdev.uuid, uuid) == 0) {
1242 0 : return raid_bdev;
1243 : }
1244 : }
1245 :
1246 0 : return NULL;
1247 : }
1248 :
1249 : static struct {
1250 : const char *name;
1251 : enum raid_level value;
1252 : } g_raid_level_names[] = {
1253 : { "raid0", RAID0 },
1254 : { "0", RAID0 },
1255 : { "raid1", RAID1 },
1256 : { "1", RAID1 },
1257 : { "raid5f", RAID5F },
1258 : { "5f", RAID5F },
1259 : { "concat", CONCAT },
1260 : { }
1261 : };
1262 :
1263 : const char *g_raid_state_names[] = {
1264 : [RAID_BDEV_STATE_ONLINE] = "online",
1265 : [RAID_BDEV_STATE_CONFIGURING] = "configuring",
1266 : [RAID_BDEV_STATE_OFFLINE] = "offline",
1267 : [RAID_BDEV_STATE_MAX] = NULL
1268 : };
1269 :
1270 : static const char *g_raid_process_type_names[] = {
1271 : [RAID_PROCESS_NONE] = "none",
1272 : [RAID_PROCESS_REBUILD] = "rebuild",
1273 : [RAID_PROCESS_MAX] = NULL
1274 : };
1275 :
/*
 * astyle does not cope with an enum type used directly as the return type in
 * a function definition, so the functions below use these typedefs instead.
 */
typedef enum raid_bdev_state raid_bdev_state_t;
typedef enum raid_level raid_level_t;
1279 :
1280 : raid_level_t
1281 4 : raid_bdev_str_to_level(const char *str)
1282 : {
1283 : unsigned int i;
1284 :
1285 4 : assert(str != NULL);
1286 :
1287 12 : for (i = 0; g_raid_level_names[i].name != NULL; i++) {
1288 11 : if (strcasecmp(g_raid_level_names[i].name, str) == 0) {
1289 3 : return g_raid_level_names[i].value;
1290 : }
1291 : }
1292 :
1293 1 : return INVALID_RAID_LEVEL;
1294 : }
1295 :
1296 : const char *
1297 11 : raid_bdev_level_to_str(enum raid_level level)
1298 : {
1299 : unsigned int i;
1300 :
1301 81 : for (i = 0; g_raid_level_names[i].name != NULL; i++) {
1302 71 : if (g_raid_level_names[i].value == level) {
1303 1 : return g_raid_level_names[i].name;
1304 : }
1305 : }
1306 :
1307 10 : return "";
1308 : }
1309 :
1310 : raid_bdev_state_t
1311 6 : raid_bdev_str_to_state(const char *str)
1312 : {
1313 : unsigned int i;
1314 :
1315 6 : assert(str != NULL);
1316 :
1317 18 : for (i = 0; i < RAID_BDEV_STATE_MAX; i++) {
1318 15 : if (strcasecmp(g_raid_state_names[i], str) == 0) {
1319 3 : break;
1320 : }
1321 : }
1322 :
1323 6 : return i;
1324 : }
1325 :
1326 : const char *
1327 7 : raid_bdev_state_to_str(enum raid_bdev_state state)
1328 : {
1329 7 : if (state >= RAID_BDEV_STATE_MAX) {
1330 0 : return "";
1331 : }
1332 :
1333 7 : return g_raid_state_names[state];
1334 : }
1335 :
1336 : const char *
1337 3 : raid_bdev_process_to_str(enum raid_process_type value)
1338 : {
1339 3 : if (value >= RAID_PROCESS_MAX) {
1340 0 : return "";
1341 : }
1342 :
1343 3 : return g_raid_process_type_names[value];
1344 : }
1345 :
1346 : /*
1347 : * brief:
1348 : * raid_bdev_fini_start is called when bdev layer is starting the
1349 : * shutdown process
1350 : * params:
1351 : * none
1352 : * returns:
1353 : * none
1354 : */
1355 : static void
1356 0 : raid_bdev_fini_start(void)
1357 : {
1358 0 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_fini_start\n");
1359 0 : g_shutdown_started = true;
1360 0 : }
1361 :
1362 : /*
1363 : * brief:
1364 : * raid_bdev_exit is called on raid bdev module exit time by bdev layer
1365 : * params:
1366 : * none
1367 : * returns:
1368 : * none
1369 : */
1370 : static void
1371 11 : raid_bdev_exit(void)
1372 : {
1373 : struct raid_bdev *raid_bdev, *tmp;
1374 :
1375 11 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_exit\n");
1376 :
1377 11 : TAILQ_FOREACH_SAFE(raid_bdev, &g_raid_bdev_list, global_link, tmp) {
1378 0 : raid_bdev_cleanup_and_free(raid_bdev);
1379 : }
1380 11 : }
1381 :
1382 : static void
1383 0 : raid_bdev_opts_config_json(struct spdk_json_write_ctx *w)
1384 : {
1385 0 : spdk_json_write_object_begin(w);
1386 :
1387 0 : spdk_json_write_named_string(w, "method", "bdev_raid_set_options");
1388 :
1389 0 : spdk_json_write_named_object_begin(w, "params");
1390 0 : spdk_json_write_named_uint32(w, "process_window_size_kb", g_opts.process_window_size_kb);
1391 0 : spdk_json_write_object_end(w);
1392 :
1393 0 : spdk_json_write_object_end(w);
1394 0 : }
1395 :
/*
 * brief:
 * raid_bdev_config_json is the config_json callback of the raid bdev module.
 * It writes the module-wide options.
 * params:
 * w - pointer to json context
 * returns:
 * 0 - always
 */
static int
raid_bdev_config_json(struct spdk_json_write_ctx *w)
{
	const int rc = 0;

	raid_bdev_opts_config_json(w);

	return rc;
}
1403 :
1404 : /*
1405 : * brief:
1406 : * raid_bdev_get_ctx_size is used to return the context size of bdev_io for raid
1407 : * module
1408 : * params:
1409 : * none
1410 : * returns:
1411 : * size of spdk_bdev_io context for raid
1412 : */
1413 : static int
1414 1 : raid_bdev_get_ctx_size(void)
1415 : {
1416 1 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_get_ctx_size\n");
1417 1 : return sizeof(struct raid_bdev_io);
1418 : }
1419 :
1420 : static struct spdk_bdev_module g_raid_if = {
1421 : .name = "raid",
1422 : .module_init = raid_bdev_init,
1423 : .fini_start = raid_bdev_fini_start,
1424 : .module_fini = raid_bdev_exit,
1425 : .config_json = raid_bdev_config_json,
1426 : .get_ctx_size = raid_bdev_get_ctx_size,
1427 : .examine_disk = raid_bdev_examine,
1428 : .async_init = false,
1429 : .async_fini = false,
1430 : };
1431 1 : SPDK_BDEV_MODULE_REGISTER(raid, &g_raid_if)
1432 :
/*
 * brief:
 * raid_bdev_init is the module_init callback of the raid bdev module. There
 * is nothing to set up, so it always succeeds.
 * params:
 * none
 * returns:
 * 0 - success
 */
static int
raid_bdev_init(void)
{
	const int rc = 0;

	return rc;
}
1447 :
1448 : static int
1449 19 : _raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
1450 : enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid,
1451 : struct raid_bdev **raid_bdev_out)
1452 : {
1453 : struct raid_bdev *raid_bdev;
1454 : struct spdk_bdev *raid_bdev_gen;
1455 : struct raid_bdev_module *module;
1456 : struct raid_base_bdev_info *base_info;
1457 : uint8_t min_operational;
1458 :
1459 19 : if (strnlen(name, RAID_BDEV_SB_NAME_SIZE) == RAID_BDEV_SB_NAME_SIZE) {
1460 0 : SPDK_ERRLOG("Raid bdev name '%s' exceeds %d characters\n", name, RAID_BDEV_SB_NAME_SIZE - 1);
1461 0 : return -EINVAL;
1462 : }
1463 :
1464 19 : if (raid_bdev_find_by_name(name) != NULL) {
1465 1 : SPDK_ERRLOG("Duplicate raid bdev name found: %s\n", name);
1466 1 : return -EEXIST;
1467 : }
1468 :
1469 18 : if (level == RAID1) {
1470 0 : if (strip_size != 0) {
1471 0 : SPDK_ERRLOG("Strip size is not supported by raid1\n");
1472 0 : return -EINVAL;
1473 : }
1474 18 : } else if (spdk_u32_is_pow2(strip_size) == false) {
1475 1 : SPDK_ERRLOG("Invalid strip size %" PRIu32 "\n", strip_size);
1476 1 : return -EINVAL;
1477 : }
1478 :
1479 17 : module = raid_bdev_module_find(level);
1480 17 : if (module == NULL) {
1481 1 : SPDK_ERRLOG("Unsupported raid level '%d'\n", level);
1482 1 : return -EINVAL;
1483 : }
1484 :
1485 16 : assert(module->base_bdevs_min != 0);
1486 16 : if (num_base_bdevs < module->base_bdevs_min) {
1487 0 : SPDK_ERRLOG("At least %u base devices required for %s\n",
1488 : module->base_bdevs_min,
1489 : raid_bdev_level_to_str(level));
1490 0 : return -EINVAL;
1491 : }
1492 :
1493 16 : switch (module->base_bdevs_constraint.type) {
1494 0 : case CONSTRAINT_MAX_BASE_BDEVS_REMOVED:
1495 0 : min_operational = num_base_bdevs - module->base_bdevs_constraint.value;
1496 0 : break;
1497 0 : case CONSTRAINT_MIN_BASE_BDEVS_OPERATIONAL:
1498 0 : min_operational = module->base_bdevs_constraint.value;
1499 0 : break;
1500 16 : case CONSTRAINT_UNSET:
1501 16 : if (module->base_bdevs_constraint.value != 0) {
1502 0 : SPDK_ERRLOG("Unexpected constraint value '%u' provided for raid bdev '%s'.\n",
1503 : (uint8_t)module->base_bdevs_constraint.value, name);
1504 0 : return -EINVAL;
1505 : }
1506 16 : min_operational = num_base_bdevs;
1507 16 : break;
1508 0 : default:
1509 0 : SPDK_ERRLOG("Unrecognised constraint type '%u' in module for raid level '%s'.\n",
1510 : (uint8_t)module->base_bdevs_constraint.type,
1511 : raid_bdev_level_to_str(module->level));
1512 0 : return -EINVAL;
1513 : };
1514 :
1515 16 : if (min_operational == 0 || min_operational > num_base_bdevs) {
1516 0 : SPDK_ERRLOG("Wrong constraint value for raid level '%s'.\n",
1517 : raid_bdev_level_to_str(module->level));
1518 0 : return -EINVAL;
1519 : }
1520 :
1521 16 : raid_bdev = calloc(1, sizeof(*raid_bdev));
1522 16 : if (!raid_bdev) {
1523 0 : SPDK_ERRLOG("Unable to allocate memory for raid bdev\n");
1524 0 : return -ENOMEM;
1525 : }
1526 :
1527 16 : raid_bdev->module = module;
1528 16 : raid_bdev->num_base_bdevs = num_base_bdevs;
1529 16 : raid_bdev->base_bdev_info = calloc(raid_bdev->num_base_bdevs,
1530 : sizeof(struct raid_base_bdev_info));
1531 16 : if (!raid_bdev->base_bdev_info) {
1532 0 : SPDK_ERRLOG("Unable able to allocate base bdev info\n");
1533 0 : raid_bdev_free(raid_bdev);
1534 0 : return -ENOMEM;
1535 : }
1536 :
1537 528 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1538 512 : base_info->raid_bdev = raid_bdev;
1539 : }
1540 :
1541 : /* strip_size_kb is from the rpc param. strip_size is in blocks and used
1542 : * internally and set later.
1543 : */
1544 16 : raid_bdev->strip_size = 0;
1545 16 : raid_bdev->strip_size_kb = strip_size;
1546 16 : raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1547 16 : raid_bdev->level = level;
1548 16 : raid_bdev->min_base_bdevs_operational = min_operational;
1549 16 : raid_bdev->superblock_enabled = superblock_enabled;
1550 :
1551 16 : raid_bdev_gen = &raid_bdev->bdev;
1552 :
1553 16 : raid_bdev_gen->name = strdup(name);
1554 16 : if (!raid_bdev_gen->name) {
1555 0 : SPDK_ERRLOG("Unable to allocate name for raid\n");
1556 0 : raid_bdev_free(raid_bdev);
1557 0 : return -ENOMEM;
1558 : }
1559 :
1560 16 : raid_bdev_gen->product_name = "Raid Volume";
1561 16 : raid_bdev_gen->ctxt = raid_bdev;
1562 16 : raid_bdev_gen->fn_table = &g_raid_bdev_fn_table;
1563 16 : raid_bdev_gen->module = &g_raid_if;
1564 16 : raid_bdev_gen->write_cache = 0;
1565 16 : spdk_uuid_copy(&raid_bdev_gen->uuid, uuid);
1566 :
1567 16 : TAILQ_INSERT_TAIL(&g_raid_bdev_list, raid_bdev, global_link);
1568 :
1569 16 : *raid_bdev_out = raid_bdev;
1570 :
1571 16 : return 0;
1572 : }
1573 :
1574 : /*
1575 : * brief:
1576 : * raid_bdev_create allocates raid bdev based on passed configuration
1577 : * params:
1578 : * name - name for raid bdev
1579 : * strip_size - strip size in KB
1580 : * num_base_bdevs - number of base bdevs
1581 : * level - raid level
1582 : * superblock_enabled - true if raid should have superblock
1583 : * uuid - uuid to set for the bdev
1584 : * raid_bdev_out - the created raid bdev
1585 : * returns:
1586 : * 0 - success
1587 : * non zero - failure
1588 : */
1589 : int
1590 19 : raid_bdev_create(const char *name, uint32_t strip_size, uint8_t num_base_bdevs,
1591 : enum raid_level level, bool superblock_enabled, const struct spdk_uuid *uuid,
1592 : struct raid_bdev **raid_bdev_out)
1593 : {
1594 19 : struct raid_bdev *raid_bdev;
1595 : int rc;
1596 :
1597 19 : assert(uuid != NULL);
1598 :
1599 19 : rc = _raid_bdev_create(name, strip_size, num_base_bdevs, level, superblock_enabled, uuid,
1600 : &raid_bdev);
1601 19 : if (rc != 0) {
1602 3 : return rc;
1603 : }
1604 :
1605 16 : if (superblock_enabled && spdk_uuid_is_null(uuid)) {
1606 : /* we need to have the uuid to store in the superblock before the bdev is registered */
1607 1 : spdk_uuid_generate(&raid_bdev->bdev.uuid);
1608 : }
1609 :
1610 16 : raid_bdev->num_base_bdevs_operational = num_base_bdevs;
1611 :
1612 16 : *raid_bdev_out = raid_bdev;
1613 :
1614 16 : return 0;
1615 : }
1616 :
1617 : static void
1618 0 : _raid_bdev_unregistering_cont(void *ctx)
1619 : {
1620 0 : struct raid_bdev *raid_bdev = ctx;
1621 :
1622 0 : spdk_bdev_close(raid_bdev->self_desc);
1623 0 : raid_bdev->self_desc = NULL;
1624 0 : }
1625 :
1626 : static void
1627 0 : raid_bdev_unregistering_cont(void *ctx)
1628 : {
1629 0 : spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_unregistering_cont, ctx);
1630 0 : }
1631 :
1632 : static int
1633 0 : raid_bdev_process_add_finish_action(struct raid_bdev_process *process, spdk_msg_fn cb, void *cb_ctx)
1634 : {
1635 : struct raid_process_finish_action *finish_action;
1636 :
1637 0 : assert(spdk_get_thread() == process->thread);
1638 0 : assert(process->state < RAID_PROCESS_STATE_STOPPED);
1639 :
1640 0 : finish_action = calloc(1, sizeof(*finish_action));
1641 0 : if (finish_action == NULL) {
1642 0 : return -ENOMEM;
1643 : }
1644 :
1645 0 : finish_action->cb = cb;
1646 0 : finish_action->cb_ctx = cb_ctx;
1647 :
1648 0 : TAILQ_INSERT_TAIL(&process->finish_actions, finish_action, link);
1649 :
1650 0 : return 0;
1651 : }
1652 :
1653 : static void
1654 0 : raid_bdev_unregistering_stop_process(void *ctx)
1655 : {
1656 0 : struct raid_bdev_process *process = ctx;
1657 0 : struct raid_bdev *raid_bdev = process->raid_bdev;
1658 : int rc;
1659 :
1660 0 : process->state = RAID_PROCESS_STATE_STOPPING;
1661 0 : if (process->status == 0) {
1662 0 : process->status = -ECANCELED;
1663 : }
1664 :
1665 0 : rc = raid_bdev_process_add_finish_action(process, raid_bdev_unregistering_cont, raid_bdev);
1666 0 : if (rc != 0) {
1667 0 : SPDK_ERRLOG("Failed to add raid bdev '%s' process finish action: %s\n",
1668 : raid_bdev->bdev.name, spdk_strerror(-rc));
1669 : }
1670 0 : }
1671 :
1672 : static void
1673 0 : raid_bdev_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
1674 : {
1675 0 : struct raid_bdev *raid_bdev = event_ctx;
1676 :
1677 0 : if (type == SPDK_BDEV_EVENT_REMOVE) {
1678 0 : if (raid_bdev->process != NULL) {
1679 0 : spdk_thread_send_msg(raid_bdev->process->thread, raid_bdev_unregistering_stop_process,
1680 0 : raid_bdev->process);
1681 : } else {
1682 0 : raid_bdev_unregistering_cont(raid_bdev);
1683 : }
1684 : }
1685 0 : }
1686 :
1687 : static void
1688 13 : raid_bdev_configure_cont(struct raid_bdev *raid_bdev)
1689 : {
1690 13 : struct spdk_bdev *raid_bdev_gen = &raid_bdev->bdev;
1691 : int rc;
1692 :
1693 13 : raid_bdev->state = RAID_BDEV_STATE_ONLINE;
1694 13 : SPDK_DEBUGLOG(bdev_raid, "io device register %p\n", raid_bdev);
1695 13 : SPDK_DEBUGLOG(bdev_raid, "blockcnt %" PRIu64 ", blocklen %u\n",
1696 : raid_bdev_gen->blockcnt, raid_bdev_gen->blocklen);
1697 13 : spdk_io_device_register(raid_bdev, raid_bdev_create_cb, raid_bdev_destroy_cb,
1698 : sizeof(struct raid_bdev_io_channel),
1699 13 : raid_bdev_gen->name);
1700 13 : rc = spdk_bdev_register(raid_bdev_gen);
1701 13 : if (rc != 0) {
1702 0 : SPDK_ERRLOG("Failed to register raid bdev '%s': %s\n",
1703 : raid_bdev_gen->name, spdk_strerror(-rc));
1704 0 : goto err;
1705 : }
1706 :
1707 : /*
1708 : * Open the bdev internally to delay unregistering if we need to stop a background process
1709 : * first. The process may still need to unquiesce a range but it will fail because the
1710 : * bdev's internal.spinlock is destroyed by the time the destruct callback is reached.
1711 : * During application shutdown, bdevs automatically get unregistered by the bdev layer
1712 : * so this is the only way currently to do this correctly.
1713 : * TODO: try to handle this correctly in bdev layer instead.
1714 : */
1715 13 : rc = spdk_bdev_open_ext(raid_bdev_gen->name, false, raid_bdev_event_cb, raid_bdev,
1716 : &raid_bdev->self_desc);
1717 13 : if (rc != 0) {
1718 0 : SPDK_ERRLOG("Failed to open raid bdev '%s': %s\n",
1719 : raid_bdev_gen->name, spdk_strerror(-rc));
1720 0 : spdk_bdev_unregister(raid_bdev_gen, NULL, NULL);
1721 0 : goto err;
1722 : }
1723 :
1724 13 : SPDK_DEBUGLOG(bdev_raid, "raid bdev generic %p\n", raid_bdev_gen);
1725 13 : SPDK_DEBUGLOG(bdev_raid, "raid bdev is created with name %s, raid_bdev %p\n",
1726 : raid_bdev_gen->name, raid_bdev);
1727 13 : return;
1728 0 : err:
1729 0 : if (raid_bdev->module->stop != NULL) {
1730 0 : raid_bdev->module->stop(raid_bdev);
1731 : }
1732 0 : spdk_io_device_unregister(raid_bdev, NULL);
1733 0 : raid_bdev->state = RAID_BDEV_STATE_CONFIGURING;
1734 : }
1735 :
1736 : static void
1737 1 : raid_bdev_configure_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
1738 : {
1739 1 : if (status == 0) {
1740 1 : raid_bdev_configure_cont(raid_bdev);
1741 : } else {
1742 0 : SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n",
1743 : raid_bdev->bdev.name, spdk_strerror(-status));
1744 0 : if (raid_bdev->module->stop != NULL) {
1745 0 : raid_bdev->module->stop(raid_bdev);
1746 : }
1747 : }
1748 1 : }
1749 :
1750 : /*
1751 : * brief:
1752 : * If raid bdev config is complete, then only register the raid bdev to
1753 : * bdev layer and remove this raid bdev from configuring list and
1754 : * insert the raid bdev to configured list
1755 : * params:
1756 : * raid_bdev - pointer to raid bdev
1757 : * returns:
1758 : * 0 - success
1759 : * non zero - failure
1760 : */
1761 : static int
1762 13 : raid_bdev_configure(struct raid_bdev *raid_bdev)
1763 : {
1764 13 : uint32_t data_block_size = spdk_bdev_get_data_block_size(&raid_bdev->bdev);
1765 : int rc;
1766 :
1767 13 : assert(raid_bdev->state == RAID_BDEV_STATE_CONFIGURING);
1768 13 : assert(raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational);
1769 13 : assert(raid_bdev->bdev.blocklen > 0);
1770 :
1771 : /* The strip_size_kb is read in from user in KB. Convert to blocks here for
1772 : * internal use.
1773 : */
1774 13 : raid_bdev->strip_size = (raid_bdev->strip_size_kb * 1024) / data_block_size;
1775 13 : if (raid_bdev->strip_size == 0 && raid_bdev->level != RAID1) {
1776 0 : SPDK_ERRLOG("Strip size cannot be smaller than the device block size\n");
1777 0 : return -EINVAL;
1778 : }
1779 13 : raid_bdev->strip_size_shift = spdk_u32log2(raid_bdev->strip_size);
1780 :
1781 13 : rc = raid_bdev->module->start(raid_bdev);
1782 13 : if (rc != 0) {
1783 0 : SPDK_ERRLOG("raid module startup callback failed\n");
1784 0 : return rc;
1785 : }
1786 :
1787 13 : if (raid_bdev->superblock_enabled) {
1788 1 : if (raid_bdev->sb == NULL) {
1789 1 : rc = raid_bdev_alloc_superblock(raid_bdev, data_block_size);
1790 1 : if (rc == 0) {
1791 1 : raid_bdev_init_superblock(raid_bdev);
1792 : }
1793 : } else {
1794 0 : assert(spdk_uuid_compare(&raid_bdev->sb->uuid, &raid_bdev->bdev.uuid) == 0);
1795 0 : if (raid_bdev->sb->block_size != data_block_size) {
1796 0 : SPDK_ERRLOG("blocklen does not match value in superblock\n");
1797 0 : rc = -EINVAL;
1798 : }
1799 0 : if (raid_bdev->sb->raid_size != raid_bdev->bdev.blockcnt) {
1800 0 : SPDK_ERRLOG("blockcnt does not match value in superblock\n");
1801 0 : rc = -EINVAL;
1802 : }
1803 : }
1804 :
1805 1 : if (rc != 0) {
1806 0 : if (raid_bdev->module->stop != NULL) {
1807 0 : raid_bdev->module->stop(raid_bdev);
1808 : }
1809 0 : return rc;
1810 : }
1811 :
1812 1 : raid_bdev_write_superblock(raid_bdev, raid_bdev_configure_write_sb_cb, NULL);
1813 : } else {
1814 12 : raid_bdev_configure_cont(raid_bdev);
1815 : }
1816 :
1817 13 : return 0;
1818 : }
1819 :
1820 : /*
1821 : * brief:
1822 : * If raid bdev is online and registered, change the bdev state to
1823 : * configuring and unregister this raid device. Queue this raid device
1824 : * in configuring list
1825 : * params:
1826 : * raid_bdev - pointer to raid bdev
1827 : * cb_fn - callback function
1828 : * cb_arg - argument to callback function
1829 : * returns:
1830 : * none
1831 : */
1832 : static void
1833 13 : raid_bdev_deconfigure(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn,
1834 : void *cb_arg)
1835 : {
1836 13 : if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
1837 0 : if (cb_fn) {
1838 0 : cb_fn(cb_arg, 0);
1839 : }
1840 0 : return;
1841 : }
1842 :
1843 13 : raid_bdev->state = RAID_BDEV_STATE_OFFLINE;
1844 13 : SPDK_DEBUGLOG(bdev_raid, "raid bdev state changing from online to offline\n");
1845 :
1846 13 : spdk_bdev_unregister(&raid_bdev->bdev, cb_fn, cb_arg);
1847 : }
1848 :
1849 : /*
1850 : * brief:
1851 : * raid_bdev_find_base_info_by_bdev function finds the base bdev info by bdev.
1852 : * params:
1853 : * base_bdev - pointer to base bdev
1854 : * returns:
1855 : * base bdev info if found, otherwise NULL.
1856 : */
1857 : static struct raid_base_bdev_info *
1858 0 : raid_bdev_find_base_info_by_bdev(struct spdk_bdev *base_bdev)
1859 : {
1860 : struct raid_bdev *raid_bdev;
1861 : struct raid_base_bdev_info *base_info;
1862 :
1863 0 : TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
1864 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
1865 0 : if (base_info->desc != NULL &&
1866 0 : spdk_bdev_desc_get_bdev(base_info->desc) == base_bdev) {
1867 0 : return base_info;
1868 : }
1869 : }
1870 : }
1871 :
1872 0 : return NULL;
1873 : }
1874 :
1875 : static void
1876 0 : raid_bdev_remove_base_bdev_done(struct raid_base_bdev_info *base_info, int status)
1877 : {
1878 0 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
1879 :
1880 0 : assert(base_info->remove_scheduled);
1881 0 : base_info->remove_scheduled = false;
1882 :
1883 0 : if (status == 0) {
1884 0 : raid_bdev->num_base_bdevs_operational--;
1885 0 : if (raid_bdev->num_base_bdevs_operational < raid_bdev->min_base_bdevs_operational) {
1886 : /* There is not enough base bdevs to keep the raid bdev operational. */
1887 0 : raid_bdev_deconfigure(raid_bdev, base_info->remove_cb, base_info->remove_cb_ctx);
1888 0 : return;
1889 : }
1890 : }
1891 :
1892 0 : if (base_info->remove_cb != NULL) {
1893 0 : base_info->remove_cb(base_info->remove_cb_ctx, status);
1894 : }
1895 : }
1896 :
1897 : static void
1898 0 : raid_bdev_remove_base_bdev_on_unquiesced(void *ctx, int status)
1899 : {
1900 0 : struct raid_base_bdev_info *base_info = ctx;
1901 0 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
1902 :
1903 0 : if (status != 0) {
1904 0 : SPDK_ERRLOG("Failed to unquiesce raid bdev %s: %s\n",
1905 : raid_bdev->bdev.name, spdk_strerror(-status));
1906 : }
1907 :
1908 0 : raid_bdev_remove_base_bdev_done(base_info, status);
1909 0 : }
1910 :
1911 : static void
1912 0 : raid_bdev_channel_remove_base_bdev(struct spdk_io_channel_iter *i)
1913 : {
1914 0 : struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
1915 0 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
1916 0 : struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);
1917 0 : uint8_t idx = raid_bdev_base_bdev_slot(base_info);
1918 :
1919 0 : SPDK_DEBUGLOG(bdev_raid, "slot: %u raid_ch: %p\n", idx, raid_ch);
1920 :
1921 0 : if (raid_ch->base_channel[idx] != NULL) {
1922 0 : spdk_put_io_channel(raid_ch->base_channel[idx]);
1923 0 : raid_ch->base_channel[idx] = NULL;
1924 : }
1925 :
1926 0 : if (raid_ch->process.ch_processed != NULL) {
1927 0 : raid_ch->process.ch_processed->base_channel[idx] = NULL;
1928 : }
1929 :
1930 0 : spdk_for_each_channel_continue(i, 0);
1931 0 : }
1932 :
1933 : static void
1934 0 : raid_bdev_channels_remove_base_bdev_done(struct spdk_io_channel_iter *i, int status)
1935 : {
1936 0 : struct raid_base_bdev_info *base_info = spdk_io_channel_iter_get_ctx(i);
1937 0 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
1938 :
1939 0 : raid_bdev_free_base_bdev_resource(base_info);
1940 :
1941 0 : spdk_bdev_unquiesce(&raid_bdev->bdev, &g_raid_if, raid_bdev_remove_base_bdev_on_unquiesced,
1942 : base_info);
1943 0 : }
1944 :
1945 : static void
1946 0 : raid_bdev_remove_base_bdev_cont(struct raid_base_bdev_info *base_info)
1947 : {
1948 0 : raid_bdev_deconfigure_base_bdev(base_info);
1949 :
1950 0 : spdk_for_each_channel(base_info->raid_bdev, raid_bdev_channel_remove_base_bdev, base_info,
1951 : raid_bdev_channels_remove_base_bdev_done);
1952 0 : }
1953 :
1954 : static void
1955 0 : raid_bdev_remove_base_bdev_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
1956 : {
1957 0 : struct raid_base_bdev_info *base_info = ctx;
1958 :
1959 0 : if (status != 0) {
1960 0 : SPDK_ERRLOG("Failed to write raid bdev '%s' superblock: %s\n",
1961 : raid_bdev->bdev.name, spdk_strerror(-status));
1962 0 : raid_bdev_remove_base_bdev_done(base_info, status);
1963 0 : return;
1964 : }
1965 :
1966 0 : raid_bdev_remove_base_bdev_cont(base_info);
1967 : }
1968 :
1969 : static void
1970 0 : raid_bdev_remove_base_bdev_on_quiesced(void *ctx, int status)
1971 : {
1972 0 : struct raid_base_bdev_info *base_info = ctx;
1973 0 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
1974 :
1975 0 : if (status != 0) {
1976 0 : SPDK_ERRLOG("Failed to quiesce raid bdev %s: %s\n",
1977 : raid_bdev->bdev.name, spdk_strerror(-status));
1978 0 : raid_bdev_remove_base_bdev_done(base_info, status);
1979 0 : return;
1980 : }
1981 :
1982 0 : if (raid_bdev->sb) {
1983 0 : struct raid_bdev_superblock *sb = raid_bdev->sb;
1984 0 : uint8_t slot = raid_bdev_base_bdev_slot(base_info);
1985 : uint8_t i;
1986 :
1987 0 : for (i = 0; i < sb->base_bdevs_size; i++) {
1988 0 : struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];
1989 :
1990 0 : if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED &&
1991 0 : sb_base_bdev->slot == slot) {
1992 0 : if (base_info->is_failed) {
1993 0 : sb_base_bdev->state = RAID_SB_BASE_BDEV_FAILED;
1994 : } else {
1995 0 : sb_base_bdev->state = RAID_SB_BASE_BDEV_MISSING;
1996 : }
1997 :
1998 0 : raid_bdev_write_superblock(raid_bdev, raid_bdev_remove_base_bdev_write_sb_cb, base_info);
1999 0 : return;
2000 : }
2001 : }
2002 : }
2003 :
2004 0 : raid_bdev_remove_base_bdev_cont(base_info);
2005 : }
2006 :
2007 : static int
2008 0 : raid_bdev_remove_base_bdev_quiesce(struct raid_base_bdev_info *base_info)
2009 : {
2010 0 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
2011 :
2012 0 : return spdk_bdev_quiesce(&base_info->raid_bdev->bdev, &g_raid_if,
2013 : raid_bdev_remove_base_bdev_on_quiesced, base_info);
2014 : }
2015 :
/*
 * Context of the message that tells a running background process about a base bdev
 * removal. Allocated by raid_bdev_process_base_bdev_remove().
 */
struct raid_bdev_process_base_bdev_remove_ctx {
	/* Background process being notified */
	struct raid_bdev_process *process;
	/* Base bdev that is being removed */
	struct raid_base_bdev_info *base_info;
	/* Number of configured base bdevs not scheduled for removal when the ctx was filled in */
	uint8_t num_base_bdevs_operational;
};
2021 :
/*
 * Message handler sent to the app thread: starts the quiesce step of the removal and, if
 * that cannot be started, finishes the removal with the error.
 */
static void
_raid_bdev_process_base_bdev_remove_cont(void *ctx)
{
	struct raid_base_bdev_info *base_info = ctx;
	int rc = raid_bdev_remove_base_bdev_quiesce(base_info);

	if (rc == 0) {
		return;
	}

	raid_bdev_remove_base_bdev_done(base_info, rc);
}
2033 :
2034 : static void
2035 0 : raid_bdev_process_base_bdev_remove_cont(void *_ctx)
2036 : {
2037 0 : struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx;
2038 0 : struct raid_base_bdev_info *base_info = ctx->base_info;
2039 :
2040 0 : free(ctx);
2041 :
2042 0 : spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_base_bdev_remove_cont,
2043 : base_info);
2044 0 : }
2045 :
2046 : static void
2047 0 : _raid_bdev_process_base_bdev_remove(void *_ctx)
2048 : {
2049 0 : struct raid_bdev_process_base_bdev_remove_ctx *ctx = _ctx;
2050 0 : struct raid_bdev_process *process = ctx->process;
2051 : int ret;
2052 :
2053 0 : if (ctx->base_info != process->target &&
2054 0 : ctx->num_base_bdevs_operational > process->raid_bdev->min_base_bdevs_operational) {
2055 : /* process doesn't need to be stopped */
2056 0 : raid_bdev_process_base_bdev_remove_cont(ctx);
2057 0 : return;
2058 : }
2059 :
2060 0 : assert(process->state > RAID_PROCESS_STATE_INIT &&
2061 : process->state < RAID_PROCESS_STATE_STOPPED);
2062 :
2063 0 : ret = raid_bdev_process_add_finish_action(process, raid_bdev_process_base_bdev_remove_cont, ctx);
2064 0 : if (ret != 0) {
2065 0 : raid_bdev_remove_base_bdev_done(ctx->base_info, ret);
2066 0 : free(ctx);
2067 0 : return;
2068 : }
2069 :
2070 0 : process->state = RAID_PROCESS_STATE_STOPPING;
2071 :
2072 0 : if (process->status == 0) {
2073 0 : process->status = -ENODEV;
2074 : }
2075 : }
2076 :
2077 : static int
2078 0 : raid_bdev_process_base_bdev_remove(struct raid_bdev_process *process,
2079 : struct raid_base_bdev_info *base_info)
2080 : {
2081 : struct raid_bdev_process_base_bdev_remove_ctx *ctx;
2082 :
2083 0 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
2084 :
2085 0 : ctx = calloc(1, sizeof(*ctx));
2086 0 : if (ctx == NULL) {
2087 0 : return -ENOMEM;
2088 : }
2089 :
2090 : /*
2091 : * We have to send the process and num_base_bdevs_operational in the message ctx
2092 : * because the process thread should not access raid_bdev's properties. Particularly,
2093 : * raid_bdev->process may be cleared by the time the message is handled, but ctx->process
2094 : * will still be valid until the process is fully stopped.
2095 : */
2096 0 : ctx->base_info = base_info;
2097 0 : ctx->process = process;
2098 : /*
2099 : * raid_bdev->num_base_bdevs_operational can't be used here because it is decremented
2100 : * after the removal and more than one base bdev may be removed at the same time
2101 : */
2102 0 : RAID_FOR_EACH_BASE_BDEV(process->raid_bdev, base_info) {
2103 0 : if (base_info->is_configured && !base_info->remove_scheduled) {
2104 0 : ctx->num_base_bdevs_operational++;
2105 : }
2106 : }
2107 :
2108 0 : spdk_thread_send_msg(process->thread, _raid_bdev_process_base_bdev_remove, ctx);
2109 :
2110 0 : return 0;
2111 : }
2112 :
2113 : static int
2114 0 : _raid_bdev_remove_base_bdev(struct raid_base_bdev_info *base_info,
2115 : raid_base_bdev_cb cb_fn, void *cb_ctx)
2116 : {
2117 0 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
2118 0 : int ret = 0;
2119 :
2120 0 : SPDK_DEBUGLOG(bdev_raid, "%s\n", base_info->name);
2121 :
2122 0 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
2123 :
2124 0 : if (base_info->remove_scheduled || !base_info->is_configured) {
2125 0 : return -ENODEV;
2126 : }
2127 :
2128 0 : assert(base_info->desc);
2129 0 : base_info->remove_scheduled = true;
2130 :
2131 0 : if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
2132 : /*
2133 : * As raid bdev is not registered yet or already unregistered,
2134 : * so cleanup should be done here itself.
2135 : *
2136 : * Removing a base bdev at this stage does not change the number of operational
2137 : * base bdevs, only the number of discovered base bdevs.
2138 : */
2139 0 : raid_bdev_free_base_bdev_resource(base_info);
2140 0 : base_info->remove_scheduled = false;
2141 0 : if (raid_bdev->num_base_bdevs_discovered == 0 &&
2142 0 : raid_bdev->state == RAID_BDEV_STATE_OFFLINE) {
2143 : /* There is no base bdev for this raid, so free the raid device. */
2144 0 : raid_bdev_cleanup_and_free(raid_bdev);
2145 : }
2146 0 : if (cb_fn != NULL) {
2147 0 : cb_fn(cb_ctx, 0);
2148 : }
2149 0 : } else if (raid_bdev->min_base_bdevs_operational == raid_bdev->num_base_bdevs) {
2150 : /* This raid bdev does not tolerate removing a base bdev. */
2151 0 : raid_bdev->num_base_bdevs_operational--;
2152 0 : raid_bdev_deconfigure(raid_bdev, cb_fn, cb_ctx);
2153 : } else {
2154 0 : base_info->remove_cb = cb_fn;
2155 0 : base_info->remove_cb_ctx = cb_ctx;
2156 :
2157 0 : if (raid_bdev->process != NULL) {
2158 0 : ret = raid_bdev_process_base_bdev_remove(raid_bdev->process, base_info);
2159 : } else {
2160 0 : ret = raid_bdev_remove_base_bdev_quiesce(base_info);
2161 : }
2162 :
2163 0 : if (ret != 0) {
2164 0 : base_info->remove_scheduled = false;
2165 : }
2166 : }
2167 :
2168 0 : return ret;
2169 : }
2170 :
2171 : /*
2172 : * brief:
2173 : * raid_bdev_remove_base_bdev function is called by below layers when base_bdev
2174 : * is removed. This function checks if this base bdev is part of any raid bdev
2175 : * or not. If yes, it takes necessary action on that particular raid bdev.
2176 : * params:
2177 : * base_bdev - pointer to base bdev which got removed
2178 : * cb_fn - callback function
2179 : * cb_arg - argument to callback function
2180 : * returns:
2181 : * 0 - success
2182 : * non zero - failure
2183 : */
2184 : int
2185 0 : raid_bdev_remove_base_bdev(struct spdk_bdev *base_bdev, raid_base_bdev_cb cb_fn, void *cb_ctx)
2186 : {
2187 : struct raid_base_bdev_info *base_info;
2188 :
2189 : /* Find the raid_bdev which has claimed this base_bdev */
2190 0 : base_info = raid_bdev_find_base_info_by_bdev(base_bdev);
2191 0 : if (!base_info) {
2192 0 : SPDK_ERRLOG("bdev to remove '%s' not found\n", base_bdev->name);
2193 0 : return -ENODEV;
2194 : }
2195 :
2196 0 : return _raid_bdev_remove_base_bdev(base_info, cb_fn, cb_ctx);
2197 : }
2198 :
2199 : static void
2200 0 : raid_bdev_fail_base_remove_cb(void *ctx, int status)
2201 : {
2202 0 : struct raid_base_bdev_info *base_info = ctx;
2203 :
2204 0 : if (status != 0) {
2205 0 : SPDK_WARNLOG("Failed to remove base bdev %s\n", base_info->name);
2206 0 : base_info->is_failed = false;
2207 : }
2208 0 : }
2209 :
2210 : static void
2211 0 : _raid_bdev_fail_base_bdev(void *ctx)
2212 : {
2213 0 : struct raid_base_bdev_info *base_info = ctx;
2214 : int rc;
2215 :
2216 0 : if (base_info->is_failed) {
2217 0 : return;
2218 : }
2219 0 : base_info->is_failed = true;
2220 :
2221 0 : SPDK_NOTICELOG("Failing base bdev in slot %d ('%s') of raid bdev '%s'\n",
2222 : raid_bdev_base_bdev_slot(base_info), base_info->name, base_info->raid_bdev->bdev.name);
2223 :
2224 0 : rc = _raid_bdev_remove_base_bdev(base_info, raid_bdev_fail_base_remove_cb, base_info);
2225 0 : if (rc != 0) {
2226 0 : raid_bdev_fail_base_remove_cb(base_info, rc);
2227 : }
2228 : }
2229 :
2230 : void
2231 0 : raid_bdev_fail_base_bdev(struct raid_base_bdev_info *base_info)
2232 : {
2233 0 : spdk_thread_exec_msg(spdk_thread_get_app_thread(), _raid_bdev_fail_base_bdev, base_info);
2234 0 : }
2235 :
2236 : static void
2237 0 : raid_bdev_resize_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
2238 : {
2239 0 : if (status != 0) {
2240 0 : SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after resizing the bdev: %s\n",
2241 : raid_bdev->bdev.name, spdk_strerror(-status));
2242 : }
2243 0 : }
2244 :
2245 : /*
2246 : * brief:
2247 : * raid_bdev_resize_base_bdev function is called by below layers when base_bdev
2248 : * is resized. This function checks if the smallest size of the base_bdevs is changed.
2249 : * If yes, call module handler to resize the raid_bdev if implemented.
2250 : * params:
2251 : * base_bdev - pointer to base bdev which got resized.
2252 : * returns:
2253 : * none
2254 : */
2255 : static void
2256 0 : raid_bdev_resize_base_bdev(struct spdk_bdev *base_bdev)
2257 : {
2258 : struct raid_bdev *raid_bdev;
2259 : struct raid_base_bdev_info *base_info;
2260 : uint64_t blockcnt_old;
2261 :
2262 0 : SPDK_DEBUGLOG(bdev_raid, "raid_bdev_resize_base_bdev\n");
2263 :
2264 0 : base_info = raid_bdev_find_base_info_by_bdev(base_bdev);
2265 :
2266 : /* Find the raid_bdev which has claimed this base_bdev */
2267 0 : if (!base_info) {
2268 0 : SPDK_ERRLOG("raid_bdev whose base_bdev '%s' not found\n", base_bdev->name);
2269 0 : return;
2270 : }
2271 0 : raid_bdev = base_info->raid_bdev;
2272 :
2273 0 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
2274 :
2275 0 : SPDK_NOTICELOG("base_bdev '%s' was resized: old size %" PRIu64 ", new size %" PRIu64 "\n",
2276 : base_bdev->name, base_info->blockcnt, base_bdev->blockcnt);
2277 :
2278 0 : base_info->blockcnt = base_bdev->blockcnt;
2279 :
2280 0 : if (!raid_bdev->module->resize) {
2281 0 : return;
2282 : }
2283 :
2284 0 : blockcnt_old = raid_bdev->bdev.blockcnt;
2285 0 : if (raid_bdev->module->resize(raid_bdev) == false) {
2286 0 : return;
2287 : }
2288 :
2289 0 : SPDK_NOTICELOG("raid bdev '%s': block count was changed from %" PRIu64 " to %" PRIu64 "\n",
2290 : raid_bdev->bdev.name, blockcnt_old, raid_bdev->bdev.blockcnt);
2291 :
2292 0 : if (raid_bdev->superblock_enabled) {
2293 0 : struct raid_bdev_superblock *sb = raid_bdev->sb;
2294 : uint8_t i;
2295 :
2296 0 : for (i = 0; i < sb->base_bdevs_size; i++) {
2297 0 : struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];
2298 :
2299 0 : if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) {
2300 0 : base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
2301 0 : sb_base_bdev->data_size = base_info->data_size;
2302 : }
2303 : }
2304 0 : sb->raid_size = raid_bdev->bdev.blockcnt;
2305 0 : raid_bdev_write_superblock(raid_bdev, raid_bdev_resize_write_sb_cb, NULL);
2306 : }
2307 : }
2308 :
2309 : /*
2310 : * brief:
2311 : * raid_bdev_event_base_bdev function is called by below layers when base_bdev
2312 : * triggers asynchronous event.
2313 : * params:
2314 : * type - event details.
2315 : * bdev - bdev that triggered event.
2316 : * event_ctx - context for event.
2317 : * returns:
2318 : * none
2319 : */
2320 : static void
2321 0 : raid_bdev_event_base_bdev(enum spdk_bdev_event_type type, struct spdk_bdev *bdev,
2322 : void *event_ctx)
2323 : {
2324 : int rc;
2325 :
2326 0 : switch (type) {
2327 0 : case SPDK_BDEV_EVENT_REMOVE:
2328 0 : rc = raid_bdev_remove_base_bdev(bdev, NULL, NULL);
2329 0 : if (rc != 0) {
2330 0 : SPDK_ERRLOG("Failed to remove base bdev %s: %s\n",
2331 : spdk_bdev_get_name(bdev), spdk_strerror(-rc));
2332 : }
2333 0 : break;
2334 0 : case SPDK_BDEV_EVENT_RESIZE:
2335 0 : raid_bdev_resize_base_bdev(bdev);
2336 0 : break;
2337 0 : default:
2338 0 : SPDK_NOTICELOG("Unsupported bdev event: type %d\n", type);
2339 0 : break;
2340 : }
2341 0 : }
2342 :
2343 : /*
2344 : * brief:
2345 : * Deletes the specified raid bdev
2346 : * params:
2347 : * raid_bdev - pointer to raid bdev
2348 : * cb_fn - callback function
2349 : * cb_arg - argument to callback function
2350 : */
2351 : void
2352 15 : raid_bdev_delete(struct raid_bdev *raid_bdev, raid_bdev_destruct_cb cb_fn, void *cb_arg)
2353 : {
2354 : struct raid_base_bdev_info *base_info;
2355 :
2356 15 : SPDK_DEBUGLOG(bdev_raid, "delete raid bdev: %s\n", raid_bdev->bdev.name);
2357 :
2358 15 : if (raid_bdev->destroy_started) {
2359 0 : SPDK_DEBUGLOG(bdev_raid, "destroying raid bdev %s is already started\n",
2360 : raid_bdev->bdev.name);
2361 0 : if (cb_fn) {
2362 0 : cb_fn(cb_arg, -EALREADY);
2363 : }
2364 0 : return;
2365 : }
2366 :
2367 15 : raid_bdev->destroy_started = true;
2368 :
2369 495 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
2370 480 : base_info->remove_scheduled = true;
2371 :
2372 480 : if (raid_bdev->state != RAID_BDEV_STATE_ONLINE) {
2373 : /*
2374 : * As raid bdev is not registered yet or already unregistered,
2375 : * so cleanup should be done here itself.
2376 : */
2377 64 : raid_bdev_free_base_bdev_resource(base_info);
2378 : }
2379 : }
2380 :
2381 15 : if (raid_bdev->num_base_bdevs_discovered == 0) {
2382 : /* There is no base bdev for this raid, so free the raid device. */
2383 2 : raid_bdev_cleanup_and_free(raid_bdev);
2384 2 : if (cb_fn) {
2385 0 : cb_fn(cb_arg, 0);
2386 : }
2387 : } else {
2388 13 : raid_bdev_deconfigure(raid_bdev, cb_fn, cb_arg);
2389 : }
2390 : }
2391 :
2392 : static void
2393 0 : raid_bdev_process_finish_write_sb_cb(int status, struct raid_bdev *raid_bdev, void *ctx)
2394 : {
2395 0 : if (status != 0) {
2396 0 : SPDK_ERRLOG("Failed to write raid bdev '%s' superblock after background process finished: %s\n",
2397 : raid_bdev->bdev.name, spdk_strerror(-status));
2398 : }
2399 0 : }
2400 :
2401 : static void
2402 0 : raid_bdev_process_finish_write_sb(void *ctx)
2403 : {
2404 0 : struct raid_bdev *raid_bdev = ctx;
2405 0 : struct raid_bdev_superblock *sb = raid_bdev->sb;
2406 : struct raid_bdev_sb_base_bdev *sb_base_bdev;
2407 : struct raid_base_bdev_info *base_info;
2408 : uint8_t i;
2409 :
2410 0 : for (i = 0; i < sb->base_bdevs_size; i++) {
2411 0 : sb_base_bdev = &sb->base_bdevs[i];
2412 :
2413 0 : if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED &&
2414 0 : sb_base_bdev->slot < raid_bdev->num_base_bdevs) {
2415 0 : base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
2416 0 : if (base_info->is_configured) {
2417 0 : sb_base_bdev->state = RAID_SB_BASE_BDEV_CONFIGURED;
2418 0 : spdk_uuid_copy(&sb_base_bdev->uuid, &base_info->uuid);
2419 : }
2420 : }
2421 : }
2422 :
2423 0 : raid_bdev_write_superblock(raid_bdev, raid_bdev_process_finish_write_sb_cb, NULL);
2424 0 : }
2425 :
2426 : static void raid_bdev_process_free(struct raid_bdev_process *process);
2427 :
2428 : static void
2429 1 : _raid_bdev_process_finish_done(void *ctx)
2430 : {
2431 1 : struct raid_bdev_process *process = ctx;
2432 : struct raid_process_finish_action *finish_action;
2433 :
2434 1 : while ((finish_action = TAILQ_FIRST(&process->finish_actions)) != NULL) {
2435 0 : TAILQ_REMOVE(&process->finish_actions, finish_action, link);
2436 0 : finish_action->cb(finish_action->cb_ctx);
2437 0 : free(finish_action);
2438 : }
2439 :
2440 1 : raid_bdev_process_free(process);
2441 :
2442 1 : spdk_thread_exit(spdk_get_thread());
2443 1 : }
2444 :
2445 : static void
2446 0 : raid_bdev_process_finish_target_removed(void *ctx, int status)
2447 : {
2448 0 : struct raid_bdev_process *process = ctx;
2449 :
2450 0 : if (status != 0) {
2451 0 : SPDK_ERRLOG("Failed to remove target bdev: %s\n", spdk_strerror(-status));
2452 : }
2453 :
2454 0 : spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process);
2455 0 : }
2456 :
2457 : static void
2458 1 : raid_bdev_process_finish_unquiesced(void *ctx, int status)
2459 : {
2460 1 : struct raid_bdev_process *process = ctx;
2461 :
2462 1 : if (status != 0) {
2463 0 : SPDK_ERRLOG("Failed to unquiesce bdev: %s\n", spdk_strerror(-status));
2464 : }
2465 :
2466 1 : if (process->status != 0) {
2467 0 : status = _raid_bdev_remove_base_bdev(process->target, raid_bdev_process_finish_target_removed,
2468 : process);
2469 0 : if (status != 0) {
2470 0 : raid_bdev_process_finish_target_removed(process, status);
2471 : }
2472 0 : return;
2473 : }
2474 :
2475 1 : spdk_thread_send_msg(process->thread, _raid_bdev_process_finish_done, process);
2476 : }
2477 :
2478 : static void
2479 1 : raid_bdev_process_finish_unquiesce(void *ctx)
2480 : {
2481 1 : struct raid_bdev_process *process = ctx;
2482 : int rc;
2483 :
2484 1 : rc = spdk_bdev_unquiesce(&process->raid_bdev->bdev, &g_raid_if,
2485 : raid_bdev_process_finish_unquiesced, process);
2486 1 : if (rc != 0) {
2487 0 : raid_bdev_process_finish_unquiesced(process, rc);
2488 : }
2489 1 : }
2490 :
2491 : static void
2492 1 : raid_bdev_process_finish_done(void *ctx)
2493 : {
2494 1 : struct raid_bdev_process *process = ctx;
2495 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2496 :
2497 1 : if (process->raid_ch != NULL) {
2498 1 : spdk_put_io_channel(spdk_io_channel_from_ctx(process->raid_ch));
2499 : }
2500 :
2501 1 : process->state = RAID_PROCESS_STATE_STOPPED;
2502 :
2503 1 : if (process->status == 0) {
2504 1 : SPDK_NOTICELOG("Finished %s on raid bdev %s\n",
2505 : raid_bdev_process_to_str(process->type),
2506 : raid_bdev->bdev.name);
2507 1 : if (raid_bdev->superblock_enabled) {
2508 0 : spdk_thread_send_msg(spdk_thread_get_app_thread(),
2509 : raid_bdev_process_finish_write_sb,
2510 : raid_bdev);
2511 : }
2512 : } else {
2513 0 : SPDK_WARNLOG("Finished %s on raid bdev %s: %s\n",
2514 : raid_bdev_process_to_str(process->type),
2515 : raid_bdev->bdev.name,
2516 : spdk_strerror(-process->status));
2517 : }
2518 :
2519 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), raid_bdev_process_finish_unquiesce,
2520 : process);
2521 1 : }
2522 :
2523 : static void
2524 1 : __raid_bdev_process_finish(struct spdk_io_channel_iter *i, int status)
2525 : {
2526 1 : struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
2527 :
2528 1 : spdk_thread_send_msg(process->thread, raid_bdev_process_finish_done, process);
2529 1 : }
2530 :
2531 : static void
2532 1 : raid_bdev_channel_process_finish(struct spdk_io_channel_iter *i)
2533 : {
2534 1 : struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
2535 1 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2536 1 : struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);
2537 :
2538 1 : if (process->status == 0) {
2539 1 : uint8_t slot = raid_bdev_base_bdev_slot(process->target);
2540 :
2541 1 : raid_ch->base_channel[slot] = raid_ch->process.target_ch;
2542 1 : raid_ch->process.target_ch = NULL;
2543 : }
2544 :
2545 1 : raid_bdev_ch_process_cleanup(raid_ch);
2546 :
2547 1 : spdk_for_each_channel_continue(i, 0);
2548 1 : }
2549 :
2550 : static void
2551 1 : raid_bdev_process_finish_quiesced(void *ctx, int status)
2552 : {
2553 1 : struct raid_bdev_process *process = ctx;
2554 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2555 :
2556 1 : if (status != 0) {
2557 0 : SPDK_ERRLOG("Failed to quiesce bdev: %s\n", spdk_strerror(-status));
2558 0 : return;
2559 : }
2560 :
2561 1 : raid_bdev->process = NULL;
2562 1 : process->target->is_process_target = false;
2563 :
2564 1 : spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_process_finish, process,
2565 : __raid_bdev_process_finish);
2566 : }
2567 :
2568 : static void
2569 1 : _raid_bdev_process_finish(void *ctx)
2570 : {
2571 1 : struct raid_bdev_process *process = ctx;
2572 : int rc;
2573 :
2574 1 : rc = spdk_bdev_quiesce(&process->raid_bdev->bdev, &g_raid_if,
2575 : raid_bdev_process_finish_quiesced, process);
2576 1 : if (rc != 0) {
2577 0 : raid_bdev_process_finish_quiesced(ctx, rc);
2578 : }
2579 1 : }
2580 :
2581 : static void
2582 1 : raid_bdev_process_do_finish(struct raid_bdev_process *process)
2583 : {
2584 1 : spdk_thread_send_msg(spdk_thread_get_app_thread(), _raid_bdev_process_finish, process);
2585 1 : }
2586 :
2587 : static void raid_bdev_process_unlock_window_range(struct raid_bdev_process *process);
2588 : static void raid_bdev_process_thread_run(struct raid_bdev_process *process);
2589 :
2590 : static void
2591 1 : raid_bdev_process_finish(struct raid_bdev_process *process, int status)
2592 : {
2593 1 : assert(spdk_get_thread() == process->thread);
2594 :
2595 1 : if (process->status == 0) {
2596 1 : process->status = status;
2597 : }
2598 :
2599 1 : if (process->state >= RAID_PROCESS_STATE_STOPPING) {
2600 0 : return;
2601 : }
2602 :
2603 1 : assert(process->state == RAID_PROCESS_STATE_RUNNING);
2604 1 : process->state = RAID_PROCESS_STATE_STOPPING;
2605 :
2606 1 : if (process->window_range_locked) {
2607 0 : raid_bdev_process_unlock_window_range(process);
2608 : } else {
2609 1 : raid_bdev_process_thread_run(process);
2610 : }
2611 : }
2612 :
2613 : static void
2614 1 : raid_bdev_process_window_range_unlocked(void *ctx, int status)
2615 : {
2616 1 : struct raid_bdev_process *process = ctx;
2617 :
2618 1 : if (status != 0) {
2619 0 : SPDK_ERRLOG("Failed to unlock LBA range: %s\n", spdk_strerror(-status));
2620 0 : raid_bdev_process_finish(process, status);
2621 0 : return;
2622 : }
2623 :
2624 1 : process->window_range_locked = false;
2625 1 : process->window_offset += process->window_size;
2626 :
2627 1 : raid_bdev_process_thread_run(process);
2628 : }
2629 :
2630 : static void
2631 1 : raid_bdev_process_unlock_window_range(struct raid_bdev_process *process)
2632 : {
2633 : int rc;
2634 :
2635 1 : assert(process->window_range_locked == true);
2636 :
2637 1 : rc = spdk_bdev_unquiesce_range(&process->raid_bdev->bdev, &g_raid_if,
2638 : process->window_offset, process->max_window_size,
2639 : raid_bdev_process_window_range_unlocked, process);
2640 1 : if (rc != 0) {
2641 0 : raid_bdev_process_window_range_unlocked(process, rc);
2642 : }
2643 1 : }
2644 :
/*
 * Completion of the per-channel offset update: the window's LBA range can now be
 * unlocked. The iteration status is not used.
 */
static void
raid_bdev_process_channels_update_done(struct spdk_io_channel_iter *i, int status)
{
	struct raid_bdev_process *finished_window_owner = spdk_io_channel_iter_get_ctx(i);

	raid_bdev_process_unlock_window_range(finished_window_owner);
}
2652 :
2653 : static void
2654 1 : raid_bdev_process_channel_update(struct spdk_io_channel_iter *i)
2655 : {
2656 1 : struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
2657 1 : struct spdk_io_channel *ch = spdk_io_channel_iter_get_channel(i);
2658 1 : struct raid_bdev_io_channel *raid_ch = spdk_io_channel_get_ctx(ch);
2659 :
2660 1 : raid_ch->process.offset = process->window_offset + process->window_size;
2661 :
2662 1 : spdk_for_each_channel_continue(i, 0);
2663 1 : }
2664 :
2665 : void
2666 1 : raid_bdev_process_request_complete(struct raid_bdev_process_request *process_req, int status)
2667 : {
2668 1 : struct raid_bdev_process *process = process_req->process;
2669 :
2670 1 : TAILQ_INSERT_TAIL(&process->requests, process_req, link);
2671 :
2672 1 : assert(spdk_get_thread() == process->thread);
2673 1 : assert(process->window_remaining >= process_req->num_blocks);
2674 :
2675 1 : if (status != 0) {
2676 0 : process->window_status = status;
2677 : }
2678 :
2679 1 : process->window_remaining -= process_req->num_blocks;
2680 1 : if (process->window_remaining == 0) {
2681 1 : if (process->window_status != 0) {
2682 0 : raid_bdev_process_finish(process, process->window_status);
2683 0 : return;
2684 : }
2685 :
2686 1 : spdk_for_each_channel(process->raid_bdev, raid_bdev_process_channel_update, process,
2687 : raid_bdev_process_channels_update_done);
2688 : }
2689 : }
2690 :
2691 : static int
2692 1 : raid_bdev_submit_process_request(struct raid_bdev_process *process, uint64_t offset_blocks,
2693 : uint32_t num_blocks)
2694 : {
2695 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2696 : struct raid_bdev_process_request *process_req;
2697 : int ret;
2698 :
2699 1 : process_req = TAILQ_FIRST(&process->requests);
2700 1 : if (process_req == NULL) {
2701 0 : assert(process->window_remaining > 0);
2702 0 : return 0;
2703 : }
2704 :
2705 1 : process_req->target = process->target;
2706 1 : process_req->target_ch = process->raid_ch->process.target_ch;
2707 1 : process_req->offset_blocks = offset_blocks;
2708 1 : process_req->num_blocks = num_blocks;
2709 1 : process_req->iov.iov_len = num_blocks * raid_bdev->bdev.blocklen;
2710 :
2711 1 : ret = raid_bdev->module->submit_process_request(process_req, process->raid_ch);
2712 1 : if (ret <= 0) {
2713 0 : if (ret < 0) {
2714 0 : SPDK_ERRLOG("Failed to submit process request on %s: %s\n",
2715 : raid_bdev->bdev.name, spdk_strerror(-ret));
2716 0 : process->window_status = ret;
2717 : }
2718 0 : return ret;
2719 : }
2720 :
2721 1 : process_req->num_blocks = ret;
2722 1 : TAILQ_REMOVE(&process->requests, process_req, link);
2723 :
2724 1 : return ret;
2725 : }
2726 :
2727 : static void
2728 1 : _raid_bdev_process_thread_run(struct raid_bdev_process *process)
2729 : {
2730 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2731 1 : uint64_t offset = process->window_offset;
2732 1 : const uint64_t offset_end = spdk_min(offset + process->max_window_size, raid_bdev->bdev.blockcnt);
2733 : int ret;
2734 :
2735 2 : while (offset < offset_end) {
2736 1 : ret = raid_bdev_submit_process_request(process, offset, offset_end - offset);
2737 1 : if (ret <= 0) {
2738 0 : break;
2739 : }
2740 :
2741 1 : process->window_remaining += ret;
2742 1 : offset += ret;
2743 : }
2744 :
2745 1 : if (process->window_remaining > 0) {
2746 1 : process->window_size = process->window_remaining;
2747 : } else {
2748 0 : raid_bdev_process_finish(process, process->window_status);
2749 : }
2750 1 : }
2751 :
2752 : static void
2753 1 : raid_bdev_process_window_range_locked(void *ctx, int status)
2754 : {
2755 1 : struct raid_bdev_process *process = ctx;
2756 :
2757 1 : if (status != 0) {
2758 0 : SPDK_ERRLOG("Failed to lock LBA range: %s\n", spdk_strerror(-status));
2759 0 : raid_bdev_process_finish(process, status);
2760 0 : return;
2761 : }
2762 :
2763 1 : process->window_range_locked = true;
2764 :
2765 1 : if (process->state == RAID_PROCESS_STATE_STOPPING) {
2766 0 : raid_bdev_process_unlock_window_range(process);
2767 0 : return;
2768 : }
2769 :
2770 1 : _raid_bdev_process_thread_run(process);
2771 : }
2772 :
2773 : static void
2774 3 : raid_bdev_process_thread_run(struct raid_bdev_process *process)
2775 : {
2776 3 : struct raid_bdev *raid_bdev = process->raid_bdev;
2777 : int rc;
2778 :
2779 3 : assert(spdk_get_thread() == process->thread);
2780 3 : assert(process->window_remaining == 0);
2781 3 : assert(process->window_range_locked == false);
2782 :
2783 3 : if (process->state == RAID_PROCESS_STATE_STOPPING) {
2784 1 : raid_bdev_process_do_finish(process);
2785 1 : return;
2786 : }
2787 :
2788 2 : if (process->window_offset == raid_bdev->bdev.blockcnt) {
2789 1 : SPDK_DEBUGLOG(bdev_raid, "process completed on %s\n", raid_bdev->bdev.name);
2790 1 : raid_bdev_process_finish(process, 0);
2791 1 : return;
2792 : }
2793 :
2794 1 : process->max_window_size = spdk_min(raid_bdev->bdev.blockcnt - process->window_offset,
2795 : process->max_window_size);
2796 :
2797 1 : rc = spdk_bdev_quiesce_range(&raid_bdev->bdev, &g_raid_if,
2798 : process->window_offset, process->max_window_size,
2799 : raid_bdev_process_window_range_locked, process);
2800 1 : if (rc != 0) {
2801 0 : raid_bdev_process_window_range_locked(process, rc);
2802 : }
2803 : }
2804 :
2805 : static void
2806 1 : raid_bdev_process_thread_init(void *ctx)
2807 : {
2808 1 : struct raid_bdev_process *process = ctx;
2809 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2810 : struct spdk_io_channel *ch;
2811 :
2812 1 : process->thread = spdk_get_thread();
2813 :
2814 1 : ch = spdk_get_io_channel(raid_bdev);
2815 1 : if (ch == NULL) {
2816 0 : process->status = -ENOMEM;
2817 0 : raid_bdev_process_do_finish(process);
2818 0 : return;
2819 : }
2820 :
2821 1 : process->raid_ch = spdk_io_channel_get_ctx(ch);
2822 1 : process->state = RAID_PROCESS_STATE_RUNNING;
2823 :
2824 1 : SPDK_NOTICELOG("Started %s on raid bdev %s\n",
2825 : raid_bdev_process_to_str(process->type), raid_bdev->bdev.name);
2826 :
2827 1 : raid_bdev_process_thread_run(process);
2828 : }
2829 :
2830 : static void
2831 0 : raid_bdev_channels_abort_start_process_done(struct spdk_io_channel_iter *i, int status)
2832 : {
2833 0 : struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
2834 :
2835 0 : _raid_bdev_remove_base_bdev(process->target, NULL, NULL);
2836 0 : raid_bdev_process_free(process);
2837 :
2838 : /* TODO: update sb */
2839 0 : }
2840 :
/*
 * Per-channel step of aborting a process start (spdk_for_each_channel() callback):
 * cleans up the channel's process data.
 */
static void
raid_bdev_channel_abort_start_process(struct spdk_io_channel_iter *i)
{
	struct raid_bdev_io_channel *raid_ch =
		spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));

	raid_bdev_ch_process_cleanup(raid_ch);

	spdk_for_each_channel_continue(i, 0);
}
2851 :
2852 : static void
2853 1 : raid_bdev_channels_start_process_done(struct spdk_io_channel_iter *i, int status)
2854 : {
2855 1 : struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
2856 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2857 : struct spdk_thread *thread;
2858 1 : char thread_name[RAID_BDEV_SB_NAME_SIZE + 16];
2859 :
2860 1 : if (status == 0 &&
2861 1 : (process->target->remove_scheduled || !process->target->is_configured ||
2862 1 : raid_bdev->num_base_bdevs_operational <= raid_bdev->min_base_bdevs_operational)) {
2863 : /* a base bdev was removed before we got here */
2864 0 : status = -ENODEV;
2865 : }
2866 :
2867 1 : if (status != 0) {
2868 0 : SPDK_ERRLOG("Failed to start %s on %s: %s\n",
2869 : raid_bdev_process_to_str(process->type), raid_bdev->bdev.name,
2870 : spdk_strerror(-status));
2871 0 : goto err;
2872 : }
2873 :
2874 1 : snprintf(thread_name, sizeof(thread_name), "%s_%s",
2875 : raid_bdev->bdev.name, raid_bdev_process_to_str(process->type));
2876 :
2877 1 : thread = spdk_thread_create(thread_name, NULL);
2878 1 : if (thread == NULL) {
2879 0 : SPDK_ERRLOG("Failed to create %s thread for %s\n",
2880 : raid_bdev_process_to_str(process->type), raid_bdev->bdev.name);
2881 0 : goto err;
2882 : }
2883 :
2884 1 : raid_bdev->process = process;
2885 :
2886 1 : spdk_thread_send_msg(thread, raid_bdev_process_thread_init, process);
2887 :
2888 1 : return;
2889 0 : err:
2890 0 : spdk_for_each_channel(process->raid_bdev, raid_bdev_channel_abort_start_process, process,
2891 : raid_bdev_channels_abort_start_process_done);
2892 : }
2893 :
/*
 * Per-channel step of starting a process: sets up the process data on the raid
 * io channel being visited and passes the result on to the iteration.
 */
static void
raid_bdev_channel_start_process(struct spdk_io_channel_iter *i)
{
	struct raid_bdev_process *process = spdk_io_channel_iter_get_ctx(i);
	struct raid_bdev_io_channel *raid_ch;

	raid_ch = spdk_io_channel_get_ctx(spdk_io_channel_iter_get_channel(i));

	spdk_for_each_channel_continue(i, raid_bdev_ch_process_setup(raid_ch, process));
}
2906 :
2907 : static void
2908 1 : raid_bdev_process_start(struct raid_bdev_process *process)
2909 : {
2910 1 : struct raid_bdev *raid_bdev = process->raid_bdev;
2911 :
2912 1 : assert(raid_bdev->module->submit_process_request != NULL);
2913 :
2914 1 : spdk_for_each_channel(raid_bdev, raid_bdev_channel_start_process, process,
2915 : raid_bdev_channels_start_process_done);
2916 1 : }
2917 :
2918 : static void
2919 16 : raid_bdev_process_request_free(struct raid_bdev_process_request *process_req)
2920 : {
2921 16 : spdk_dma_free(process_req->iov.iov_base);
2922 16 : spdk_dma_free(process_req->md_buf);
2923 16 : free(process_req);
2924 16 : }
2925 :
2926 : static struct raid_bdev_process_request *
2927 16 : raid_bdev_process_alloc_request(struct raid_bdev_process *process)
2928 : {
2929 16 : struct raid_bdev *raid_bdev = process->raid_bdev;
2930 : struct raid_bdev_process_request *process_req;
2931 :
2932 16 : process_req = calloc(1, sizeof(*process_req));
2933 16 : if (process_req == NULL) {
2934 0 : return NULL;
2935 : }
2936 :
2937 16 : process_req->process = process;
2938 16 : process_req->iov.iov_len = process->max_window_size * raid_bdev->bdev.blocklen;
2939 16 : process_req->iov.iov_base = spdk_dma_malloc(process_req->iov.iov_len, 4096, 0);
2940 16 : if (process_req->iov.iov_base == NULL) {
2941 0 : free(process_req);
2942 0 : return NULL;
2943 : }
2944 16 : if (spdk_bdev_is_md_separate(&raid_bdev->bdev)) {
2945 16 : process_req->md_buf = spdk_dma_malloc(process->max_window_size * raid_bdev->bdev.md_len, 4096, 0);
2946 16 : if (process_req->md_buf == NULL) {
2947 0 : raid_bdev_process_request_free(process_req);
2948 0 : return NULL;
2949 : }
2950 : }
2951 :
2952 16 : return process_req;
2953 : }
2954 :
2955 : static void
2956 1 : raid_bdev_process_free(struct raid_bdev_process *process)
2957 : {
2958 : struct raid_bdev_process_request *process_req;
2959 :
2960 17 : while ((process_req = TAILQ_FIRST(&process->requests)) != NULL) {
2961 16 : TAILQ_REMOVE(&process->requests, process_req, link);
2962 16 : raid_bdev_process_request_free(process_req);
2963 : }
2964 :
2965 1 : free(process);
2966 1 : }
2967 :
2968 : static struct raid_bdev_process *
2969 1 : raid_bdev_process_alloc(struct raid_bdev *raid_bdev, enum raid_process_type type,
2970 : struct raid_base_bdev_info *target)
2971 : {
2972 : struct raid_bdev_process *process;
2973 : struct raid_bdev_process_request *process_req;
2974 : int i;
2975 :
2976 1 : process = calloc(1, sizeof(*process));
2977 1 : if (process == NULL) {
2978 0 : return NULL;
2979 : }
2980 :
2981 1 : process->raid_bdev = raid_bdev;
2982 1 : process->type = type;
2983 1 : process->target = target;
2984 1 : process->max_window_size = spdk_max(spdk_divide_round_up(g_opts.process_window_size_kb * 1024UL,
2985 : spdk_bdev_get_data_block_size(&raid_bdev->bdev)),
2986 : raid_bdev->bdev.write_unit_size);
2987 1 : TAILQ_INIT(&process->requests);
2988 1 : TAILQ_INIT(&process->finish_actions);
2989 :
2990 17 : for (i = 0; i < RAID_BDEV_PROCESS_MAX_QD; i++) {
2991 16 : process_req = raid_bdev_process_alloc_request(process);
2992 16 : if (process_req == NULL) {
2993 0 : raid_bdev_process_free(process);
2994 0 : return NULL;
2995 : }
2996 :
2997 16 : TAILQ_INSERT_TAIL(&process->requests, process_req, link);
2998 : }
2999 :
3000 1 : return process;
3001 : }
3002 :
3003 : static int
3004 1 : raid_bdev_start_rebuild(struct raid_base_bdev_info *target)
3005 : {
3006 : struct raid_bdev_process *process;
3007 :
3008 1 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
3009 :
3010 1 : process = raid_bdev_process_alloc(target->raid_bdev, RAID_PROCESS_REBUILD, target);
3011 1 : if (process == NULL) {
3012 0 : return -ENOMEM;
3013 : }
3014 :
3015 1 : raid_bdev_process_start(process);
3016 :
3017 1 : return 0;
3018 : }
3019 :
3020 : static void raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info);
3021 :
/*
 * Channel iteration completion adapter: resumes configuration of the base bdev
 * that was passed as the iteration context. The iteration status is ignored.
 */
static void
_raid_bdev_configure_base_bdev_cont(struct spdk_io_channel_iter *i, int status)
{
	raid_bdev_configure_base_bdev_cont(spdk_io_channel_iter_get_ctx(i));
}
3029 :
/*
 * Per-channel no-op; it only lets the iteration visit every channel so the
 * completion callback runs after each of them has been passed.
 */
static void
raid_bdev_ch_sync(struct spdk_io_channel_iter *i)
{
	spdk_for_each_channel_continue(i, 0);
}
3035 :
3036 : static void
3037 478 : raid_bdev_configure_base_bdev_cont(struct raid_base_bdev_info *base_info)
3038 : {
3039 478 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
3040 : int rc;
3041 :
3042 478 : if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational &&
3043 0 : base_info->is_process_target == false) {
3044 : /* TODO: defer if rebuild in progress on another base bdev */
3045 0 : assert(raid_bdev->process == NULL);
3046 0 : assert(raid_bdev->state == RAID_BDEV_STATE_ONLINE);
3047 0 : base_info->is_process_target = true;
3048 : /* To assure is_process_target is set before is_configured when checked in raid_bdev_create_cb() */
3049 0 : spdk_for_each_channel(raid_bdev, raid_bdev_ch_sync, base_info, _raid_bdev_configure_base_bdev_cont);
3050 0 : return;
3051 : }
3052 :
3053 478 : base_info->is_configured = true;
3054 :
3055 478 : raid_bdev->num_base_bdevs_discovered++;
3056 478 : assert(raid_bdev->num_base_bdevs_discovered <= raid_bdev->num_base_bdevs);
3057 478 : assert(raid_bdev->num_base_bdevs_operational <= raid_bdev->num_base_bdevs);
3058 478 : assert(raid_bdev->num_base_bdevs_operational >= raid_bdev->min_base_bdevs_operational);
3059 :
3060 : /*
3061 : * Configure the raid bdev when the number of discovered base bdevs reaches the number
3062 : * of base bdevs we know to be operational members of the array. Usually this is equal
3063 : * to the total number of base bdevs (num_base_bdevs) but can be less - when the array is
3064 : * degraded.
3065 : */
3066 478 : if (raid_bdev->num_base_bdevs_discovered == raid_bdev->num_base_bdevs_operational) {
3067 13 : rc = raid_bdev_configure(raid_bdev);
3068 13 : if (rc != 0) {
3069 0 : SPDK_ERRLOG("Failed to configure raid bdev: %s\n", spdk_strerror(-rc));
3070 : }
3071 465 : } else if (base_info->is_process_target) {
3072 0 : raid_bdev->num_base_bdevs_operational++;
3073 0 : rc = raid_bdev_start_rebuild(base_info);
3074 0 : if (rc != 0) {
3075 0 : SPDK_ERRLOG("Failed to start rebuild: %s\n", spdk_strerror(-rc));
3076 0 : _raid_bdev_remove_base_bdev(base_info, NULL, NULL);
3077 : }
3078 : } else {
3079 465 : rc = 0;
3080 : }
3081 :
3082 478 : if (base_info->configure_cb != NULL) {
3083 478 : base_info->configure_cb(base_info->configure_cb_ctx, rc);
3084 : }
3085 : }
3086 :
3087 : static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
3088 : raid_base_bdev_cb cb_fn, void *cb_ctx);
3089 :
3090 : static void
3091 478 : raid_bdev_configure_base_bdev_check_sb_cb(const struct raid_bdev_superblock *sb, int status,
3092 : void *ctx)
3093 : {
3094 478 : struct raid_base_bdev_info *base_info = ctx;
3095 :
3096 478 : switch (status) {
3097 0 : case 0:
3098 : /* valid superblock found */
3099 0 : if (spdk_uuid_compare(&base_info->raid_bdev->bdev.uuid, &sb->uuid) == 0) {
3100 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(base_info->desc);
3101 :
3102 0 : raid_bdev_free_base_bdev_resource(base_info);
3103 0 : raid_bdev_examine_sb(sb, bdev, base_info->configure_cb, base_info->configure_cb_ctx);
3104 0 : return;
3105 : }
3106 0 : SPDK_ERRLOG("Superblock of a different raid bdev found on bdev %s\n", base_info->name);
3107 0 : status = -EEXIST;
3108 0 : raid_bdev_free_base_bdev_resource(base_info);
3109 0 : break;
3110 478 : case -EINVAL:
3111 : /* no valid superblock */
3112 478 : raid_bdev_configure_base_bdev_cont(base_info);
3113 478 : return;
3114 0 : default:
3115 0 : SPDK_ERRLOG("Failed to examine bdev %s: %s\n",
3116 : base_info->name, spdk_strerror(-status));
3117 0 : break;
3118 : }
3119 :
3120 0 : if (base_info->configure_cb != NULL) {
3121 0 : base_info->configure_cb(base_info->configure_cb_ctx, status);
3122 : }
3123 : }
3124 :
3125 : static int
3126 481 : raid_bdev_configure_base_bdev(struct raid_base_bdev_info *base_info, bool existing,
3127 : raid_base_bdev_cb cb_fn, void *cb_ctx)
3128 : {
3129 481 : struct raid_bdev *raid_bdev = base_info->raid_bdev;
3130 481 : struct spdk_bdev_desc *desc;
3131 : struct spdk_bdev *bdev;
3132 : const struct spdk_uuid *bdev_uuid;
3133 : int rc;
3134 :
3135 481 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
3136 481 : assert(base_info->desc == NULL);
3137 :
3138 : /*
3139 : * Base bdev can be added by name or uuid. Here we assure both properties are set and valid
3140 : * before claiming the bdev.
3141 : */
3142 :
3143 481 : if (!spdk_uuid_is_null(&base_info->uuid)) {
3144 0 : char uuid_str[SPDK_UUID_STRING_LEN];
3145 : const char *bdev_name;
3146 :
3147 0 : spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid);
3148 :
3149 : /* UUID of a bdev is registered as its alias */
3150 0 : bdev = spdk_bdev_get_by_name(uuid_str);
3151 0 : if (bdev == NULL) {
3152 0 : return -ENODEV;
3153 : }
3154 :
3155 0 : bdev_name = spdk_bdev_get_name(bdev);
3156 :
3157 0 : if (base_info->name == NULL) {
3158 0 : assert(existing == true);
3159 0 : base_info->name = strdup(bdev_name);
3160 0 : if (base_info->name == NULL) {
3161 0 : return -ENOMEM;
3162 : }
3163 0 : } else if (strcmp(base_info->name, bdev_name) != 0) {
3164 0 : SPDK_ERRLOG("Name mismatch for base bdev '%s' - expected '%s'\n",
3165 : bdev_name, base_info->name);
3166 0 : return -EINVAL;
3167 : }
3168 : }
3169 :
3170 481 : assert(base_info->name != NULL);
3171 :
3172 481 : rc = spdk_bdev_open_ext(base_info->name, true, raid_bdev_event_base_bdev, NULL, &desc);
3173 481 : if (rc != 0) {
3174 1 : if (rc != -ENODEV) {
3175 0 : SPDK_ERRLOG("Unable to create desc on bdev '%s'\n", base_info->name);
3176 : }
3177 1 : return rc;
3178 : }
3179 :
3180 480 : bdev = spdk_bdev_desc_get_bdev(desc);
3181 480 : bdev_uuid = spdk_bdev_get_uuid(bdev);
3182 :
3183 480 : if (spdk_uuid_is_null(&base_info->uuid)) {
3184 480 : spdk_uuid_copy(&base_info->uuid, bdev_uuid);
3185 0 : } else if (spdk_uuid_compare(&base_info->uuid, bdev_uuid) != 0) {
3186 0 : SPDK_ERRLOG("UUID mismatch for base bdev '%s'\n", base_info->name);
3187 0 : spdk_bdev_close(desc);
3188 0 : return -EINVAL;
3189 : }
3190 :
3191 480 : rc = spdk_bdev_module_claim_bdev(bdev, NULL, &g_raid_if);
3192 480 : if (rc != 0) {
3193 2 : SPDK_ERRLOG("Unable to claim this bdev as it is already claimed\n");
3194 2 : spdk_bdev_close(desc);
3195 2 : return rc;
3196 : }
3197 :
3198 478 : SPDK_DEBUGLOG(bdev_raid, "bdev %s is claimed\n", bdev->name);
3199 :
3200 478 : base_info->app_thread_ch = spdk_bdev_get_io_channel(desc);
3201 478 : if (base_info->app_thread_ch == NULL) {
3202 0 : SPDK_ERRLOG("Failed to get io channel\n");
3203 0 : spdk_bdev_module_release_bdev(bdev);
3204 0 : spdk_bdev_close(desc);
3205 0 : return -ENOMEM;
3206 : }
3207 :
3208 478 : base_info->desc = desc;
3209 478 : base_info->blockcnt = bdev->blockcnt;
3210 :
3211 478 : if (raid_bdev->superblock_enabled) {
3212 : uint64_t data_offset;
3213 :
3214 32 : if (base_info->data_offset == 0) {
3215 32 : assert((RAID_BDEV_MIN_DATA_OFFSET_SIZE % spdk_bdev_get_data_block_size(bdev)) == 0);
3216 32 : data_offset = RAID_BDEV_MIN_DATA_OFFSET_SIZE / spdk_bdev_get_data_block_size(bdev);
3217 : } else {
3218 0 : data_offset = base_info->data_offset;
3219 : }
3220 :
3221 32 : if (bdev->optimal_io_boundary != 0) {
3222 0 : data_offset = spdk_divide_round_up(data_offset,
3223 0 : bdev->optimal_io_boundary) * bdev->optimal_io_boundary;
3224 0 : if (base_info->data_offset != 0 && base_info->data_offset != data_offset) {
3225 0 : SPDK_WARNLOG("Data offset %lu on bdev '%s' is different than optimal value %lu\n",
3226 : base_info->data_offset, base_info->name, data_offset);
3227 0 : data_offset = base_info->data_offset;
3228 : }
3229 : }
3230 :
3231 32 : base_info->data_offset = data_offset;
3232 : }
3233 :
3234 478 : if (base_info->data_offset >= bdev->blockcnt) {
3235 0 : SPDK_ERRLOG("Data offset %lu exceeds base bdev capacity %lu on bdev '%s'\n",
3236 : base_info->data_offset, bdev->blockcnt, base_info->name);
3237 0 : rc = -EINVAL;
3238 0 : goto out;
3239 : }
3240 :
3241 478 : if (base_info->data_size == 0) {
3242 478 : base_info->data_size = bdev->blockcnt - base_info->data_offset;
3243 0 : } else if (base_info->data_offset + base_info->data_size > bdev->blockcnt) {
3244 0 : SPDK_ERRLOG("Data offset and size exceeds base bdev capacity %lu on bdev '%s'\n",
3245 : bdev->blockcnt, base_info->name);
3246 0 : rc = -EINVAL;
3247 0 : goto out;
3248 : }
3249 :
3250 478 : if (!raid_bdev->module->dif_supported && spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
3251 0 : SPDK_ERRLOG("Base bdev '%s' has DIF or DIX enabled - unsupported RAID configuration\n",
3252 : bdev->name);
3253 0 : rc = -EINVAL;
3254 0 : goto out;
3255 : }
3256 :
3257 : /*
3258 : * Set the raid bdev properties if this is the first base bdev configured,
3259 : * otherwise - verify. Assumption is that all the base bdevs for any raid bdev should
3260 : * have the same blocklen and metadata format.
3261 : */
3262 478 : if (raid_bdev->bdev.blocklen == 0) {
3263 15 : raid_bdev->bdev.blocklen = bdev->blocklen;
3264 15 : raid_bdev->bdev.md_len = spdk_bdev_get_md_size(bdev);
3265 15 : raid_bdev->bdev.md_interleave = spdk_bdev_is_md_interleaved(bdev);
3266 15 : raid_bdev->bdev.dif_type = spdk_bdev_get_dif_type(bdev);
3267 15 : raid_bdev->bdev.dif_check_flags = bdev->dif_check_flags;
3268 15 : raid_bdev->bdev.dif_is_head_of_md = spdk_bdev_is_dif_head_of_md(bdev);
3269 : } else {
3270 463 : if (raid_bdev->bdev.blocklen != bdev->blocklen) {
3271 0 : SPDK_ERRLOG("Raid bdev '%s' blocklen %u differs from base bdev '%s' blocklen %u\n",
3272 : raid_bdev->bdev.name, raid_bdev->bdev.blocklen, bdev->name, bdev->blocklen);
3273 0 : rc = -EINVAL;
3274 0 : goto out;
3275 : }
3276 :
3277 926 : if (raid_bdev->bdev.md_len != spdk_bdev_get_md_size(bdev) ||
3278 926 : raid_bdev->bdev.md_interleave != spdk_bdev_is_md_interleaved(bdev) ||
3279 463 : raid_bdev->bdev.dif_type != spdk_bdev_get_dif_type(bdev) ||
3280 926 : raid_bdev->bdev.dif_check_flags != bdev->dif_check_flags ||
3281 463 : raid_bdev->bdev.dif_is_head_of_md != spdk_bdev_is_dif_head_of_md(bdev)) {
3282 0 : SPDK_ERRLOG("Raid bdev '%s' has different metadata format than base bdev '%s'\n",
3283 : raid_bdev->bdev.name, bdev->name);
3284 0 : rc = -EINVAL;
3285 0 : goto out;
3286 : }
3287 : }
3288 :
3289 478 : base_info->configure_cb = cb_fn;
3290 478 : base_info->configure_cb_ctx = cb_ctx;
3291 :
3292 478 : if (existing) {
3293 0 : raid_bdev_configure_base_bdev_cont(base_info);
3294 : } else {
3295 : /* check for existing superblock when using a new bdev */
3296 478 : rc = raid_bdev_load_base_bdev_superblock(desc, base_info->app_thread_ch,
3297 : raid_bdev_configure_base_bdev_check_sb_cb, base_info);
3298 478 : if (rc) {
3299 0 : SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n",
3300 : bdev->name, spdk_strerror(-rc));
3301 : }
3302 : }
3303 478 : out:
3304 478 : if (rc != 0) {
3305 0 : raid_bdev_free_base_bdev_resource(base_info);
3306 : }
3307 478 : return rc;
3308 : }
3309 :
3310 : int
3311 481 : raid_bdev_add_base_bdev(struct raid_bdev *raid_bdev, const char *name,
3312 : raid_base_bdev_cb cb_fn, void *cb_ctx)
3313 : {
3314 481 : struct raid_base_bdev_info *base_info = NULL, *iter;
3315 : int rc;
3316 :
3317 481 : assert(name != NULL);
3318 481 : assert(spdk_get_thread() == spdk_thread_get_app_thread());
3319 :
3320 481 : if (raid_bdev->process != NULL) {
3321 0 : SPDK_ERRLOG("raid bdev '%s' is in process\n",
3322 : raid_bdev->bdev.name);
3323 0 : return -EPERM;
3324 : }
3325 :
3326 481 : if (raid_bdev->state == RAID_BDEV_STATE_CONFIGURING) {
3327 481 : struct spdk_bdev *bdev = spdk_bdev_get_by_name(name);
3328 :
3329 481 : if (bdev != NULL) {
3330 15840 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
3331 23311 : if (iter->name == NULL &&
3332 7951 : spdk_uuid_compare(&bdev->uuid, &iter->uuid) == 0) {
3333 0 : base_info = iter;
3334 0 : break;
3335 : }
3336 : }
3337 : }
3338 : }
3339 :
3340 481 : if (base_info == NULL || raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
3341 7921 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
3342 7921 : if (iter->name == NULL && spdk_uuid_is_null(&iter->uuid)) {
3343 481 : base_info = iter;
3344 481 : break;
3345 : }
3346 : }
3347 : }
3348 :
3349 481 : if (base_info == NULL) {
3350 0 : SPDK_ERRLOG("no empty slot found in raid bdev '%s' for new base bdev '%s'\n",
3351 : raid_bdev->bdev.name, name);
3352 0 : return -EINVAL;
3353 : }
3354 :
3355 481 : assert(base_info->is_configured == false);
3356 :
3357 481 : if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
3358 0 : assert(base_info->data_size != 0);
3359 0 : assert(base_info->desc == NULL);
3360 : }
3361 :
3362 481 : base_info->name = strdup(name);
3363 481 : if (base_info->name == NULL) {
3364 0 : return -ENOMEM;
3365 : }
3366 :
3367 481 : rc = raid_bdev_configure_base_bdev(base_info, false, cb_fn, cb_ctx);
3368 481 : if (rc != 0 && (rc != -ENODEV || raid_bdev->state != RAID_BDEV_STATE_CONFIGURING)) {
3369 2 : SPDK_ERRLOG("base bdev '%s' configure failed: %s\n", name, spdk_strerror(-rc));
3370 2 : free(base_info->name);
3371 2 : base_info->name = NULL;
3372 : }
3373 :
3374 481 : return rc;
3375 : }
3376 :
3377 : static int
3378 0 : raid_bdev_create_from_sb(const struct raid_bdev_superblock *sb, struct raid_bdev **raid_bdev_out)
3379 : {
3380 0 : struct raid_bdev *raid_bdev;
3381 : uint8_t i;
3382 : int rc;
3383 :
3384 0 : rc = _raid_bdev_create(sb->name, (sb->strip_size * sb->block_size) / 1024, sb->num_base_bdevs,
3385 0 : sb->level, true, &sb->uuid, &raid_bdev);
3386 0 : if (rc != 0) {
3387 0 : return rc;
3388 : }
3389 :
3390 0 : rc = raid_bdev_alloc_superblock(raid_bdev, sb->block_size);
3391 0 : if (rc != 0) {
3392 0 : raid_bdev_free(raid_bdev);
3393 0 : return rc;
3394 : }
3395 :
3396 0 : assert(sb->length <= RAID_BDEV_SB_MAX_LENGTH);
3397 0 : memcpy(raid_bdev->sb, sb, sb->length);
3398 :
3399 0 : for (i = 0; i < sb->base_bdevs_size; i++) {
3400 0 : const struct raid_bdev_sb_base_bdev *sb_base_bdev = &sb->base_bdevs[i];
3401 0 : struct raid_base_bdev_info *base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
3402 :
3403 0 : if (sb_base_bdev->state == RAID_SB_BASE_BDEV_CONFIGURED) {
3404 0 : spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid);
3405 0 : raid_bdev->num_base_bdevs_operational++;
3406 : }
3407 :
3408 0 : base_info->data_offset = sb_base_bdev->data_offset;
3409 0 : base_info->data_size = sb_base_bdev->data_size;
3410 : }
3411 :
3412 0 : *raid_bdev_out = raid_bdev;
3413 0 : return 0;
3414 : }
3415 :
3416 : static void
3417 0 : raid_bdev_examine_no_sb(struct spdk_bdev *bdev)
3418 : {
3419 : struct raid_bdev *raid_bdev;
3420 : struct raid_base_bdev_info *base_info;
3421 :
3422 0 : TAILQ_FOREACH(raid_bdev, &g_raid_bdev_list, global_link) {
3423 0 : if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING || raid_bdev->sb != NULL) {
3424 0 : continue;
3425 : }
3426 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, base_info) {
3427 0 : if (base_info->desc == NULL &&
3428 0 : ((base_info->name != NULL && strcmp(bdev->name, base_info->name) == 0) ||
3429 0 : spdk_uuid_compare(&base_info->uuid, &bdev->uuid) == 0)) {
3430 0 : raid_bdev_configure_base_bdev(base_info, true, NULL, NULL);
3431 0 : break;
3432 : }
3433 : }
3434 : }
3435 0 : }
3436 :
3437 : struct raid_bdev_examine_others_ctx {
3438 : struct spdk_uuid raid_bdev_uuid;
3439 : uint8_t current_base_bdev_idx;
3440 : raid_base_bdev_cb cb_fn;
3441 : void *cb_ctx;
3442 : };
3443 :
3444 : static void
3445 0 : raid_bdev_examine_others_done(void *_ctx, int status)
3446 : {
3447 0 : struct raid_bdev_examine_others_ctx *ctx = _ctx;
3448 :
3449 0 : if (ctx->cb_fn != NULL) {
3450 0 : ctx->cb_fn(ctx->cb_ctx, status);
3451 : }
3452 0 : free(ctx);
3453 0 : }
3454 :
3455 : typedef void (*raid_bdev_examine_load_sb_cb)(struct spdk_bdev *bdev,
3456 : const struct raid_bdev_superblock *sb, int status, void *ctx);
3457 : static int raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb,
3458 : void *cb_ctx);
3459 : static void raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
3460 : raid_base_bdev_cb cb_fn, void *cb_ctx);
3461 : static void raid_bdev_examine_others(void *_ctx, int status);
3462 :
3463 : static void
3464 0 : raid_bdev_examine_others_load_cb(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb,
3465 : int status, void *_ctx)
3466 : {
3467 0 : struct raid_bdev_examine_others_ctx *ctx = _ctx;
3468 :
3469 0 : if (status != 0) {
3470 0 : raid_bdev_examine_others_done(ctx, status);
3471 0 : return;
3472 : }
3473 :
3474 0 : raid_bdev_examine_sb(sb, bdev, raid_bdev_examine_others, ctx);
3475 : }
3476 :
3477 : static void
3478 0 : raid_bdev_examine_others(void *_ctx, int status)
3479 : {
3480 0 : struct raid_bdev_examine_others_ctx *ctx = _ctx;
3481 : struct raid_bdev *raid_bdev;
3482 : struct raid_base_bdev_info *base_info;
3483 0 : char uuid_str[SPDK_UUID_STRING_LEN];
3484 :
3485 0 : if (status != 0) {
3486 0 : goto out;
3487 : }
3488 :
3489 0 : raid_bdev = raid_bdev_find_by_uuid(&ctx->raid_bdev_uuid);
3490 0 : if (raid_bdev == NULL) {
3491 0 : status = -ENODEV;
3492 0 : goto out;
3493 : }
3494 :
3495 0 : for (base_info = &raid_bdev->base_bdev_info[ctx->current_base_bdev_idx];
3496 0 : base_info < &raid_bdev->base_bdev_info[raid_bdev->num_base_bdevs];
3497 0 : base_info++) {
3498 0 : if (base_info->is_configured || spdk_uuid_is_null(&base_info->uuid)) {
3499 0 : continue;
3500 : }
3501 :
3502 0 : spdk_uuid_fmt_lower(uuid_str, sizeof(uuid_str), &base_info->uuid);
3503 :
3504 0 : if (spdk_bdev_get_by_name(uuid_str) == NULL) {
3505 0 : continue;
3506 : }
3507 :
3508 0 : ctx->current_base_bdev_idx = raid_bdev_base_bdev_slot(base_info);
3509 :
3510 0 : status = raid_bdev_examine_load_sb(uuid_str, raid_bdev_examine_others_load_cb, ctx);
3511 0 : if (status != 0) {
3512 0 : continue;
3513 : }
3514 0 : return;
3515 : }
3516 0 : out:
3517 0 : raid_bdev_examine_others_done(ctx, status);
3518 : }
3519 :
3520 : static void
3521 0 : raid_bdev_examine_sb(const struct raid_bdev_superblock *sb, struct spdk_bdev *bdev,
3522 : raid_base_bdev_cb cb_fn, void *cb_ctx)
3523 : {
3524 0 : const struct raid_bdev_sb_base_bdev *sb_base_bdev = NULL;
3525 0 : struct raid_bdev *raid_bdev;
3526 : struct raid_base_bdev_info *iter, *base_info;
3527 : uint8_t i;
3528 : int rc;
3529 :
3530 0 : if (sb->block_size != spdk_bdev_get_data_block_size(bdev)) {
3531 0 : SPDK_WARNLOG("Bdev %s block size (%u) does not match the value in superblock (%u)\n",
3532 : bdev->name, sb->block_size, spdk_bdev_get_data_block_size(bdev));
3533 0 : rc = -EINVAL;
3534 0 : goto out;
3535 : }
3536 :
3537 0 : if (spdk_uuid_is_null(&sb->uuid)) {
3538 0 : SPDK_WARNLOG("NULL raid bdev UUID in superblock on bdev %s\n", bdev->name);
3539 0 : rc = -EINVAL;
3540 0 : goto out;
3541 : }
3542 :
3543 0 : raid_bdev = raid_bdev_find_by_uuid(&sb->uuid);
3544 :
3545 0 : if (raid_bdev) {
3546 0 : if (sb->seq_number > raid_bdev->sb->seq_number) {
3547 0 : SPDK_DEBUGLOG(bdev_raid,
3548 : "raid superblock seq_number on bdev %s (%lu) greater than existing raid bdev %s (%lu)\n",
3549 : bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number);
3550 :
3551 0 : if (raid_bdev->state != RAID_BDEV_STATE_CONFIGURING) {
3552 0 : SPDK_WARNLOG("Newer version of raid bdev %s superblock found on bdev %s but raid bdev is not in configuring state.\n",
3553 : raid_bdev->bdev.name, bdev->name);
3554 0 : rc = -EBUSY;
3555 0 : goto out;
3556 : }
3557 :
3558 : /* remove and then recreate the raid bdev using the newer superblock */
3559 0 : raid_bdev_delete(raid_bdev, NULL, NULL);
3560 0 : raid_bdev = NULL;
3561 0 : } else if (sb->seq_number < raid_bdev->sb->seq_number) {
3562 0 : SPDK_DEBUGLOG(bdev_raid,
3563 : "raid superblock seq_number on bdev %s (%lu) smaller than existing raid bdev %s (%lu)\n",
3564 : bdev->name, sb->seq_number, raid_bdev->bdev.name, raid_bdev->sb->seq_number);
3565 : /* use the current raid bdev superblock */
3566 0 : sb = raid_bdev->sb;
3567 : }
3568 : }
3569 :
3570 0 : for (i = 0; i < sb->base_bdevs_size; i++) {
3571 0 : sb_base_bdev = &sb->base_bdevs[i];
3572 :
3573 0 : assert(spdk_uuid_is_null(&sb_base_bdev->uuid) == false);
3574 :
3575 0 : if (spdk_uuid_compare(&sb_base_bdev->uuid, spdk_bdev_get_uuid(bdev)) == 0) {
3576 0 : break;
3577 : }
3578 : }
3579 :
3580 0 : if (i == sb->base_bdevs_size) {
3581 0 : SPDK_DEBUGLOG(bdev_raid, "raid superblock does not contain this bdev's uuid\n");
3582 0 : rc = -EINVAL;
3583 0 : goto out;
3584 : }
3585 :
3586 0 : if (!raid_bdev) {
3587 : struct raid_bdev_examine_others_ctx *ctx;
3588 :
3589 0 : ctx = calloc(1, sizeof(*ctx));
3590 0 : if (ctx == NULL) {
3591 0 : rc = -ENOMEM;
3592 0 : goto out;
3593 : }
3594 :
3595 0 : rc = raid_bdev_create_from_sb(sb, &raid_bdev);
3596 0 : if (rc != 0) {
3597 0 : SPDK_ERRLOG("Failed to create raid bdev %s: %s\n",
3598 : sb->name, spdk_strerror(-rc));
3599 0 : free(ctx);
3600 0 : goto out;
3601 : }
3602 :
3603 : /* after this base bdev is configured, examine other base bdevs that may be present */
3604 0 : spdk_uuid_copy(&ctx->raid_bdev_uuid, &sb->uuid);
3605 0 : ctx->cb_fn = cb_fn;
3606 0 : ctx->cb_ctx = cb_ctx;
3607 :
3608 0 : cb_fn = raid_bdev_examine_others;
3609 0 : cb_ctx = ctx;
3610 : }
3611 :
3612 0 : if (raid_bdev->state == RAID_BDEV_STATE_ONLINE) {
3613 0 : assert(sb_base_bdev->slot < raid_bdev->num_base_bdevs);
3614 0 : base_info = &raid_bdev->base_bdev_info[sb_base_bdev->slot];
3615 0 : assert(base_info->is_configured == false);
3616 0 : assert(sb_base_bdev->state == RAID_SB_BASE_BDEV_MISSING ||
3617 : sb_base_bdev->state == RAID_SB_BASE_BDEV_FAILED);
3618 0 : assert(spdk_uuid_is_null(&base_info->uuid));
3619 0 : spdk_uuid_copy(&base_info->uuid, &sb_base_bdev->uuid);
3620 0 : SPDK_NOTICELOG("Re-adding bdev %s to raid bdev %s.\n", bdev->name, raid_bdev->bdev.name);
3621 0 : rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx);
3622 0 : if (rc != 0) {
3623 0 : SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n",
3624 : bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc));
3625 : }
3626 0 : goto out;
3627 : }
3628 :
3629 0 : if (sb_base_bdev->state != RAID_SB_BASE_BDEV_CONFIGURED) {
3630 0 : SPDK_NOTICELOG("Bdev %s is not an active member of raid bdev %s. Ignoring.\n",
3631 : bdev->name, raid_bdev->bdev.name);
3632 0 : rc = -EINVAL;
3633 0 : goto out;
3634 : }
3635 :
3636 0 : base_info = NULL;
3637 0 : RAID_FOR_EACH_BASE_BDEV(raid_bdev, iter) {
3638 0 : if (spdk_uuid_compare(&iter->uuid, spdk_bdev_get_uuid(bdev)) == 0) {
3639 0 : base_info = iter;
3640 0 : break;
3641 : }
3642 : }
3643 :
3644 0 : if (base_info == NULL) {
3645 0 : SPDK_ERRLOG("Bdev %s is not a member of raid bdev %s\n",
3646 : bdev->name, raid_bdev->bdev.name);
3647 0 : rc = -EINVAL;
3648 0 : goto out;
3649 : }
3650 :
3651 0 : rc = raid_bdev_configure_base_bdev(base_info, true, cb_fn, cb_ctx);
3652 0 : if (rc != 0) {
3653 0 : SPDK_ERRLOG("Failed to configure bdev %s as base bdev of raid %s: %s\n",
3654 : bdev->name, raid_bdev->bdev.name, spdk_strerror(-rc));
3655 : }
3656 0 : out:
3657 0 : if (rc != 0 && cb_fn != 0) {
3658 0 : cb_fn(cb_ctx, rc);
3659 : }
3660 0 : }
3661 :
3662 : struct raid_bdev_examine_ctx {
3663 : struct spdk_bdev_desc *desc;
3664 : struct spdk_io_channel *ch;
3665 : raid_bdev_examine_load_sb_cb cb;
3666 : void *cb_ctx;
3667 : };
3668 :
3669 : static void
3670 0 : raid_bdev_examine_ctx_free(struct raid_bdev_examine_ctx *ctx)
3671 : {
3672 0 : if (!ctx) {
3673 0 : return;
3674 : }
3675 :
3676 0 : if (ctx->ch) {
3677 0 : spdk_put_io_channel(ctx->ch);
3678 : }
3679 :
3680 0 : if (ctx->desc) {
3681 0 : spdk_bdev_close(ctx->desc);
3682 : }
3683 :
3684 0 : free(ctx);
3685 : }
3686 :
3687 : static void
3688 0 : raid_bdev_examine_load_sb_done(const struct raid_bdev_superblock *sb, int status, void *_ctx)
3689 : {
3690 0 : struct raid_bdev_examine_ctx *ctx = _ctx;
3691 0 : struct spdk_bdev *bdev = spdk_bdev_desc_get_bdev(ctx->desc);
3692 :
3693 0 : ctx->cb(bdev, sb, status, ctx->cb_ctx);
3694 :
3695 0 : raid_bdev_examine_ctx_free(ctx);
3696 0 : }
3697 :
3698 : static void
3699 0 : raid_bdev_examine_event_cb(enum spdk_bdev_event_type type, struct spdk_bdev *bdev, void *event_ctx)
3700 : {
3701 0 : }
3702 :
3703 : static int
3704 0 : raid_bdev_examine_load_sb(const char *bdev_name, raid_bdev_examine_load_sb_cb cb, void *cb_ctx)
3705 : {
3706 : struct raid_bdev_examine_ctx *ctx;
3707 : int rc;
3708 :
3709 0 : assert(cb != NULL);
3710 :
3711 0 : ctx = calloc(1, sizeof(*ctx));
3712 0 : if (!ctx) {
3713 0 : return -ENOMEM;
3714 : }
3715 :
3716 0 : rc = spdk_bdev_open_ext(bdev_name, false, raid_bdev_examine_event_cb, NULL, &ctx->desc);
3717 0 : if (rc) {
3718 0 : SPDK_ERRLOG("Failed to open bdev %s: %s\n", bdev_name, spdk_strerror(-rc));
3719 0 : goto err;
3720 : }
3721 :
3722 0 : ctx->ch = spdk_bdev_get_io_channel(ctx->desc);
3723 0 : if (!ctx->ch) {
3724 0 : SPDK_ERRLOG("Failed to get io channel for bdev %s\n", bdev_name);
3725 0 : rc = -ENOMEM;
3726 0 : goto err;
3727 : }
3728 :
3729 0 : ctx->cb = cb;
3730 0 : ctx->cb_ctx = cb_ctx;
3731 :
3732 0 : rc = raid_bdev_load_base_bdev_superblock(ctx->desc, ctx->ch, raid_bdev_examine_load_sb_done, ctx);
3733 0 : if (rc) {
3734 0 : SPDK_ERRLOG("Failed to read bdev %s superblock: %s\n",
3735 : bdev_name, spdk_strerror(-rc));
3736 0 : goto err;
3737 : }
3738 :
3739 0 : return 0;
3740 0 : err:
3741 0 : raid_bdev_examine_ctx_free(ctx);
3742 0 : return rc;
3743 : }
3744 :
3745 : static void
3746 0 : raid_bdev_examine_cont(struct spdk_bdev *bdev, const struct raid_bdev_superblock *sb, int status,
3747 : void *ctx)
3748 : {
3749 0 : switch (status) {
3750 0 : case 0:
3751 : /* valid superblock found */
3752 0 : SPDK_DEBUGLOG(bdev_raid, "raid superblock found on bdev %s\n", bdev->name);
3753 0 : raid_bdev_examine_sb(sb, bdev, NULL, NULL);
3754 0 : break;
3755 0 : case -EINVAL:
3756 : /* no valid superblock, check if it can be claimed anyway */
3757 0 : raid_bdev_examine_no_sb(bdev);
3758 0 : break;
3759 0 : default:
3760 0 : SPDK_ERRLOG("Failed to examine bdev %s: %s\n",
3761 : bdev->name, spdk_strerror(-status));
3762 0 : break;
3763 : }
3764 :
3765 0 : spdk_bdev_module_examine_done(&g_raid_if);
3766 0 : }
3767 :
3768 : /*
3769 : * brief:
3770 : * raid_bdev_examine function is the examine function call by the below layers
3771 : * like bdev_nvme layer. This function will check if this base bdev can be
3772 : * claimed by this raid bdev or not.
3773 : * params:
3774 : * bdev - pointer to base bdev
3775 : * returns:
3776 : * none
3777 : */
3778 : static void
3779 0 : raid_bdev_examine(struct spdk_bdev *bdev)
3780 : {
3781 : int rc;
3782 :
3783 0 : if (raid_bdev_find_base_info_by_bdev(bdev) != NULL) {
3784 0 : goto done;
3785 : }
3786 :
3787 0 : if (spdk_bdev_get_dif_type(bdev) != SPDK_DIF_DISABLE) {
3788 0 : raid_bdev_examine_no_sb(bdev);
3789 0 : goto done;
3790 : }
3791 :
3792 0 : rc = raid_bdev_examine_load_sb(bdev->name, raid_bdev_examine_cont, NULL);
3793 0 : if (rc != 0) {
3794 0 : SPDK_ERRLOG("Failed to examine bdev %s: %s\n",
3795 : bdev->name, spdk_strerror(-rc));
3796 0 : goto done;
3797 : }
3798 :
3799 0 : return;
3800 0 : done:
3801 0 : spdk_bdev_module_examine_done(&g_raid_if);
3802 : }
3803 :
3804 : /* Log component for bdev raid bdev module */
3805 1 : SPDK_LOG_REGISTER_COMPONENT(bdev_raid)
|