Line data Source code
1 : /* SPDX-License-Identifier: BSD-3-Clause
2 : * Copyright (C) 2017 Intel Corporation.
3 : * All rights reserved.
4 : * Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
5 : */
6 :
7 : #include "spdk/stdinc.h"
8 :
9 : #include "spdk/blob.h"
10 : #include "spdk/crc32.h"
11 : #include "spdk/env.h"
12 : #include "spdk/queue.h"
13 : #include "spdk/thread.h"
14 : #include "spdk/bit_array.h"
15 : #include "spdk/bit_pool.h"
16 : #include "spdk/likely.h"
17 : #include "spdk/util.h"
18 : #include "spdk/string.h"
19 :
20 : #include "spdk_internal/assert.h"
21 : #include "spdk/log.h"
22 :
23 : #include "blobstore.h"
24 :
25 : #define BLOB_CRC32C_INITIAL 0xffffffffUL
26 :
27 : static int bs_register_md_thread(struct spdk_blob_store *bs);
28 : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
29 : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
30 : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
31 : uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
32 : spdk_blob_op_complete cb_fn, void *cb_arg);
33 : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
34 : uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
35 :
36 : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
37 : uint16_t value_len, bool internal);
38 : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
39 : const void **value, size_t *value_len, bool internal);
40 : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
41 :
42 : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
43 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
44 : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
45 :
46 : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
47 :
48 : /*
49 : * External snapshots require a channel per thread per esnap bdev. The tree
50 : * is populated lazily as blob I/Os are handled by the back_bs_dev. When the
51 : * owning bs channel is destroyed, all the channels in the tree are destroyed.
52 : */
53 :
54 : struct blob_esnap_channel {
55 : RB_ENTRY(blob_esnap_channel) node;
56 : spdk_blob_id blob_id;
57 : struct spdk_io_channel *channel;
58 : };
59 :
60 : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
61 : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
62 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
63 : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
64 : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
65 10245 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
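/* Illustrative sketch (not part of blobstore): a lookup in the tree above
 * typically uses a stack-allocated key that populates only the field the
 * comparator reads, e.g.:
 *
 *	struct blob_esnap_channel find = { .blob_id = blob_id };
 *	struct blob_esnap_channel *entry;
 *
 *	entry = RB_FIND(blob_esnap_channel_tree, &ch->esnap_channels, &find);
 *	if (entry != NULL) {
 *		return entry->channel;
 *	}
 *
 * The esnap_channels field name on the bs channel is an assumption made
 * for this sketch.
 */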
66 :
67 : static inline bool
68 49582 : blob_is_esnap_clone(const struct spdk_blob *blob)
69 : {
70 49582 : assert(blob != NULL);
71 49582 : return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
72 : }
73 :
74 : static int
75 2289 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
76 : {
77 2289 : assert(blob1 != NULL && blob2 != NULL);
78 2289 : return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
79 : }
80 :
81 14741 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
82 :
83 : static void
84 37031 : blob_verify_md_op(struct spdk_blob *blob)
85 : {
86 37031 : assert(blob != NULL);
87 37031 : assert(spdk_get_thread() == blob->bs->md_thread);
88 37031 : assert(blob->state != SPDK_BLOB_STATE_LOADING);
89 37031 : }
90 :
91 : static struct spdk_blob_list *
92 3828 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
93 : {
94 3828 : struct spdk_blob_list *snapshot_entry = NULL;
95 :
96 4808 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
97 1756 : if (snapshot_entry->id == blobid) {
98 776 : break;
99 : }
100 : }
101 :
102 3828 : return snapshot_entry;
103 : }
104 :
105 : static void
106 2904 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
107 : {
108 2904 : assert(spdk_spin_held(&bs->used_lock));
109 2904 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
110 2904 : assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
111 :
112 2904 : spdk_bit_array_set(bs->used_md_pages, page);
113 2904 : }
114 :
115 : static void
116 2200 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
117 : {
118 2200 : assert(spdk_spin_held(&bs->used_lock));
119 2200 : assert(page < spdk_bit_array_capacity(bs->used_md_pages));
120 2200 : assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
121 :
122 2200 : spdk_bit_array_clear(bs->used_md_pages, page);
123 2200 : }
124 :
125 : static uint32_t
126 8220 : bs_claim_cluster(struct spdk_blob_store *bs)
127 : {
128 : uint32_t cluster_num;
129 :
130 8220 : assert(spdk_spin_held(&bs->used_lock));
131 :
132 8220 : cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
133 8220 : if (cluster_num == UINT32_MAX) {
134 0 : return UINT32_MAX;
135 : }
136 :
137 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
138 8220 : bs->num_free_clusters--;
139 :
140 8220 : return cluster_num;
141 : }
142 :
143 : static void
144 2399 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
145 : {
146 2399 : assert(spdk_spin_held(&bs->used_lock));
147 2399 : assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
148 2399 : assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
149 2399 : assert(bs->num_free_clusters < bs->total_clusters);
150 :
151 2399 : SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
152 :
153 2399 : spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
154 2399 : bs->num_free_clusters++;
155 2399 : }
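/* Sketch of the locking contract asserted above (assumed caller code, not
 * blobstore itself): claim and release must run under bs->used_lock.
 *
 *	uint32_t cluster;
 *
 *	spdk_spin_lock(&bs->used_lock);
 *	cluster = bs_claim_cluster(bs);
 *	spdk_spin_unlock(&bs->used_lock);
 *	if (cluster == UINT32_MAX) {
 *		return -ENOSPC;
 *	}
 */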
156 :
157 : static int
158 8220 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
159 : {
160 8220 : uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
161 :
162 8220 : blob_verify_md_op(blob);
163 :
164 8220 : if (*cluster_lba != 0) {
165 4 : return -EEXIST;
166 : }
167 :
168 8216 : *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
169 8216 : blob->active.num_allocated_clusters++;
170 :
171 8216 : return 0;
172 : }
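/* Worked example for the mapping above, assuming bs_cluster_to_lba()
 * multiplies the cluster index by (cluster size / device block size):
 * with 1 MiB clusters and 512-byte blocks, cluster 3 maps to
 * LBA 3 * (1048576 / 512) = 6144. A stored LBA of 0 therefore doubles as
 * the "unallocated" marker checked by the -EEXIST test above.
 */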
173 :
174 : static int
175 8220 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
176 : uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
177 : {
178 8220 : uint32_t *extent_page = 0;
179 :
180 8220 : assert(spdk_spin_held(&blob->bs->used_lock));
181 :
182 8220 : *cluster = bs_claim_cluster(blob->bs);
183 8220 : if (*cluster == UINT32_MAX) {
184 : /* No more free clusters. Cannot satisfy the request */
185 0 : return -ENOSPC;
186 : }
187 :
188 8220 : if (blob->use_extent_table) {
189 4168 : extent_page = bs_cluster_to_extent_page(blob, cluster_num);
190 4168 : if (*extent_page == 0) {
191 : /* An extent page can never occupy md page 0, so start the search from 1 */
192 728 : if (*lowest_free_md_page == 0) {
193 726 : *lowest_free_md_page = 1;
194 : }
195 : /* No extent_page is allocated for the cluster */
196 728 : *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
197 : *lowest_free_md_page);
198 728 : if (*lowest_free_md_page == UINT32_MAX) {
199 : /* No more free md pages. Cannot satisfy the request */
200 0 : bs_release_cluster(blob->bs, *cluster);
201 0 : return -ENOSPC;
202 : }
203 728 : bs_claim_md_page(blob->bs, *lowest_free_md_page);
204 : }
205 : }
206 :
207 8220 : SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
208 : blob->id);
209 :
210 8220 : if (update_map) {
211 7404 : blob_insert_cluster(blob, cluster_num, *cluster);
212 7404 : if (blob->use_extent_table && *extent_page == 0) {
213 644 : *extent_page = *lowest_free_md_page;
214 : }
215 : }
216 :
217 8220 : return 0;
218 : }
219 :
220 : static void
221 5582 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
222 : {
223 5582 : xattrs->count = 0;
224 5582 : xattrs->names = NULL;
225 5582 : xattrs->ctx = NULL;
226 5582 : xattrs->get_value = NULL;
227 5582 : }
228 :
229 : void
230 3688 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
231 : {
232 3688 : if (!opts) {
233 0 : SPDK_ERRLOG("opts should not be NULL\n");
234 0 : return;
235 : }
236 :
237 3688 : if (!opts_size) {
238 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
239 0 : return;
240 : }
241 :
242 3688 : memset(opts, 0, opts_size);
243 3688 : opts->opts_size = opts_size;
244 :
245 : #define FIELD_OK(field) \
246 : offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
247 :
248 : #define SET_FIELD(field, value) \
249 : if (FIELD_OK(field)) { \
250 : opts->field = value; \
251 : } \
252 :
253 3688 : SET_FIELD(num_clusters, 0);
254 3688 : SET_FIELD(thin_provision, false);
255 3688 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
256 :
257 3688 : if (FIELD_OK(xattrs)) {
258 3688 : blob_xattrs_init(&opts->xattrs);
259 : }
260 :
261 3688 : SET_FIELD(use_extent_table, true);
262 :
263 : #undef FIELD_OK
264 : #undef SET_FIELD
265 : }
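/* Typical caller usage of the size-gated initializer above (sketch):
 *
 *	struct spdk_blob_opts opts;
 *
 *	spdk_blob_opts_init(&opts, sizeof(opts));
 *	opts.num_clusters = 16;
 *	opts.thin_provision = true;
 *
 * Passing sizeof(opts) lets a newer library detect which trailing fields an
 * older caller's struct actually contains, which is exactly what the
 * FIELD_OK()/SET_FIELD() macros guard.
 */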
266 :
267 : void
268 3478 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
269 : {
270 3478 : if (!opts) {
271 0 : SPDK_ERRLOG("opts should not be NULL\n");
272 0 : return;
273 : }
274 :
275 3478 : if (!opts_size) {
276 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
277 0 : return;
278 : }
279 :
280 3478 : memset(opts, 0, opts_size);
281 3478 : opts->opts_size = opts_size;
282 :
283 : #define FIELD_OK(field) \
284 : offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
285 :
286 : #define SET_FIELD(field, value) \
287 : if (FIELD_OK(field)) { \
288 : opts->field = value; \
289 : } \
290 :
291 3478 : SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
292 :
293 : #undef FIELD_OK
294 : #undef SET_FIELD
295 : }
296 :
297 : static struct spdk_blob *
298 5368 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
299 : {
300 : struct spdk_blob *blob;
301 :
302 5368 : blob = calloc(1, sizeof(*blob));
303 5368 : if (!blob) {
304 0 : return NULL;
305 : }
306 :
307 5368 : blob->id = id;
308 5368 : blob->bs = bs;
309 :
310 5368 : blob->parent_id = SPDK_BLOBID_INVALID;
311 :
312 5368 : blob->state = SPDK_BLOB_STATE_DIRTY;
313 5368 : blob->extent_rle_found = false;
314 5368 : blob->extent_table_found = false;
315 5368 : blob->active.num_pages = 1;
316 5368 : blob->active.pages = calloc(1, sizeof(*blob->active.pages));
317 5368 : if (!blob->active.pages) {
318 0 : free(blob);
319 0 : return NULL;
320 : }
321 :
322 5368 : blob->active.pages[0] = bs_blobid_to_page(id);
323 :
324 5368 : TAILQ_INIT(&blob->xattrs);
325 5368 : TAILQ_INIT(&blob->xattrs_internal);
326 5368 : TAILQ_INIT(&blob->pending_persists);
327 5368 : TAILQ_INIT(&blob->persists_to_complete);
328 :
329 5368 : return blob;
330 : }
331 :
332 : static void
333 10736 : xattrs_free(struct spdk_xattr_tailq *xattrs)
334 : {
335 : struct spdk_xattr *xattr, *xattr_tmp;
336 :
337 12502 : TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
338 1766 : TAILQ_REMOVE(xattrs, xattr, link);
339 1766 : free(xattr->name);
340 1766 : free(xattr->value);
341 1766 : free(xattr);
342 : }
343 10736 : }
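/* The _SAFE iteration variant above is required because the loop body frees
 * the current element; plain TAILQ_FOREACH would dereference freed memory
 * when advancing. A minimal sketch of the same idiom:
 *
 *	TAILQ_FOREACH_SAFE(item, head, link, tmp) {
 *		TAILQ_REMOVE(head, item, link);
 *		free(item);
 *	}
 */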
344 :
345 : static void
346 1116 : blob_back_bs_dev_unref(struct spdk_blob *blob)
347 : {
348 1116 : struct spdk_blob **le_prev = blob->back_bs_dev_link.le_prev;
349 1116 : struct spdk_blob *le_next = blob->back_bs_dev_link.le_next;
350 :
351 1116 : if (!le_next && !le_prev) {
352 : /* If this is the last reference to the back_bs_dev, destroy it. */
353 1112 : blob->back_bs_dev->destroy(blob->back_bs_dev);
354 : } else {
355 : /* Remove the reference to back_bs_dev. This is a headless list, in
356 : * which le_prev of the first item is NULL, so we remove the link manually
357 : * instead of using LIST_REMOVE, which assumes le_prev isn't NULL.
358 : */
359 4 : if (le_prev) {
360 0 : *le_prev = le_next;
361 : }
362 :
363 4 : if (le_next) {
364 4 : le_next->back_bs_dev_link.le_prev = le_prev;
365 : }
366 : }
367 :
368 1116 : blob->back_bs_dev = NULL;
369 1116 : }
370 :
371 : static void
372 5368 : blob_free(struct spdk_blob *blob)
373 : {
374 5368 : assert(blob != NULL);
375 5368 : assert(TAILQ_EMPTY(&blob->pending_persists));
376 5368 : assert(TAILQ_EMPTY(&blob->persists_to_complete));
377 :
378 5368 : free(blob->active.extent_pages);
379 5368 : free(blob->clean.extent_pages);
380 5368 : free(blob->active.clusters);
381 5368 : free(blob->clean.clusters);
382 5368 : free(blob->active.pages);
383 5368 : free(blob->clean.pages);
384 :
385 5368 : xattrs_free(&blob->xattrs);
386 5368 : xattrs_free(&blob->xattrs_internal);
387 :
388 5368 : if (blob->back_bs_dev) {
389 1088 : blob_back_bs_dev_unref(blob);
390 : }
391 :
392 5368 : free(blob);
393 5368 : }
394 :
395 : static void
396 328 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
397 : {
398 328 : struct spdk_bs_dev *bs_dev = ctx;
399 :
400 328 : if (bserrno != 0) {
401 : /*
402 : * This is probably due to a memory allocation failure when creating the
403 : * blob_esnap_destroy_ctx before iterating threads.
404 : */
405 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
406 : blob->id, bserrno);
407 0 : assert(false);
408 : }
409 :
410 328 : if (bs_dev == NULL) {
411 : /*
412 : * This check exists to make scan-build happy.
413 : *
414 : * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
415 : * the blobstore is being loaded. It could also be NULL if there was an error
416 : * opening the esnap device. In each of these cases, no channels could have been
417 : * created because back_bs_dev->create_channel() would have led to a NULL pointer
418 : * deref.
419 : */
420 0 : assert(false);
421 : return;
422 : }
423 :
424 328 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
425 328 : bs_dev->destroy(bs_dev);
426 : }
427 :
428 : static void
429 328 : blob_back_bs_destroy(struct spdk_blob *blob)
430 : {
431 328 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
432 : blob->id);
433 :
434 328 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
435 328 : blob->back_bs_dev);
436 328 : blob->back_bs_dev = NULL;
437 328 : }
438 :
439 : struct blob_parent {
440 : union {
441 : struct {
442 : spdk_blob_id id;
443 : struct spdk_blob *blob;
444 : } snapshot;
445 :
446 : struct {
447 : void *id;
448 : uint32_t id_len;
449 : struct spdk_bs_dev *back_bs_dev;
450 : } esnap;
451 : } u;
452 : };
453 :
454 : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
455 :
456 : struct set_bs_dev_ctx {
457 : struct spdk_blob *blob;
458 : struct spdk_bs_dev *back_bs_dev;
459 :
460 : /*
461 : * This callback is used during a set parent operation to change the references
462 : * to the parent of the blob.
463 : */
464 : set_parent_refs_cb parent_refs_cb_fn;
465 : struct blob_parent *parent_refs_cb_arg;
466 :
467 : spdk_blob_op_complete cb_fn;
468 : void *cb_arg;
469 : int bserrno;
470 : };
471 :
472 : static void
473 28 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
474 : set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
475 : spdk_blob_op_complete cb_fn, void *cb_arg)
476 : {
477 : struct set_bs_dev_ctx *ctx;
478 :
479 28 : ctx = calloc(1, sizeof(*ctx));
480 28 : if (ctx == NULL) {
481 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
482 : blob->id);
483 0 : cb_fn(cb_arg, -ENOMEM);
484 0 : return;
485 : }
486 :
487 28 : ctx->parent_refs_cb_fn = parent_refs_cb_fn;
488 28 : ctx->parent_refs_cb_arg = parent_refs_cb_arg;
489 28 : ctx->cb_fn = cb_fn;
490 28 : ctx->cb_arg = cb_arg;
491 28 : ctx->back_bs_dev = back_bs_dev;
492 28 : ctx->blob = blob;
493 :
494 28 : blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
495 : }
496 :
497 : struct freeze_io_ctx {
498 : struct spdk_bs_cpl cpl;
499 : struct spdk_blob *blob;
500 : };
501 :
502 : static void
503 530 : blob_io_sync(struct spdk_io_channel_iter *i)
504 : {
505 530 : spdk_for_each_channel_continue(i, 0);
506 530 : }
507 :
508 : static void
509 518 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
510 : {
511 518 : struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
512 518 : struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
513 518 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
514 : struct spdk_bs_request_set *set;
515 : struct spdk_bs_user_op_args *args;
516 : spdk_bs_user_op_t *op, *tmp;
517 :
518 522 : TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
519 4 : set = (struct spdk_bs_request_set *)op;
520 4 : args = &set->u.user_op;
521 :
522 4 : if (args->blob == ctx->blob) {
523 4 : TAILQ_REMOVE(&ch->queued_io, op, link);
524 4 : bs_user_op_execute(op);
525 : }
526 : }
527 :
528 518 : spdk_for_each_channel_continue(i, 0);
529 518 : }
530 :
531 : static void
532 1016 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
533 : {
534 1016 : struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
535 :
536 1016 : ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
537 :
538 1016 : free(ctx);
539 1016 : }
540 :
541 : static void
542 514 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
543 : {
544 : struct freeze_io_ctx *ctx;
545 :
546 514 : blob_verify_md_op(blob);
547 :
548 514 : ctx = calloc(1, sizeof(*ctx));
549 514 : if (!ctx) {
550 0 : cb_fn(cb_arg, -ENOMEM);
551 0 : return;
552 : }
553 :
554 514 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
555 514 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
556 514 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
557 514 : ctx->blob = blob;
558 :
559 : /* Freeze I/O on blob */
560 514 : blob->frozen_refcnt++;
561 :
562 514 : spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
563 : }
564 :
565 : static void
566 502 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
567 : {
568 : struct freeze_io_ctx *ctx;
569 :
570 502 : blob_verify_md_op(blob);
571 :
572 502 : ctx = calloc(1, sizeof(*ctx));
573 502 : if (!ctx) {
574 0 : cb_fn(cb_arg, -ENOMEM);
575 0 : return;
576 : }
577 :
578 502 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
579 502 : ctx->cpl.u.blob_basic.cb_fn = cb_fn;
580 502 : ctx->cpl.u.blob_basic.cb_arg = cb_arg;
581 502 : ctx->blob = blob;
582 :
583 502 : assert(blob->frozen_refcnt > 0);
584 :
585 502 : blob->frozen_refcnt--;
586 :
587 502 : spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
588 : }
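/* Sketch of the freeze/unfreeze pairing (assumption: the caller runs on the
 * metadata thread, as blob_verify_md_op() asserts):
 *
 *	blob_freeze_io(blob, do_md_update, ctx);   // frozen_refcnt 0 -> 1
 *	// ...mutate metadata inside do_md_update()...
 *	blob_unfreeze_io(blob, done_cb, ctx);      // replays queued I/O
 *
 * While frozen_refcnt > 0, submitted I/O is parked on each channel's
 * queued_io list and replayed by blob_execute_queued_io() on unfreeze.
 */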
589 :
590 : static int
591 8474 : blob_mark_clean(struct spdk_blob *blob)
592 : {
593 8474 : uint32_t *extent_pages = NULL;
594 8474 : uint64_t *clusters = NULL;
595 8474 : uint32_t *pages = NULL;
596 :
597 8474 : assert(blob != NULL);
598 :
599 8474 : if (blob->active.num_extent_pages) {
600 2859 : assert(blob->active.extent_pages);
601 2859 : extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
602 2859 : if (!extent_pages) {
603 0 : return -ENOMEM;
604 : }
605 2859 : memcpy(extent_pages, blob->active.extent_pages,
606 2859 : blob->active.num_extent_pages * sizeof(*extent_pages));
607 : }
608 :
609 8474 : if (blob->active.num_clusters) {
610 5946 : assert(blob->active.clusters);
611 5946 : clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
612 5946 : if (!clusters) {
613 0 : free(extent_pages);
614 0 : return -ENOMEM;
615 : }
616 5946 : memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
617 : }
618 :
619 8474 : if (blob->active.num_pages) {
620 6986 : assert(blob->active.pages);
621 6986 : pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
622 6986 : if (!pages) {
623 0 : free(extent_pages);
624 0 : free(clusters);
625 0 : return -ENOMEM;
626 : }
627 6986 : memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
628 : }
629 :
630 8474 : free(blob->clean.extent_pages);
631 8474 : free(blob->clean.clusters);
632 8474 : free(blob->clean.pages);
633 :
634 8474 : blob->clean.num_extent_pages = blob->active.num_extent_pages;
635 8474 : blob->clean.extent_pages = blob->active.extent_pages;
636 8474 : blob->clean.num_clusters = blob->active.num_clusters;
637 8474 : blob->clean.clusters = blob->active.clusters;
638 8474 : blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
639 8474 : blob->clean.num_pages = blob->active.num_pages;
640 8474 : blob->clean.pages = blob->active.pages;
641 :
642 8474 : blob->active.extent_pages = extent_pages;
643 8474 : blob->active.clusters = clusters;
644 8474 : blob->active.pages = pages;
645 :
646 : /* If the metadata was dirtied again while the metadata was being written to disk,
647 : * we do not want to revert the DIRTY state back to CLEAN here.
648 : */
649 8474 : if (blob->state == SPDK_BLOB_STATE_LOADING) {
650 3410 : blob->state = SPDK_BLOB_STATE_CLEAN;
651 : }
652 :
653 8474 : return 0;
654 : }
655 :
656 : static int
657 1284 : blob_deserialize_xattr(struct spdk_blob *blob,
658 : struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
659 : {
660 : struct spdk_xattr *xattr;
661 :
662 1284 : if (desc_xattr->length != sizeof(desc_xattr->name_length) +
663 : sizeof(desc_xattr->value_length) +
664 1284 : desc_xattr->name_length + desc_xattr->value_length) {
665 0 : return -EINVAL;
666 : }
667 :
668 1284 : xattr = calloc(1, sizeof(*xattr));
669 1284 : if (xattr == NULL) {
670 0 : return -ENOMEM;
671 : }
672 :
673 1284 : xattr->name = malloc(desc_xattr->name_length + 1);
674 1284 : if (xattr->name == NULL) {
675 0 : free(xattr);
676 0 : return -ENOMEM;
677 : }
678 :
679 1284 : xattr->value = malloc(desc_xattr->value_length);
680 1284 : if (xattr->value == NULL) {
681 0 : free(xattr->name);
682 0 : free(xattr);
683 0 : return -ENOMEM;
684 : }
685 :
686 1284 : memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
687 1284 : xattr->name[desc_xattr->name_length] = '\0';
688 1284 : xattr->value_len = desc_xattr->value_length;
689 1284 : memcpy(xattr->value,
690 1284 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
691 1284 : desc_xattr->value_length);
692 :
693 1284 : TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
694 :
695 1284 : return 0;
696 : }
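/* On-disk layout parsed above, following the generic descriptor header
 * (sizes per the length check at the top of the function):
 *
 *	+-------------+--------------+-------------------+---------------------+
 *	| name_length | value_length | name[name_length] | value[value_length] |
 *	+-------------+--------------+-------------------+---------------------+
 *
 * The name is not NUL-terminated on disk; the terminator is added only to
 * the in-memory copy.
 */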
697 :
698 :
699 : static int
700 4588 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
701 : {
702 : struct spdk_blob_md_descriptor *desc;
703 4588 : size_t cur_desc = 0;
704 : void *tmp;
705 :
706 4588 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
707 13476 : while (cur_desc < sizeof(page->descriptors)) {
708 13476 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
709 4540 : if (desc->length == 0) {
710 : /* If padding and length are 0, this terminates the page */
711 4540 : break;
712 : }
713 8936 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
714 : struct spdk_blob_md_descriptor_flags *desc_flags;
715 :
716 3442 : desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
717 :
718 3442 : if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
719 0 : return -EINVAL;
720 : }
721 :
722 3442 : if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
723 : SPDK_BLOB_INVALID_FLAGS_MASK) {
724 8 : return -EINVAL;
725 : }
726 :
727 3434 : if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
728 : SPDK_BLOB_DATA_RO_FLAGS_MASK) {
729 12 : blob->data_ro = true;
730 12 : blob->md_ro = true;
731 : }
732 :
733 3434 : if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
734 : SPDK_BLOB_MD_RO_FLAGS_MASK) {
735 12 : blob->md_ro = true;
736 : }
737 :
738 3434 : if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
739 566 : blob->data_ro = true;
740 566 : blob->md_ro = true;
741 : }
742 :
743 3434 : blob->invalid_flags = desc_flags->invalid_flags;
744 3434 : blob->data_ro_flags = desc_flags->data_ro_flags;
745 3434 : blob->md_ro_flags = desc_flags->md_ro_flags;
746 :
747 5494 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
748 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
749 : unsigned int i, j;
750 1396 : unsigned int cluster_count = blob->active.num_clusters;
751 :
752 1396 : if (blob->extent_table_found) {
753 : /* An extent table descriptor is already present in the md;
754 : * the two descriptor types must never appear at the same time. */
755 0 : return -EINVAL;
756 : }
757 1396 : blob->extent_rle_found = true;
758 :
759 1396 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
760 :
761 1396 : if (desc_extent_rle->length == 0 ||
762 1396 : (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
763 0 : return -EINVAL;
764 : }
765 :
766 2970 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
767 21282 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
768 19708 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
769 6692 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
770 6692 : desc_extent_rle->extents[i].cluster_idx + j)) {
771 0 : return -EINVAL;
772 : }
773 : }
774 19708 : cluster_count++;
775 : }
776 : }
777 :
778 1396 : if (cluster_count == 0) {
779 0 : return -EINVAL;
780 : }
781 1396 : tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
782 1396 : if (tmp == NULL) {
783 0 : return -ENOMEM;
784 : }
785 1396 : blob->active.clusters = tmp;
786 1396 : blob->active.cluster_array_size = cluster_count;
787 :
788 2970 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
789 21282 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
790 19708 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
791 13384 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
792 6692 : desc_extent_rle->extents[i].cluster_idx + j);
793 6692 : blob->active.num_allocated_clusters++;
794 13016 : } else if (spdk_blob_is_thin_provisioned(blob)) {
795 13016 : blob->active.clusters[blob->active.num_clusters++] = 0;
796 : } else {
797 0 : return -EINVAL;
798 : }
799 : }
800 : }
801 4098 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
802 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
803 1768 : uint32_t num_extent_pages = blob->active.num_extent_pages;
804 : uint32_t i, j;
805 : size_t extent_pages_length;
806 :
807 1768 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
808 1768 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
809 :
810 1768 : if (blob->extent_rle_found) {
811 : /* This means that an Extent RLE descriptor is present in the MD;
812 : * the two descriptor types must never appear at the same time. */
813 0 : return -EINVAL;
814 1768 : } else if (blob->extent_table_found &&
815 0 : desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
816 : /* Number of clusters in this ET does not match number
817 : * from previously read EXTENT_TABLE. */
818 0 : return -EINVAL;
819 : }
820 :
821 1768 : if (desc_extent_table->length == 0 ||
822 1768 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
823 0 : return -EINVAL;
824 : }
825 :
826 1768 : blob->extent_table_found = true;
827 :
828 3246 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
829 1478 : num_extent_pages += desc_extent_table->extent_page[i].num_pages;
830 : }
831 :
832 1768 : if (num_extent_pages > 0) {
833 1462 : tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
834 1462 : if (tmp == NULL) {
835 0 : return -ENOMEM;
836 : }
837 1462 : blob->active.extent_pages = tmp;
838 : }
839 1768 : blob->active.extent_pages_array_size = num_extent_pages;
840 :
841 1768 : blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
842 :
843 : /* Extent table entries contain md page numbers for extent pages.
844 : * Zeroes represent unallocated extent pages; those are run-length-encoded.
845 : */
846 3246 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
847 1478 : if (desc_extent_table->extent_page[i].page_idx != 0) {
848 1052 : assert(desc_extent_table->extent_page[i].num_pages == 1);
849 1052 : blob->active.extent_pages[blob->active.num_extent_pages++] =
850 1052 : desc_extent_table->extent_page[i].page_idx;
851 426 : } else if (spdk_blob_is_thin_provisioned(blob)) {
852 852 : for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
853 426 : blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
854 : }
855 : } else {
856 0 : return -EINVAL;
857 : }
858 : }
859 2330 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
860 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
861 : unsigned int i;
862 1046 : unsigned int cluster_count = 0;
863 : size_t cluster_idx_length;
864 :
865 1046 : if (blob->extent_rle_found) {
866 : /* This means that an Extent RLE descriptor is present in the MD;
867 : * the two descriptor types must never appear at the same time. */
868 0 : return -EINVAL;
869 : }
870 :
871 1046 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
872 1046 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
873 :
874 1046 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
875 1046 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
876 0 : return -EINVAL;
877 : }
878 :
879 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
880 15298 : if (desc_extent->cluster_idx[i] != 0) {
881 6962 : if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
882 0 : return -EINVAL;
883 : }
884 : }
885 15298 : cluster_count++;
886 : }
887 :
888 1046 : if (cluster_count == 0) {
889 0 : return -EINVAL;
890 : }
891 :
892 : /* When reading extent pages sequentially, the starting cluster idx should
893 : * match the current size of the blob.
894 : * If this is ever changed to batch reading, this check shall be removed. */
895 1046 : if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
896 0 : return -EINVAL;
897 : }
898 :
899 1046 : tmp = realloc(blob->active.clusters,
900 1046 : (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
901 1046 : if (tmp == NULL) {
902 0 : return -ENOMEM;
903 : }
904 1046 : blob->active.clusters = tmp;
905 1046 : blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
906 :
907 16344 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
908 15298 : if (desc_extent->cluster_idx[i] != 0) {
909 6962 : blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
910 : desc_extent->cluster_idx[i]);
911 6962 : blob->active.num_allocated_clusters++;
912 8336 : } else if (spdk_blob_is_thin_provisioned(blob)) {
913 8336 : blob->active.clusters[blob->active.num_clusters++] = 0;
914 : } else {
915 0 : return -EINVAL;
916 : }
917 : }
918 1046 : assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
919 1046 : assert(blob->remaining_clusters_in_et >= cluster_count);
920 1046 : blob->remaining_clusters_in_et -= cluster_count;
921 1284 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
922 : int rc;
923 :
924 394 : rc = blob_deserialize_xattr(blob,
925 : (struct spdk_blob_md_descriptor_xattr *) desc, false);
926 394 : if (rc != 0) {
927 0 : return rc;
928 : }
929 890 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
930 : int rc;
931 :
932 890 : rc = blob_deserialize_xattr(blob,
933 : (struct spdk_blob_md_descriptor_xattr *) desc, true);
934 890 : if (rc != 0) {
935 0 : return rc;
936 : }
937 : } else {
938 : /* Unrecognized descriptor type. Do not fail - just continue to the
939 : * next descriptor. If this descriptor is associated with some feature
940 : * defined in a newer version of blobstore, that version of blobstore
941 : * should create and set an associated feature flag to specify if this
942 : * blob can be loaded or not.
943 : */
944 : }
945 :
946 : /* Advance to the next descriptor */
947 8928 : cur_desc += sizeof(*desc) + desc->length;
948 8928 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
949 40 : break;
950 : }
951 8888 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
952 : }
953 :
954 4580 : return 0;
955 : }
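/* The walk above follows the generic metadata descriptor format: each
 * descriptor is a [type][length][payload] record, and the cursor always
 * advances by sizeof(struct spdk_blob_md_descriptor) + desc->length. A
 * zero-length PADDING descriptor terminates the page early; otherwise
 * parsing stops once another full header no longer fits within
 * page->descriptors.
 */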
956 :
957 : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
958 :
959 : static int
960 1046 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
961 : {
962 1046 : assert(blob != NULL);
963 1046 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
964 :
965 1046 : if (bs_load_cur_extent_page_valid(extent_page) == false) {
966 0 : return -ENOENT;
967 : }
968 :
969 1046 : return blob_parse_page(extent_page, blob);
970 : }
971 :
972 : static int
973 3446 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
974 : struct spdk_blob *blob)
975 : {
976 : const struct spdk_blob_md_page *page;
977 : uint32_t i;
978 : int rc;
979 : void *tmp;
980 :
981 3446 : assert(page_count > 0);
982 3446 : assert(pages[0].sequence_num == 0);
983 3446 : assert(blob != NULL);
984 3446 : assert(blob->state == SPDK_BLOB_STATE_LOADING);
985 3446 : assert(blob->active.clusters == NULL);
986 :
987 : /* The blobid provided doesn't match what's in the MD; this can
988 : * happen, for example, if a bogus blobid is passed in through open.
989 : */
990 3446 : if (blob->id != pages[0].id) {
991 4 : SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
992 : "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
993 4 : return -ENOENT;
994 : }
995 :
996 3442 : tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
997 3442 : if (!tmp) {
998 0 : return -ENOMEM;
999 : }
1000 3442 : blob->active.pages = tmp;
1001 :
1002 3442 : blob->active.pages[0] = pages[0].id;
1003 :
1004 3542 : for (i = 1; i < page_count; i++) {
1005 100 : assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
1006 100 : blob->active.pages[i] = pages[i - 1].next;
1007 : }
1008 3442 : blob->active.num_pages = page_count;
1009 :
1010 6976 : for (i = 0; i < page_count; i++) {
1011 3542 : page = &pages[i];
1012 :
1013 3542 : assert(page->id == blob->id);
1014 3542 : assert(page->sequence_num == i);
1015 :
1016 3542 : rc = blob_parse_page(page, blob);
1017 3542 : if (rc != 0) {
1018 8 : return rc;
1019 : }
1020 : }
1021 :
1022 3434 : return 0;
1023 : }
1024 :
1025 : static int
1026 4370 : blob_serialize_add_page(const struct spdk_blob *blob,
1027 : struct spdk_blob_md_page **pages,
1028 : uint32_t *page_count,
1029 : struct spdk_blob_md_page **last_page)
1030 : {
1031 : struct spdk_blob_md_page *page, *tmp_pages;
1032 :
1033 4370 : assert(pages != NULL);
1034 4370 : assert(page_count != NULL);
1035 :
1036 4370 : *last_page = NULL;
1037 4370 : if (*page_count == 0) {
1038 4282 : assert(*pages == NULL);
1039 4282 : *pages = spdk_malloc(SPDK_BS_PAGE_SIZE, 0,
1040 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1041 4282 : if (*pages == NULL) {
1042 0 : return -ENOMEM;
1043 : }
1044 4282 : *page_count = 1;
1045 : } else {
1046 88 : assert(*pages != NULL);
1047 88 : tmp_pages = spdk_realloc(*pages, SPDK_BS_PAGE_SIZE * (*page_count + 1), 0);
1048 88 : if (tmp_pages == NULL) {
1049 0 : return -ENOMEM;
1050 : }
1051 88 : (*page_count)++;
1052 88 : *pages = tmp_pages;
1053 : }
1054 :
1055 4370 : page = &(*pages)[*page_count - 1];
1056 4370 : memset(page, 0, sizeof(*page));
1057 4370 : page->id = blob->id;
1058 4370 : page->sequence_num = *page_count - 1;
1059 4370 : page->next = SPDK_INVALID_MD_PAGE;
1060 4370 : *last_page = page;
1061 :
1062 4370 : return 0;
1063 : }
1064 :
1065 : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
1066 : * Update required_sz on both success and failure.
1067 : *
1068 : */
1069 : static int
1070 1795 : blob_serialize_xattr(const struct spdk_xattr *xattr,
1071 : uint8_t *buf, size_t buf_sz,
1072 : size_t *required_sz, bool internal)
1073 : {
1074 : struct spdk_blob_md_descriptor_xattr *desc;
1075 :
1076 1795 : *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
1077 1795 : strlen(xattr->name) +
1078 1795 : xattr->value_len;
1079 :
1080 1795 : if (buf_sz < *required_sz) {
1081 48 : return -1;
1082 : }
1083 :
1084 1747 : desc = (struct spdk_blob_md_descriptor_xattr *)buf;
1085 :
1086 1747 : desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
1087 1747 : desc->length = sizeof(desc->name_length) +
1088 : sizeof(desc->value_length) +
1089 1747 : strlen(xattr->name) +
1090 1747 : xattr->value_len;
1091 1747 : desc->name_length = strlen(xattr->name);
1092 1747 : desc->value_length = xattr->value_len;
1093 :
1094 1747 : memcpy(desc->name, xattr->name, desc->name_length);
1095 1747 : memcpy((void *)((uintptr_t)desc->name + desc->name_length),
1096 1747 : xattr->value,
1097 1747 : desc->value_length);
1098 :
1099 1747 : return 0;
1100 : }
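/* On failure, *required_sz tells the caller exactly how much contiguous
 * buffer this xattr needs; blob_serialize_xattrs() below uses that to add a
 * fresh metadata page to the chain and retry the serialization once.
 */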
1101 :
1102 : static void
1103 1695 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
1104 : uint64_t start_ep, uint64_t *next_ep,
1105 : uint8_t **buf, size_t *remaining_sz)
1106 : {
1107 : struct spdk_blob_md_descriptor_extent_table *desc;
1108 : size_t cur_sz;
1109 : uint64_t i, et_idx;
1110 : uint32_t extent_page, ep_len;
1111 :
1112 : /* The buffer must have room for at least the num_clusters entry */
1113 1695 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
1114 1695 : if (*remaining_sz < cur_sz) {
1115 20 : *next_ep = start_ep;
1116 20 : return;
1117 : }
1118 :
1119 1675 : desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
1120 1675 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
1121 :
1122 1675 : desc->num_clusters = blob->active.num_clusters;
1123 :
1124 1675 : ep_len = 1;
1125 1675 : et_idx = 0;
1126 4256 : for (i = start_ep; i < blob->active.num_extent_pages; i++) {
1127 2581 : if (*remaining_sz < cur_sz + sizeof(desc->extent_page[0])) {
1128 : /* If we ran out of buffer space, return */
1129 0 : break;
1130 : }
1131 :
1132 2581 : extent_page = blob->active.extent_pages[i];
1133 : /* Verify that next extent_page is unallocated */
1134 2581 : if (extent_page == 0 &&
1135 1528 : (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
1136 1078 : ep_len++;
1137 1078 : continue;
1138 : }
1139 1503 : desc->extent_page[et_idx].page_idx = extent_page;
1140 1503 : desc->extent_page[et_idx].num_pages = ep_len;
1141 1503 : et_idx++;
1142 :
1143 1503 : ep_len = 1;
1144 1503 : cur_sz += sizeof(desc->extent_page[et_idx]);
1145 : }
1146 1675 : *next_ep = i;
1147 :
1148 1675 : desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
1149 1675 : *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
1150 1675 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
1151 : }
1152 :
1153 : static int
1154 1677 : blob_serialize_extent_table(const struct spdk_blob *blob,
1155 : struct spdk_blob_md_page **pages,
1156 : struct spdk_blob_md_page *cur_page,
1157 : uint32_t *page_count, uint8_t **buf,
1158 : size_t *remaining_sz)
1159 : {
1160 1677 : uint64_t last_extent_page;
1161 : int rc;
1162 :
1163 1677 : last_extent_page = 0;
1164 : /* At least a single extent table entry always has to be persisted.
1165 : * This case occurs even when num_extent_pages == 0. */
1166 1695 : while (last_extent_page <= blob->active.num_extent_pages) {
1167 1695 : blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
1168 : remaining_sz);
1169 :
1170 1695 : if (last_extent_page == blob->active.num_extent_pages) {
1171 1677 : break;
1172 : }
1173 :
1174 18 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1175 18 : if (rc < 0) {
1176 0 : return rc;
1177 : }
1178 :
1179 18 : *buf = (uint8_t *)cur_page->descriptors;
1180 18 : *remaining_sz = sizeof(cur_page->descriptors);
1181 : }
1182 :
1183 1677 : return 0;
1184 : }
1185 :
1186 : static void
1187 1747 : blob_serialize_extent_rle(const struct spdk_blob *blob,
1188 : uint64_t start_cluster, uint64_t *next_cluster,
1189 : uint8_t **buf, size_t *buf_sz)
1190 : {
1191 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
1192 : size_t cur_sz;
1193 : uint64_t i, extent_idx;
1194 : uint64_t lba, lba_per_cluster, lba_count;
1195 :
1196 : /* The buffer must have room for at least one extent */
1197 1747 : cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
1198 1747 : if (*buf_sz < cur_sz) {
1199 18 : *next_cluster = start_cluster;
1200 18 : return;
1201 : }
1202 :
1203 1729 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
1204 1729 : desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
1205 :
1206 1729 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1207 : /* Assert for scan-build false positive */
1208 1729 : assert(lba_per_cluster > 0);
1209 :
1210 1729 : lba = blob->active.clusters[start_cluster];
1211 1729 : lba_count = lba_per_cluster;
1212 1729 : extent_idx = 0;
1213 810550 : for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
1214 808825 : if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
1215 : /* Run-length encode sequential non-zero LBA */
1216 7276 : lba_count += lba_per_cluster;
1217 7276 : continue;
1218 801549 : } else if (lba == 0 && blob->active.clusters[i] == 0) {
1219 : /* Run-length encode unallocated clusters */
1220 800356 : lba_count += lba_per_cluster;
1221 800356 : continue;
1222 : }
1223 1193 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1224 1193 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1225 1193 : extent_idx++;
1226 :
1227 1193 : cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
1228 :
1229 1193 : if (*buf_sz < cur_sz) {
1230 : /* If we ran out of buffer space, return */
1231 4 : *next_cluster = i;
1232 4 : break;
1233 : }
1234 :
1235 1189 : lba = blob->active.clusters[i];
1236 1189 : lba_count = lba_per_cluster;
1237 : }
1238 :
1239 1729 : if (*buf_sz >= cur_sz) {
1240 1725 : desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
1241 1725 : desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
1242 1725 : extent_idx++;
1243 :
1244 1725 : *next_cluster = blob->active.num_clusters;
1245 : }
1246 :
1247 1729 : desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
1248 1729 : *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1249 1729 : *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
1250 : }
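/* Worked example of the encoding above, assuming 1 cluster == 256 LBAs:
 * active.clusters = { 256, 512, 768, 0, 0 } serializes into two extents,
 * { cluster_idx = 1, length = 3 } for the run of physically sequential
 * allocated clusters and { cluster_idx = 0, length = 2 } for the run of
 * unallocated (thin-provisioned) clusters.
 */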
1251 :
1252 : static int
1253 1939 : blob_serialize_extents_rle(const struct spdk_blob *blob,
1254 : struct spdk_blob_md_page **pages,
1255 : struct spdk_blob_md_page *cur_page,
1256 : uint32_t *page_count, uint8_t **buf,
1257 : size_t *remaining_sz)
1258 : {
1259 1939 : uint64_t last_cluster;
1260 : int rc;
1261 :
1262 1939 : last_cluster = 0;
1263 1961 : while (last_cluster < blob->active.num_clusters) {
1264 1747 : blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
1265 :
1266 1747 : if (last_cluster == blob->active.num_clusters) {
1267 1725 : break;
1268 : }
1269 :
1270 22 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1271 22 : if (rc < 0) {
1272 0 : return rc;
1273 : }
1274 :
1275 22 : *buf = (uint8_t *)cur_page->descriptors;
1276 22 : *remaining_sz = sizeof(cur_page->descriptors);
1277 : }
1278 :
1279 1939 : return 0;
1280 : }
1281 :
1282 : static void
1283 1100 : blob_serialize_extent_page(const struct spdk_blob *blob,
1284 : uint64_t cluster, struct spdk_blob_md_page *page)
1285 : {
1286 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
1287 : uint64_t i, extent_idx;
1288 : uint64_t lba, lba_per_cluster;
1289 1100 : uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
1290 :
1291 1100 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
1292 1100 : desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
1293 :
1294 1100 : lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
1295 :
1296 1100 : desc_extent->start_cluster_idx = start_cluster_idx;
1297 1100 : extent_idx = 0;
1298 42406 : for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
1299 41372 : lba = blob->active.clusters[i];
1300 41372 : desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
1301 41372 : if (extent_idx >= SPDK_EXTENTS_PER_EP) {
1302 66 : break;
1303 : }
1304 : }
1305 1100 : desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
1306 : sizeof(desc_extent->cluster_idx[0]) * extent_idx;
1307 1100 : }
1308 :
1309 : static void
1310 3616 : blob_serialize_flags(const struct spdk_blob *blob,
1311 : uint8_t *buf, size_t *buf_sz)
1312 : {
1313 : struct spdk_blob_md_descriptor_flags *desc;
1314 :
1315 : /*
1316 : * Flags get serialized first, so we should always have room for the flags
1317 : * descriptor.
1318 : */
1319 3616 : assert(*buf_sz >= sizeof(*desc));
1320 :
1321 3616 : desc = (struct spdk_blob_md_descriptor_flags *)buf;
1322 3616 : desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
1323 3616 : desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
1324 3616 : desc->invalid_flags = blob->invalid_flags;
1325 3616 : desc->data_ro_flags = blob->data_ro_flags;
1326 3616 : desc->md_ro_flags = blob->md_ro_flags;
1327 :
1328 3616 : *buf_sz -= sizeof(*desc);
1329 3616 : }
1330 :
1331 : static int
1332 7232 : blob_serialize_xattrs(const struct spdk_blob *blob,
1333 : const struct spdk_xattr_tailq *xattrs, bool internal,
1334 : struct spdk_blob_md_page **pages,
1335 : struct spdk_blob_md_page *cur_page,
1336 : uint32_t *page_count, uint8_t **buf,
1337 : size_t *remaining_sz)
1338 : {
1339 : const struct spdk_xattr *xattr;
1340 : int rc;
1341 :
1342 8979 : TAILQ_FOREACH(xattr, xattrs, link) {
1343 1747 : size_t required_sz = 0;
1344 :
1345 1747 : rc = blob_serialize_xattr(xattr,
1346 : *buf, *remaining_sz,
1347 : &required_sz, internal);
1348 1747 : if (rc < 0) {
1349 : /* Need to add a new page to the chain */
1350 48 : rc = blob_serialize_add_page(blob, pages, page_count,
1351 : &cur_page);
1352 48 : if (rc < 0) {
1353 0 : spdk_free(*pages);
1354 0 : *pages = NULL;
1355 0 : *page_count = 0;
1356 0 : return rc;
1357 : }
1358 :
1359 48 : *buf = (uint8_t *)cur_page->descriptors;
1360 48 : *remaining_sz = sizeof(cur_page->descriptors);
1361 :
1362 : /* Try again */
1363 48 : required_sz = 0;
1364 48 : rc = blob_serialize_xattr(xattr,
1365 : *buf, *remaining_sz,
1366 : &required_sz, internal);
1367 :
1368 48 : if (rc < 0) {
1369 0 : spdk_free(*pages);
1370 0 : *pages = NULL;
1371 0 : *page_count = 0;
1372 0 : return rc;
1373 : }
1374 : }
1375 :
1376 1747 : *remaining_sz -= required_sz;
1377 1747 : *buf += required_sz;
1378 : }
1379 :
1380 7232 : return 0;
1381 : }
1382 :
1383 : static int
1384 3616 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
1385 : uint32_t *page_count)
1386 : {
1387 3616 : struct spdk_blob_md_page *cur_page;
1388 : int rc;
1389 3616 : uint8_t *buf;
1390 3616 : size_t remaining_sz;
1391 :
1392 3616 : assert(pages != NULL);
1393 3616 : assert(page_count != NULL);
1394 3616 : assert(blob != NULL);
1395 3616 : assert(blob->state == SPDK_BLOB_STATE_DIRTY);
1396 :
1397 3616 : *pages = NULL;
1398 3616 : *page_count = 0;
1399 :
1400 : /* A blob always has at least 1 page, even if it has no descriptors */
1401 3616 : rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
1402 3616 : if (rc < 0) {
1403 0 : return rc;
1404 : }
1405 :
1406 3616 : buf = (uint8_t *)cur_page->descriptors;
1407 3616 : remaining_sz = sizeof(cur_page->descriptors);
1408 :
1409 : /* Serialize flags */
1410 3616 : blob_serialize_flags(blob, buf, &remaining_sz);
1411 3616 : buf += sizeof(struct spdk_blob_md_descriptor_flags);
1412 :
1413 : /* Serialize xattrs */
1414 3616 : rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
1415 : pages, cur_page, page_count, &buf, &remaining_sz);
1416 3616 : if (rc < 0) {
1417 0 : return rc;
1418 : }
1419 :
1420 : /* Serialize internal xattrs */
1421 3616 : rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
1422 : pages, cur_page, page_count, &buf, &remaining_sz);
1423 3616 : if (rc < 0) {
1424 0 : return rc;
1425 : }
1426 :
1427 3616 : if (blob->use_extent_table) {
1428 : /* Serialize extent table */
1429 1677 : rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1430 : } else {
1431 : /* Serialize extents */
1432 1939 : rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
1433 : }
1434 :
1435 3616 : return rc;
1436 : }
1437 :
1438 : struct spdk_blob_load_ctx {
1439 : struct spdk_blob *blob;
1440 :
1441 : struct spdk_blob_md_page *pages;
1442 : uint32_t num_pages;
1443 : uint32_t next_extent_page;
1444 : spdk_bs_sequence_t *seq;
1445 :
1446 : spdk_bs_sequence_cpl cb_fn;
1447 : void *cb_arg;
1448 : };
1449 :
1450 : static uint32_t
1451 19958 : blob_md_page_calc_crc(void *page)
1452 : {
1453 : uint32_t crc;
1454 :
1455 19958 : crc = BLOB_CRC32C_INITIAL;
1456 19958 : crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
1457 19958 : crc ^= BLOB_CRC32C_INITIAL;
1458 :
1459 19958 : return crc;
1460 :
1461 : }
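/* Sketch of how callers verify a page with the helper above (the crc field
 * occupies the final 4 bytes of the page, which is why the computation
 * stops at SPDK_BS_PAGE_SIZE - 4):
 *
 *	if (blob_md_page_calc_crc(page) != page->crc) {
 *		return -EINVAL;
 *	}
 */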
1462 :
1463 : static void
1464 3474 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
1465 : {
1466 3474 : struct spdk_blob *blob = ctx->blob;
1467 :
1468 3474 : if (bserrno == 0) {
1469 3410 : blob_mark_clean(blob);
1470 : }
1471 :
1472 3474 : ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
1473 :
1474 : /* Free the memory */
1475 3474 : spdk_free(ctx->pages);
1476 3474 : free(ctx);
1477 3474 : }
1478 :
1479 : static void
1480 454 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
1481 : {
1482 454 : struct spdk_blob_load_ctx *ctx = cb_arg;
1483 454 : struct spdk_blob *blob = ctx->blob;
1484 :
1485 454 : if (bserrno == 0) {
1486 448 : blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
1487 448 : if (blob->back_bs_dev == NULL) {
1488 0 : bserrno = -ENOMEM;
1489 : }
1490 : }
1491 454 : if (bserrno != 0) {
1492 6 : SPDK_ERRLOG("Snapshot fail\n");
1493 : }
1494 :
1495 454 : blob_load_final(ctx, bserrno);
1496 454 : }
1497 :
1498 : static void blob_update_clear_method(struct spdk_blob *blob);
1499 :
1500 : static int
1501 124 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
1502 : {
1503 124 : struct spdk_blob_store *bs = blob->bs;
1504 124 : struct spdk_bs_dev *bs_dev = NULL;
1505 124 : const void *esnap_id = NULL;
1506 124 : size_t id_len = 0;
1507 : int rc;
1508 :
1509 124 : if (bs->esnap_bs_dev_create == NULL) {
1510 8 : SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
1511 : "without support for esnap clones\n", blob->id);
1512 8 : return -ENOTSUP;
1513 : }
1514 116 : assert(blob->back_bs_dev == NULL);
1515 :
1516 116 : rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
1517 116 : if (rc != 0) {
1518 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
1519 0 : return -EINVAL;
1520 : }
1521 116 : assert(id_len > 0 && id_len < UINT32_MAX);
1522 :
1523 116 : SPDK_INFOLOG(blob, "Creating external snapshot device\n");
1524 :
1525 116 : rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
1526 : &bs_dev);
1527 116 : if (rc != 0) {
1528 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
1529 : "with error %d\n", blob->id, rc);
1530 0 : return rc;
1531 : }
1532 :
1533 : /*
1534 : * Note: bs_dev might be NULL if the consumer chose not to open the external snapshot.
1535 : * This can happen in particular during spdk_bs_load() iteration.
1536 : */
1537 116 : if (bs_dev != NULL) {
1538 116 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
1539 116 : if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
1540 4 : SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
1541 : "is not compatible with blobstore block size %u\n",
1542 : blob->id, bs_dev->blocklen, bs->io_unit_size);
1543 4 : bs_dev->destroy(bs_dev);
1544 4 : return -EINVAL;
1545 : }
1546 : }
1547 :
1548 112 : blob->back_bs_dev = bs_dev;
1549 112 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
1550 :
1551 112 : return 0;
1552 : }
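/* Worked example of the compatibility check above: an io_unit_size of 4096
 * with an esnap device blocklen of 512 passes (4096 % 512 == 0), while
 * io_unit_size 512 with blocklen 4096 is rejected, since a single blobstore
 * io unit could not then be expressed as whole device blocks.
 */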
1553 :
1554 : static void
1555 3428 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
1556 : {
1557 3428 : struct spdk_blob_load_ctx *ctx = cb_arg;
1558 3428 : struct spdk_blob *blob = ctx->blob;
1559 3428 : const void *value;
1560 3428 : size_t len;
1561 : int rc;
1562 :
1563 3428 : if (blob_is_esnap_clone(blob)) {
1564 124 : rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
1565 124 : blob_load_final(ctx, rc);
1566 124 : return;
1567 : }
1568 :
1569 3304 : if (spdk_blob_is_thin_provisioned(blob)) {
1570 1034 : rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
1571 1034 : if (rc == 0) {
1572 454 : if (len != sizeof(spdk_blob_id)) {
1573 0 : blob_load_final(ctx, -EINVAL);
1574 0 : return;
1575 : }
1576 : /* open snapshot blob and continue in the callback function */
1577 454 : blob->parent_id = *(spdk_blob_id *)value;
1578 454 : spdk_bs_open_blob(blob->bs, blob->parent_id,
1579 : blob_load_snapshot_cpl, ctx);
1580 454 : return;
1581 : } else {
1582 : /* add zeroes_dev for thin provisioned blob */
1583 580 : blob->back_bs_dev = bs_create_zeroes_dev();
1584 : }
1585 : } else {
1586 : /* standard blob */
1587 2270 : blob->back_bs_dev = NULL;
1588 : }
1589 2850 : blob_load_final(ctx, 0);
1590 : }
1591 :
1592 : static void
1593 2820 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1594 : {
1595 2820 : struct spdk_blob_load_ctx *ctx = cb_arg;
1596 2820 : struct spdk_blob *blob = ctx->blob;
1597 : struct spdk_blob_md_page *page;
1598 : uint64_t i;
1599 : uint32_t crc;
1600 : uint64_t lba;
1601 : void *tmp;
1602 : uint64_t sz;
1603 :
1604 2820 : if (bserrno) {
1605 6 : SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
1606 6 : blob_load_final(ctx, bserrno);
1607 6 : return;
1608 : }
1609 :
1610 2814 : if (ctx->pages == NULL) {
1611 : /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
1612 1768 : ctx->pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
1613 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
1614 1768 : if (!ctx->pages) {
1615 0 : blob_load_final(ctx, -ENOMEM);
1616 0 : return;
1617 : }
1618 1768 : ctx->num_pages = 1;
1619 1768 : ctx->next_extent_page = 0;
1620 : } else {
1621 1046 : page = &ctx->pages[0];
1622 1046 : crc = blob_md_page_calc_crc(page);
1623 1046 : if (crc != page->crc) {
1624 0 : blob_load_final(ctx, -EINVAL);
1625 0 : return;
1626 : }
1627 :
1628 1046 : if (page->next != SPDK_INVALID_MD_PAGE) {
1629 0 : blob_load_final(ctx, -EINVAL);
1630 0 : return;
1631 : }
1632 :
1633 1046 : bserrno = blob_parse_extent_page(page, blob);
1634 1046 : if (bserrno) {
1635 0 : blob_load_final(ctx, bserrno);
1636 0 : return;
1637 : }
1638 : }
1639 :
1640 3240 : for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
1641 1478 : if (blob->active.extent_pages[i] != 0) {
1642 : /* Extent page was allocated, read and parse it. */
1643 1052 : lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
1644 1052 : ctx->next_extent_page = i + 1;
1645 :
1646 1052 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1647 1052 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
1648 : blob_load_cpl_extents_cpl, ctx);
1649 1052 : return;
1650 : } else {
1651 : /* Thin provisioned blobs can point to unallocated extent pages.
1652 : * In this case the blob size should be increased by up to the amount left in remaining_clusters_in_et. */
1653 :
1654 426 : sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
1655 426 : blob->active.num_clusters += sz;
1656 426 : blob->remaining_clusters_in_et -= sz;
1657 :
1658 426 : assert(spdk_blob_is_thin_provisioned(blob));
1659 426 : assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
1660 :
1661 426 : tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
1662 426 : if (tmp == NULL) {
1663 0 : blob_load_final(ctx, -ENOMEM);
1664 0 : return;
1665 : }
1666 426 : memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
1667 426 : sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
1668 426 : blob->active.clusters = tmp;
1669 426 : blob->active.cluster_array_size = blob->active.num_clusters;
1670 : }
1671 : }
1672 :
1673 1762 : blob_load_backing_dev(seq, ctx);
1674 : }
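
/*
 * Worked example of the remaining_clusters_in_et accounting above, using a
 * hypothetical SPDK_EXTENTS_PER_EP of 512 (the real value comes from
 * blobstore.h): 1300 clusters spread over unallocated extent pages are
 * consumed as 512 + 512 + 276.
 */
#include <assert.h>
#include <stdint.h>

#define EXAMPLE_EXTENTS_PER_EP 512u /* hypothetical value */

static uint64_t
example_consume_unallocated_ep(uint64_t *remaining)
{
	uint64_t sz = *remaining < EXAMPLE_EXTENTS_PER_EP ? *remaining : EXAMPLE_EXTENTS_PER_EP;

	*remaining -= sz;
	return sz; /* number of clusters the blob grows by for this extent page */
}

static void
example_et_accounting(void)
{
	uint64_t remaining = 1300, total = 0;

	total += example_consume_unallocated_ep(&remaining); /* 512 */
	total += example_consume_unallocated_ep(&remaining); /* 512 */
	total += example_consume_unallocated_ep(&remaining); /* 276 */
	assert(total == 1300 && remaining == 0);
}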
1675 :
1676 : static void
1677 3574 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1678 : {
1679 3574 : struct spdk_blob_load_ctx *ctx = cb_arg;
1680 3574 : struct spdk_blob *blob = ctx->blob;
1681 : struct spdk_blob_md_page *page;
1682 : int rc;
1683 : uint32_t crc;
1684 : uint32_t current_page;
1685 :
1686 3574 : if (ctx->num_pages == 1) {
1687 3474 : current_page = bs_blobid_to_page(blob->id);
1688 : } else {
1689 100 : assert(ctx->num_pages != 0);
1690 100 : page = &ctx->pages[ctx->num_pages - 2];
1691 100 : current_page = page->next;
1692 : }
1693 :
1694 3574 : if (bserrno) {
1695 20 : SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
1696 : current_page, blob->id, bserrno);
1697 20 : blob_load_final(ctx, bserrno);
1698 20 : return;
1699 : }
1700 :
1701 3554 : page = &ctx->pages[ctx->num_pages - 1];
1702 3554 : crc = blob_md_page_calc_crc(page);
1703 3554 : if (crc != page->crc) {
1704 8 : SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
1705 : current_page, blob->id);
1706 8 : blob_load_final(ctx, -EINVAL);
1707 8 : return;
1708 : }
1709 :
1710 3546 : if (page->next != SPDK_INVALID_MD_PAGE) {
1711 : struct spdk_blob_md_page *tmp_pages;
1712 100 : uint32_t next_page = page->next;
1713 100 : uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
1714 :
1715 : /* Read the next page */
1716 100 : tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
1717 100 : if (tmp_pages == NULL) {
1718 0 : blob_load_final(ctx, -ENOMEM);
1719 0 : return;
1720 : }
1721 100 : ctx->num_pages++;
1722 100 : ctx->pages = tmp_pages;
1723 :
1724 100 : bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
1725 : next_lba,
1726 100 : bs_byte_to_lba(blob->bs, sizeof(*page)),
1727 : blob_load_cpl, ctx);
1728 100 : return;
1729 : }
1730 :
1731 : /* Parse the pages */
1732 3446 : rc = blob_parse(ctx->pages, ctx->num_pages, blob);
1733 3446 : if (rc) {
1734 12 : blob_load_final(ctx, rc);
1735 12 : return;
1736 : }
1737 :
1738 3434 : if (blob->extent_table_found == true) {
1739 : /* If EXTENT_TABLE was found, that means support for it should be enabled. */
1740 1768 : assert(blob->extent_rle_found == false);
1741 1768 : blob->use_extent_table = true;
1742 : } else {
1743 : /* If EXTENT_RLE or no extent_* descriptor was found, disable support
1744 : * for the extent table. No extent_* descriptors means the blob has a length of 0
1745 : * and no extent_rle descriptors were persisted for it.
1746 : * EXTENT_TABLE, if used, is always present in metadata regardless of length. */
1747 1666 : blob->use_extent_table = false;
1748 : }
1749 :
1750 : /* Check the clear_method stored in metadata vs what may have been passed
1751 : * via spdk_bs_open_blob_ext() and update accordingly.
1752 : */
1753 3434 : blob_update_clear_method(blob);
1754 :
1755 3434 : spdk_free(ctx->pages);
1756 3434 : ctx->pages = NULL;
1757 :
1758 3434 : if (blob->extent_table_found) {
1759 1768 : blob_load_cpl_extents_cpl(seq, ctx, 0);
1760 : } else {
1761 1666 : blob_load_backing_dev(seq, ctx);
1762 : }
1763 : }
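
/*
 * Minimal sketch of the metadata-chain walk performed above, run over an
 * in-memory array instead of the device: verify each page's CRC, then follow
 * page->next until the invalid-page sentinel. Types and the crc callback are
 * illustrative, not the blobstore's.
 */
#include <stddef.h>
#include <stdint.h>

#define EXAMPLE_INVALID_MD_PAGE UINT32_MAX

struct example_md_page {
	uint32_t next;
	uint32_t crc;
};

/* Returns the chain length, or 0 on a CRC mismatch, bad link, or cycle. */
static size_t
example_md_chain_walk(const struct example_md_page *pages, size_t npages, uint32_t first,
		      uint32_t (*calc_crc)(const struct example_md_page *page))
{
	size_t len = 0;
	uint32_t cur = first;

	while (cur != EXAMPLE_INVALID_MD_PAGE) {
		if (cur >= npages || len == npages) {
			return 0; /* link out of range, or more links than pages: a cycle */
		}
		if (calc_crc(&pages[cur]) != pages[cur].crc) {
			return 0; /* matches the -EINVAL path above */
		}
		len++;
		cur = pages[cur].next;
	}
	return len;
}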
1764 :
1765 : /* Load a blob from disk given a blobid */
1766 : static void
1767 3474 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
1768 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
1769 : {
1770 : struct spdk_blob_load_ctx *ctx;
1771 : struct spdk_blob_store *bs;
1772 : uint32_t page_num;
1773 : uint64_t lba;
1774 :
1775 3474 : blob_verify_md_op(blob);
1776 :
1777 3474 : bs = blob->bs;
1778 :
1779 3474 : ctx = calloc(1, sizeof(*ctx));
1780 3474 : if (!ctx) {
1781 0 : cb_fn(seq, cb_arg, -ENOMEM);
1782 0 : return;
1783 : }
1784 :
1785 3474 : ctx->blob = blob;
1786 3474 : ctx->pages = spdk_realloc(ctx->pages, SPDK_BS_PAGE_SIZE, 0);
1787 3474 : if (!ctx->pages) {
1788 0 : free(ctx);
1789 0 : cb_fn(seq, cb_arg, -ENOMEM);
1790 0 : return;
1791 : }
1792 3474 : ctx->num_pages = 1;
1793 3474 : ctx->cb_fn = cb_fn;
1794 3474 : ctx->cb_arg = cb_arg;
1795 3474 : ctx->seq = seq;
1796 :
1797 3474 : page_num = bs_blobid_to_page(blob->id);
1798 3474 : lba = bs_md_page_to_lba(blob->bs, page_num);
1799 :
1800 3474 : blob->state = SPDK_BLOB_STATE_LOADING;
1801 :
1802 3474 : bs_sequence_read_dev(seq, &ctx->pages[0], lba,
1803 3474 : bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE),
1804 : blob_load_cpl, ctx);
1805 : }
1806 :
1807 : struct spdk_blob_persist_ctx {
1808 : struct spdk_blob *blob;
1809 :
1810 : struct spdk_blob_md_page *pages;
1811 : uint32_t next_extent_page;
1812 : struct spdk_blob_md_page *extent_page;
1813 :
1814 : spdk_bs_sequence_t *seq;
1815 : spdk_bs_sequence_cpl cb_fn;
1816 : void *cb_arg;
1817 : TAILQ_ENTRY(spdk_blob_persist_ctx) link;
1818 : };
1819 :
1820 : static void
1821 1262 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
1822 : uint64_t lba_count)
1823 : {
1824 1262 : switch (blob->clear_method) {
1825 1262 : case BLOB_CLEAR_WITH_DEFAULT:
1826 : case BLOB_CLEAR_WITH_UNMAP:
1827 1262 : bs_batch_unmap_dev(batch, lba, lba_count);
1828 1262 : break;
1829 0 : case BLOB_CLEAR_WITH_WRITE_ZEROES:
1830 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1831 0 : break;
1832 0 : case BLOB_CLEAR_WITH_NONE:
1833 : default:
1834 0 : break;
1835 : }
1836 1262 : }
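
/*
 * The clear method dispatched above is chosen when the blob is created (and
 * may later be reconciled with what spdk_bs_open_blob_ext() passes in, per
 * blob_update_clear_method above); a minimal creation sketch assuming the
 * public spdk_bs_create_blob_ext() flow, with the callback left to the caller:
 */
#include "spdk/blob.h"

static void
example_create_unmap_blob(struct spdk_blob_store *bs,
			  spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
{
	struct spdk_blob_opts opts;

	spdk_blob_opts_init(&opts, sizeof(opts));
	opts.num_clusters = 16;
	/* Truncated clusters will be unmapped rather than written with zeroes */
	opts.clear_method = BLOB_CLEAR_WITH_UNMAP;
	spdk_bs_create_blob_ext(bs, &opts, cb_fn, cb_arg);
}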
1837 :
1838 : static int
1839 1152 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
1840 : {
1841 : uint32_t crc;
1842 : static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
1843 :
1844 1152 : if (super->version > SPDK_BS_VERSION ||
1845 1148 : super->version < SPDK_BS_INITIAL_VERSION) {
1846 8 : return -EILSEQ;
1847 : }
1848 :
1849 1144 : if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
1850 : sizeof(super->signature)) != 0) {
1851 0 : return -EILSEQ;
1852 : }
1853 :
1854 1144 : crc = blob_md_page_calc_crc(super);
1855 1144 : if (crc != super->crc) {
1856 4 : return -EILSEQ;
1857 : }
1858 :
1859 1140 : if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1860 1126 : SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
1861 14 : } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
1862 6 : SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless of bstype\n");
1863 : } else {
1864 8 : SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
1865 8 : SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1866 8 : SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
1867 8 : return -ENXIO;
1868 : }
1869 :
1870 1132 : if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
1871 8 : SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
1872 : bs->dev->blockcnt * bs->dev->blocklen, super->size);
1873 8 : return -EILSEQ;
1874 : }
1875 :
1876 1124 : return 0;
1877 : }
1878 :
1879 : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
1880 : spdk_bs_sequence_cpl cb_fn, void *cb_arg);
1881 :
1882 : static void
1883 5116 : blob_persist_complete_cb(void *arg)
1884 : {
1885 5116 : struct spdk_blob_persist_ctx *ctx = arg;
1886 :
1887 : /* Call user callback */
1888 5116 : ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
1889 :
1890 : /* Free the memory */
1891 5116 : spdk_free(ctx->pages);
1892 5116 : free(ctx);
1893 5116 : }
1894 :
1895 : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
1896 :
1897 : static void
1898 5116 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
1899 : {
1900 : struct spdk_blob_persist_ctx *next_persist, *tmp;
1901 5116 : struct spdk_blob *blob = ctx->blob;
1902 :
1903 5116 : if (bserrno == 0) {
1904 5064 : blob_mark_clean(blob);
1905 : }
1906 :
1907 5116 : assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
1908 :
1909 : /* Complete all persists that were pending when the current persist started */
1910 10232 : TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
1911 5116 : TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
1912 5116 : spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
1913 : }
1914 :
1915 5116 : if (TAILQ_EMPTY(&blob->pending_persists)) {
1916 5093 : return;
1917 : }
1918 :
1919 : /* Queue up all pending persists for completion and start blob persist with first one */
1920 23 : TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
1921 23 : next_persist = TAILQ_FIRST(&blob->persists_to_complete);
1922 :
1923 23 : blob->state = SPDK_BLOB_STATE_DIRTY;
1924 23 : bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
1925 : }
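
/*
 * The pending/in-flight handoff above is a reusable two-list pattern; a
 * standalone sketch with illustrative types, using the same FreeBSD-style
 * TAILQ macros. Requests arriving while a batch is running only ever touch
 * the pending list, so the running batch is never mutated underneath itself.
 */
#include "spdk/queue.h"

struct example_req {
	TAILQ_ENTRY(example_req) link;
};

TAILQ_HEAD(example_req_list, example_req);

/* Complete the finished batch, then promote the pending list; returns the
 * request the next batch should start with, or NULL if nothing is pending. */
static struct example_req *
example_finish_batch(struct example_req_list *in_flight, struct example_req_list *pending,
		     void (*complete)(struct example_req *req))
{
	struct example_req *req, *tmp;

	TAILQ_FOREACH_SAFE(req, in_flight, link, tmp) {
		TAILQ_REMOVE(in_flight, req, link);
		complete(req);
	}
	if (TAILQ_EMPTY(pending)) {
		return NULL;
	}
	TAILQ_SWAP(in_flight, pending, example_req, link);
	return TAILQ_FIRST(in_flight);
}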
1926 :
1927 : static void
1928 5064 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1929 : {
1930 5064 : struct spdk_blob_persist_ctx *ctx = cb_arg;
1931 5064 : struct spdk_blob *blob = ctx->blob;
1932 5064 : struct spdk_blob_store *bs = blob->bs;
1933 : size_t i;
1934 :
1935 5064 : if (bserrno != 0) {
1936 0 : blob_persist_complete(seq, ctx, bserrno);
1937 0 : return;
1938 : }
1939 :
1940 5064 : spdk_spin_lock(&bs->used_lock);
1941 :
1942 : /* Release all extent_pages that were truncated */
1943 6800 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1944 : /* Nothing to release if it was not allocated */
1945 1736 : if (blob->active.extent_pages[i] != 0) {
1946 626 : bs_release_md_page(bs, blob->active.extent_pages[i]);
1947 : }
1948 : }
1949 :
1950 5064 : spdk_spin_unlock(&bs->used_lock);
1951 :
1952 5064 : if (blob->active.num_extent_pages == 0) {
1953 3651 : free(blob->active.extent_pages);
1954 3651 : blob->active.extent_pages = NULL;
1955 3651 : blob->active.extent_pages_array_size = 0;
1956 1413 : } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
1957 : #ifndef __clang_analyzer__
1958 : void *tmp;
1959 :
1960 : /* scan-build really can't figure reallocs, workaround it */
1961 2 : tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
1962 2 : assert(tmp != NULL);
1963 2 : blob->active.extent_pages = tmp;
1964 : #endif
1965 2 : blob->active.extent_pages_array_size = blob->active.num_extent_pages;
1966 : }
1967 :
1968 5064 : blob_persist_complete(seq, ctx, bserrno);
1969 : }
1970 :
1971 : static void
1972 5064 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
1973 : {
1974 5064 : struct spdk_blob *blob = ctx->blob;
1975 5064 : struct spdk_blob_store *bs = blob->bs;
1976 : size_t i;
1977 : uint64_t lba;
1978 : uint64_t lba_count;
1979 : spdk_bs_batch_t *batch;
1980 :
1981 5064 : batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
1982 5064 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
1983 :
1984 : /* Clear all extent_pages that were truncated */
1985 6800 : for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
1986 : /* Nothing to clear if it was not allocated */
1987 1736 : if (blob->active.extent_pages[i] != 0) {
1988 626 : lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
1989 626 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
1990 : }
1991 : }
1992 :
1993 5064 : bs_batch_close(batch);
1994 5064 : }
1995 :
1996 : static void
1997 5064 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
1998 : {
1999 5064 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2000 5064 : struct spdk_blob *blob = ctx->blob;
2001 5064 : struct spdk_blob_store *bs = blob->bs;
2002 : size_t i;
2003 :
2004 5064 : if (bserrno != 0) {
2005 0 : blob_persist_complete(seq, ctx, bserrno);
2006 0 : return;
2007 : }
2008 :
2009 5064 : spdk_spin_lock(&bs->used_lock);
2010 : /* Release all clusters that were truncated */
2011 1074111 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
2012 1069047 : uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
2013 :
2014 : /* Nothing to release if it was not allocated */
2015 1069047 : if (blob->active.clusters[i] != 0) {
2016 2343 : bs_release_cluster(bs, cluster_num);
2017 : }
2018 : }
2019 5064 : spdk_spin_unlock(&bs->used_lock);
2020 :
2021 5064 : if (blob->active.num_clusters == 0) {
2022 1944 : free(blob->active.clusters);
2023 1944 : blob->active.clusters = NULL;
2024 1944 : blob->active.cluster_array_size = 0;
2025 3120 : } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
2026 : #ifndef __clang_analyzer__
2027 : void *tmp;
2028 :
2029 : /* scan-build really can't figure reallocs, workaround it */
2030 14 : tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
2031 14 : assert(tmp != NULL);
2032 14 : blob->active.clusters = tmp;
2033 :
2034 : #endif
2035 14 : blob->active.cluster_array_size = blob->active.num_clusters;
2036 : }
2037 :
2038 : /* Move on to clearing extent pages */
2039 5064 : blob_persist_clear_extents(seq, ctx);
2040 : }
2041 :
2042 : static void
2043 5064 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2044 : {
2045 5064 : struct spdk_blob *blob = ctx->blob;
2046 5064 : struct spdk_blob_store *bs = blob->bs;
2047 : spdk_bs_batch_t *batch;
2048 : size_t i;
2049 : uint64_t lba;
2050 : uint64_t lba_count;
2051 :
2052 : /* Clusters don't move around in blobs. The list shrinks or grows
2053 : * at the end, but no changes ever occur in the middle of the list.
2054 : */
2055 :
2056 5064 : batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
2057 :
2058 : /* Clear all clusters that were truncated */
2059 5064 : lba = 0;
2060 5064 : lba_count = 0;
2061 1074111 : for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
2062 1069047 : uint64_t next_lba = blob->active.clusters[i];
2063 1069047 : uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
2064 :
2065 1069047 : if (next_lba > 0 && (lba + lba_count) == next_lba) {
2066 : /* This cluster is contiguous with the previous one. */
2067 1085 : lba_count += next_lba_count;
2068 1085 : continue;
2069 1067962 : } else if (next_lba == 0) {
2070 1066704 : continue;
2071 : }
2072 :
2073 : /* This cluster is not contiguous with the previous one. */
2074 :
2075 : /* If a run of LBAs was previously accumulated, clear it now */
2076 1258 : if (lba_count > 0) {
2077 36 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2078 : }
2079 :
2080 : /* Start building the next batch */
2081 1258 : lba = next_lba;
2082 1258 : if (next_lba > 0) {
2083 1258 : lba_count = next_lba_count;
2084 : } else {
2085 0 : lba_count = 0;
2086 : }
2087 : }
2088 :
2089 : /* If we ended with a contiguous set of LBAs, clear them now */
2090 5064 : if (lba_count > 0) {
2091 1222 : bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
2092 : }
2093 :
2094 5064 : bs_batch_close(batch);
2095 5064 : }
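
/*
 * The loop above coalesces physically adjacent clusters into single clear
 * operations; a self-contained sketch of the same run detection over an LBA
 * array, where cluster_lbas[i] == 0 means "never allocated" just as in
 * blob->active.clusters (the clear callback stands in for bs_batch_clear_dev):
 */
#include <stddef.h>
#include <stdint.h>

static void
example_clear_runs(const uint64_t *cluster_lbas, size_t count, uint64_t lbas_per_cluster,
		   void (*clear)(uint64_t lba, uint64_t lba_count))
{
	uint64_t lba = 0, lba_count = 0;
	size_t i;

	for (i = 0; i < count; i++) {
		uint64_t next = cluster_lbas[i];

		if (next > 0 && lba + lba_count == next) {
			lba_count += lbas_per_cluster; /* contiguous: extend the run */
			continue;
		}
		if (next == 0) {
			continue; /* unallocated: nothing to clear */
		}
		if (lba_count > 0) {
			clear(lba, lba_count); /* flush the previous run */
		}
		lba = next; /* start a new run */
		lba_count = lbas_per_cluster;
	}
	if (lba_count > 0) {
		clear(lba, lba_count); /* flush the final run */
	}
}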
2096 :
2097 : static void
2098 5068 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2099 : {
2100 5068 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2101 5068 : struct spdk_blob *blob = ctx->blob;
2102 5068 : struct spdk_blob_store *bs = blob->bs;
2103 : size_t i;
2104 :
2105 5068 : if (bserrno != 0) {
2106 4 : blob_persist_complete(seq, ctx, bserrno);
2107 4 : return;
2108 : }
2109 :
2110 5064 : spdk_spin_lock(&bs->used_lock);
2111 :
2112 : /* This loop starts at 1 because the first page is special and handled
2113 : * below. The pages (except the first) are never written in place, so
2114 : * any pages in the clean list were zeroed above and can now be released.
2115 : */
2116 5132 : for (i = 1; i < blob->clean.num_pages; i++) {
2117 68 : bs_release_md_page(bs, blob->clean.pages[i]);
2118 : }
2119 :
2120 5064 : if (blob->active.num_pages == 0) {
2121 : uint32_t page_num;
2122 :
2123 1488 : page_num = bs_blobid_to_page(blob->id);
2124 1488 : bs_release_md_page(bs, page_num);
2125 : }
2126 :
2127 5064 : spdk_spin_unlock(&bs->used_lock);
2128 :
2129 : /* Move on to clearing clusters */
2130 5064 : blob_persist_clear_clusters(seq, ctx);
2131 : }
2132 :
2133 : static void
2134 5108 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2135 : {
2136 5108 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2137 5108 : struct spdk_blob *blob = ctx->blob;
2138 5108 : struct spdk_blob_store *bs = blob->bs;
2139 : uint64_t lba;
2140 : uint64_t lba_count;
2141 : spdk_bs_batch_t *batch;
2142 : size_t i;
2143 :
2144 5108 : if (bserrno != 0) {
2145 40 : blob_persist_complete(seq, ctx, bserrno);
2146 40 : return;
2147 : }
2148 :
2149 5068 : batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
2150 :
2151 5068 : lba_count = bs_byte_to_lba(bs, SPDK_BS_PAGE_SIZE);
2152 :
2153 : /* This loop starts at 1 because the first page is special and handled
2154 : * below. The pages (except the first) are never written in place,
2155 : * so any pages in the clean list must be zeroed.
2156 : */
2157 5136 : for (i = 1; i < blob->clean.num_pages; i++) {
2158 68 : lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
2159 :
2160 68 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2161 : }
2162 :
2163 : /* The first page will only be zeroed if this is a delete. */
2164 5068 : if (blob->active.num_pages == 0) {
2165 : uint32_t page_num;
2166 :
2167 : /* The first page in the metadata goes where the blobid indicates */
2168 1492 : page_num = bs_blobid_to_page(blob->id);
2169 1492 : lba = bs_md_page_to_lba(bs, page_num);
2170 :
2171 1492 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
2172 : }
2173 :
2174 5068 : bs_batch_close(batch);
2175 : }
2176 :
2177 : static void
2178 3616 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2179 : {
2180 3616 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2181 3616 : struct spdk_blob *blob = ctx->blob;
2182 3616 : struct spdk_blob_store *bs = blob->bs;
2183 : uint64_t lba;
2184 : uint32_t lba_count;
2185 : struct spdk_blob_md_page *page;
2186 :
2187 3616 : if (bserrno != 0) {
2188 0 : blob_persist_complete(seq, ctx, bserrno);
2189 0 : return;
2190 : }
2191 :
2192 3616 : if (blob->active.num_pages == 0) {
2193 : /* Move on to the next step */
2194 0 : blob_persist_zero_pages(seq, ctx, 0);
2195 0 : return;
2196 : }
2197 :
2198 3616 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2199 :
2200 3616 : page = &ctx->pages[0];
2201 : /* The first page in the metadata goes where the blobid indicates */
2202 3616 : lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
2203 :
2204 3616 : bs_sequence_write_dev(seq, page, lba, lba_count,
2205 : blob_persist_zero_pages, ctx);
2206 : }
2207 :
2208 : static void
2209 3616 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
2210 : {
2211 3616 : struct spdk_blob *blob = ctx->blob;
2212 3616 : struct spdk_blob_store *bs = blob->bs;
2213 : uint64_t lba;
2214 : uint32_t lba_count;
2215 : struct spdk_blob_md_page *page;
2216 : spdk_bs_batch_t *batch;
2217 : size_t i;
2218 :
2219 : /* Metadata pages (except the root) are never written in place. Each
2220 : * persist writes the chain to freshly claimed pages and writes the
2221 : * root page last, once all of the others are finished. */
2222 :
2223 3616 : lba_count = bs_byte_to_lba(bs, sizeof(*page));
2224 :
2225 3616 : batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
2226 :
2227 : /* This starts at 1. The root page is not written until
2228 : * all of the others are finished
2229 : */
2230 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2231 88 : page = &ctx->pages[i];
2232 88 : assert(page->sequence_num == i);
2233 :
2234 88 : lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
2235 :
2236 88 : bs_batch_write_dev(batch, page, lba, lba_count);
2237 : }
2238 :
2239 3616 : bs_batch_close(batch);
2240 3616 : }
2241 :
2242 : static int
2243 3576 : blob_resize(struct spdk_blob *blob, uint64_t sz)
2244 : {
2245 : uint64_t i;
2246 : uint64_t *tmp;
2247 3576 : uint64_t cluster;
2248 3576 : uint32_t lfmd; /* lowest free md page */
2249 : uint64_t num_clusters;
2250 : uint32_t *ep_tmp;
2251 3576 : uint64_t new_num_ep = 0, current_num_ep = 0;
2252 : struct spdk_blob_store *bs;
2253 : int rc;
2254 :
2255 3576 : bs = blob->bs;
2256 :
2257 3576 : blob_verify_md_op(blob);
2258 :
2259 3576 : if (blob->active.num_clusters == sz) {
2260 456 : return 0;
2261 : }
2262 :
2263 3120 : if (blob->active.num_clusters < blob->active.cluster_array_size) {
2264 : /* If this blob was resized to be larger, then smaller, then
2265 : * larger without syncing, then the cluster array already
2266 : * contains spare assigned clusters we can use.
2267 : */
2268 0 : num_clusters = spdk_min(blob->active.cluster_array_size,
2269 : sz);
2270 : } else {
2271 3120 : num_clusters = blob->active.num_clusters;
2272 : }
2273 :
2274 3120 : if (blob->use_extent_table) {
2275 : /* Round up, since every cluster beyond the current Extent Table size
2276 : * requires a new extent page. */
2277 1582 : new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
2278 1582 : current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
2279 : }
2280 :
2281 3120 : assert(!spdk_spin_held(&bs->used_lock));
2282 :
2283 : /* Check first that we have enough clusters and md pages before we start claiming them.
2284 : * bs->used_lock is held to ensure that clusters we think are free are still free when we go
2285 : * to claim them later in this function.
2286 : */
2287 3120 : if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
2288 1302 : spdk_spin_lock(&bs->used_lock);
2289 1302 : if ((sz - num_clusters) > bs->num_free_clusters) {
2290 8 : rc = -ENOSPC;
2291 8 : goto out;
2292 : }
2293 1294 : lfmd = 0;
2294 1938 : for (i = current_num_ep; i < new_num_ep ; i++) {
2295 644 : lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
2296 644 : if (lfmd == UINT32_MAX) {
2297 : /* No more free md pages. Cannot satisfy the request */
2298 0 : rc = -ENOSPC;
2299 0 : goto out;
2300 : }
2301 : }
2302 : }
2303 :
2304 3112 : if (sz > num_clusters) {
2305 : /* Expand the cluster array if necessary.
2306 : * We only shrink the array when persisting.
2307 : */
2308 1706 : tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
2309 1706 : if (sz > 0 && tmp == NULL) {
2310 0 : rc = -ENOMEM;
2311 0 : goto out;
2312 : }
2313 1706 : memset(tmp + blob->active.cluster_array_size, 0,
2314 1706 : sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
2315 1706 : blob->active.clusters = tmp;
2316 1706 : blob->active.cluster_array_size = sz;
2317 :
2318 : /* Expand the extent table, but only if enough clusters were added */
2319 1706 : if (new_num_ep > current_num_ep && blob->use_extent_table) {
2320 842 : ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
2321 842 : if (new_num_ep > 0 && ep_tmp == NULL) {
2322 0 : rc = -ENOMEM;
2323 0 : goto out;
2324 : }
2325 842 : memset(ep_tmp + blob->active.extent_pages_array_size, 0,
2326 842 : sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
2327 842 : blob->active.extent_pages = ep_tmp;
2328 842 : blob->active.extent_pages_array_size = new_num_ep;
2329 : }
2330 : }
2331 :
2332 3112 : blob->state = SPDK_BLOB_STATE_DIRTY;
2333 :
2334 3112 : if (spdk_blob_is_thin_provisioned(blob) == false) {
2335 2428 : cluster = 0;
2336 2428 : lfmd = 0;
2337 9832 : for (i = num_clusters; i < sz; i++) {
2338 7404 : bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
2339 : /* Do not increment lfmd here. lfmd will get updated
2340 : * to the md_page allocated (if any) when a new extent
2341 : * page is needed. Just pass the same value back in;
2342 : * bs_allocate_cluster will start at that index to find
2343 : * the next free md_page when needed.
2344 : */
2345 : }
2346 : }
2347 :
2348 : /* If we are shrinking the blob, we must adjust num_allocated_clusters */
2349 1072199 : for (i = sz; i < num_clusters; i++) {
2350 1069087 : if (blob->active.clusters[i] != 0) {
2351 2343 : blob->active.num_allocated_clusters--;
2352 : }
2353 : }
2354 :
2355 3112 : blob->active.num_clusters = sz;
2356 3112 : blob->active.num_extent_pages = new_num_ep;
2357 :
2358 3112 : rc = 0;
2359 3120 : out:
2360 3120 : if (spdk_spin_held(&bs->used_lock)) {
2361 1302 : spdk_spin_unlock(&bs->used_lock);
2362 : }
2363 :
2364 3120 : return rc;
2365 : }
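
/*
 * Worked example of the extent-page sizing in blob_resize() above, with a
 * hypothetical SPDK_EXTENTS_PER_EP of 512: growing to 1025 clusters needs
 * ceil(1025 / 512) = 3 extent pages, while exactly 512 clusters need 1.
 */
#include <assert.h>
#include <stdint.h>

static uint64_t
example_num_extent_pages(uint64_t num_clusters, uint64_t extents_per_ep)
{
	/* Same round-up division as spdk_divide_round_up() */
	return (num_clusters + extents_per_ep - 1) / extents_per_ep;
}

static void
example_resize_math(void)
{
	assert(example_num_extent_pages(1025, 512) == 3);
	assert(example_num_extent_pages(512, 512) == 1);
	assert(example_num_extent_pages(0, 512) == 0);
}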
2366 :
2367 : static void
2368 3616 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
2369 : {
2370 3616 : spdk_bs_sequence_t *seq = ctx->seq;
2371 3616 : struct spdk_blob *blob = ctx->blob;
2372 3616 : struct spdk_blob_store *bs = blob->bs;
2373 : uint64_t i;
2374 : uint32_t page_num;
2375 : void *tmp;
2376 : int rc;
2377 :
2378 : /* Generate the new metadata */
2379 3616 : rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
2380 3616 : if (rc < 0) {
2381 0 : blob_persist_complete(seq, ctx, rc);
2382 0 : return;
2383 : }
2384 :
2385 3616 : assert(blob->active.num_pages >= 1);
2386 :
2387 : /* Resize the cache of page indices */
2388 3616 : tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
2389 3616 : if (!tmp) {
2390 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2391 0 : return;
2392 : }
2393 3616 : blob->active.pages = tmp;
2394 :
2395 : /* Assign this metadata to pages. This requires two passes - one to verify that there are
2396 : * enough pages and a second to actually claim them. The used_lock is held across
2397 : * both passes to ensure things don't change in the middle.
2398 : */
2399 3616 : spdk_spin_lock(&bs->used_lock);
2400 3616 : page_num = 0;
2401 : /* Note that this loop starts at one. The first page location is fixed by the blobid. */
2402 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2403 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2404 88 : if (page_num == UINT32_MAX) {
2405 0 : spdk_spin_unlock(&bs->used_lock);
2406 0 : blob_persist_complete(seq, ctx, -ENOMEM);
2407 0 : return;
2408 : }
2409 88 : page_num++;
2410 : }
2411 :
2412 3616 : page_num = 0;
2413 3616 : blob->active.pages[0] = bs_blobid_to_page(blob->id);
2414 3704 : for (i = 1; i < blob->active.num_pages; i++) {
2415 88 : page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
2416 88 : ctx->pages[i - 1].next = page_num;
2417 : /* Now that previous metadata page is complete, calculate the crc for it. */
2418 88 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2419 88 : blob->active.pages[i] = page_num;
2420 88 : bs_claim_md_page(bs, page_num);
2421 88 : SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
2422 : blob->id);
2423 88 : page_num++;
2424 : }
2425 3616 : spdk_spin_unlock(&bs->used_lock);
2426 3616 : ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
2427 : /* Start writing the metadata from last page to first */
2428 3616 : blob->state = SPDK_BLOB_STATE_CLEAN;
2429 3616 : blob_persist_write_page_chain(seq, ctx);
2430 : }
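
/*
 * Sketch of the verify-then-claim pattern above, with a plain bool array in
 * place of the bs->used_md_pages bit array. The real code holds bs->used_lock
 * across both passes; here the array is assumed to be unshared.
 */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

static bool
example_claim_pages(bool *used, size_t nbits, size_t need, uint32_t *out)
{
	size_t found = 0, i;

	/* Pass 1: verify that enough free slots exist before touching
	 * anything, so failure needs no partial claims unwound. */
	for (i = 0; i < nbits && found < need; i++) {
		if (!used[i]) {
			found++;
		}
	}
	if (found < need) {
		return false;
	}

	/* Pass 2: claim them; the same slots are found again because
	 * nothing changed in between. */
	found = 0;
	for (i = 0; i < nbits && found < need; i++) {
		if (!used[i]) {
			used[i] = true;
			out[found++] = (uint32_t)i;
		}
	}
	return true;
}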
2431 :
2432 : static void
2433 2358 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2434 : {
2435 2358 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2436 2358 : struct spdk_blob *blob = ctx->blob;
2437 : size_t i;
2438 : uint32_t extent_page_id;
2439 2358 : uint32_t page_count = 0;
2440 : int rc;
2441 :
2442 2358 : if (ctx->extent_page != NULL) {
2443 666 : spdk_free(ctx->extent_page);
2444 666 : ctx->extent_page = NULL;
2445 : }
2446 :
2447 2358 : if (bserrno != 0) {
2448 0 : blob_persist_complete(seq, ctx, bserrno);
2449 0 : return;
2450 : }
2451 :
2452 : /* Only write out Extent Pages when the blob was resized. */
2453 4614 : for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
2454 2922 : extent_page_id = blob->active.extent_pages[i];
2455 2922 : if (extent_page_id == 0) {
2456 : /* No Extent Page to persist */
2457 2256 : assert(spdk_blob_is_thin_provisioned(blob));
2458 2256 : continue;
2459 : }
2460 666 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
2461 666 : ctx->next_extent_page = i + 1;
2462 666 : rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
2463 666 : if (rc < 0) {
2464 0 : blob_persist_complete(seq, ctx, rc);
2465 0 : return;
2466 : }
2467 :
2468 666 : blob->state = SPDK_BLOB_STATE_DIRTY;
2469 666 : blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
2470 :
2471 666 : ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
2472 :
2473 666 : bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
2474 666 : bs_byte_to_lba(blob->bs, SPDK_BS_PAGE_SIZE),
2475 : blob_persist_write_extent_pages, ctx);
2476 666 : return;
2477 : }
2478 :
2479 1692 : blob_persist_generate_new_md(ctx);
2480 : }
2481 :
2482 : static void
2483 5116 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2484 : {
2485 5116 : struct spdk_blob_persist_ctx *ctx = cb_arg;
2486 5116 : struct spdk_blob *blob = ctx->blob;
2487 :
2488 5116 : if (bserrno != 0) {
2489 8 : blob_persist_complete(seq, ctx, bserrno);
2490 8 : return;
2491 : }
2492 :
2493 5108 : if (blob->active.num_pages == 0) {
2494 : /* This is the signal that the blob should be deleted.
2495 : * Immediately jump to the cleanup routine. */
2496 1492 : assert(blob->clean.num_pages > 0);
2497 1492 : blob->state = SPDK_BLOB_STATE_CLEAN;
2498 1492 : blob_persist_zero_pages(seq, ctx, 0);
2499 1492 : return;
2500 :
2501 : }
2502 :
2503 3616 : if (blob->clean.num_clusters < blob->active.num_clusters) {
2504 : /* Blob was resized up */
2505 1678 : assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
2506 1678 : ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
2507 1938 : } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
2508 : /* Blob was resized down */
2509 14 : assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
2510 14 : ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
2511 : } else {
2512 : /* No change in size occurred */
2513 1924 : blob_persist_generate_new_md(ctx);
2514 1924 : return;
2515 : }
2516 :
2517 1692 : blob_persist_write_extent_pages(seq, ctx, 0);
2518 : }
2519 :
2520 : struct spdk_bs_mark_dirty {
2521 : struct spdk_blob_store *bs;
2522 : struct spdk_bs_super_block *super;
2523 : spdk_bs_sequence_cpl cb_fn;
2524 : void *cb_arg;
2525 : };
2526 :
2527 : static void
2528 158 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2529 : {
2530 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2531 :
2532 158 : if (bserrno == 0) {
2533 150 : ctx->bs->clean = 0;
2534 : }
2535 :
2536 158 : ctx->cb_fn(seq, ctx->cb_arg, bserrno);
2537 :
2538 158 : spdk_free(ctx->super);
2539 158 : free(ctx);
2540 158 : }
2541 :
2542 : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2543 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
2544 :
2545 :
2546 : static void
2547 158 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2548 : {
2549 158 : struct spdk_bs_mark_dirty *ctx = cb_arg;
2550 : int rc;
2551 :
2552 158 : if (bserrno != 0) {
2553 4 : bs_mark_dirty_write_cpl(seq, ctx, bserrno);
2554 4 : return;
2555 : }
2556 :
2557 154 : rc = bs_super_validate(ctx->super, ctx->bs);
2558 154 : if (rc != 0) {
2559 0 : bs_mark_dirty_write_cpl(seq, ctx, rc);
2560 0 : return;
2561 : }
2562 :
2563 154 : ctx->super->clean = 0;
2564 154 : if (ctx->super->size == 0) {
2565 4 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
2566 : }
2567 :
2568 154 : bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
2569 : }
2570 :
2571 : static void
2572 5550 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
2573 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2574 : {
2575 : struct spdk_bs_mark_dirty *ctx;
2576 :
2577 : /* Blobstore is already marked dirty */
2578 5550 : if (bs->clean == 0) {
2579 5392 : cb_fn(seq, cb_arg, 0);
2580 5392 : return;
2581 : }
2582 :
2583 158 : ctx = calloc(1, sizeof(*ctx));
2584 158 : if (!ctx) {
2585 0 : cb_fn(seq, cb_arg, -ENOMEM);
2586 0 : return;
2587 : }
2588 158 : ctx->bs = bs;
2589 158 : ctx->cb_fn = cb_fn;
2590 158 : ctx->cb_arg = cb_arg;
2591 :
2592 158 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
2593 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2594 158 : if (!ctx->super) {
2595 0 : free(ctx);
2596 0 : cb_fn(seq, cb_arg, -ENOMEM);
2597 0 : return;
2598 : }
2599 :
2600 158 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
2601 158 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
2602 : bs_mark_dirty_write, ctx);
2603 : }
2604 :
2605 : /* Write a blob to disk */
2606 : static void
2607 9144 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
2608 : spdk_bs_sequence_cpl cb_fn, void *cb_arg)
2609 : {
2610 : struct spdk_blob_persist_ctx *ctx;
2611 :
2612 9144 : blob_verify_md_op(blob);
2613 :
2614 9144 : if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
2615 4028 : cb_fn(seq, cb_arg, 0);
2616 4028 : return;
2617 : }
2618 :
2619 5116 : ctx = calloc(1, sizeof(*ctx));
2620 5116 : if (!ctx) {
2621 0 : cb_fn(seq, cb_arg, -ENOMEM);
2622 0 : return;
2623 : }
2624 5116 : ctx->blob = blob;
2625 5116 : ctx->seq = seq;
2626 5116 : ctx->cb_fn = cb_fn;
2627 5116 : ctx->cb_arg = cb_arg;
2628 :
2629 : /* Multiple blob persists can affect one another, via blob->state or
2630 : * blob mutable data changes. To prevent it, queue up the persists. */
2631 5116 : if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
2632 23 : TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
2633 23 : return;
2634 : }
2635 5093 : TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
2636 :
2637 5093 : bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
2638 : }
2639 :
2640 : struct spdk_blob_copy_cluster_ctx {
2641 : struct spdk_blob *blob;
2642 : uint8_t *buf;
2643 : uint64_t page;
2644 : uint64_t new_cluster;
2645 : uint32_t new_extent_page;
2646 : spdk_bs_sequence_t *seq;
2647 : struct spdk_blob_md_page *new_cluster_page;
2648 : };
2649 :
2650 : struct spdk_blob_free_cluster_ctx {
2651 : struct spdk_blob *blob;
2652 : uint64_t page;
2653 : struct spdk_blob_md_page *md_page;
2654 : uint64_t cluster_num;
2655 : uint32_t extent_page;
2656 : spdk_bs_sequence_t *seq;
2657 : };
2658 :
2659 : static void
2660 812 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
2661 : {
2662 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2663 812 : struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
2664 812 : TAILQ_HEAD(, spdk_bs_request_set) requests;
2665 : spdk_bs_user_op_t *op;
2666 :
2667 812 : TAILQ_INIT(&requests);
2668 812 : TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
2669 :
2670 1624 : while (!TAILQ_EMPTY(&requests)) {
2671 812 : op = TAILQ_FIRST(&requests);
2672 812 : TAILQ_REMOVE(&requests, op, link);
2673 812 : if (bserrno == 0) {
2674 812 : bs_user_op_execute(op);
2675 : } else {
2676 0 : bs_user_op_abort(op, bserrno);
2677 : }
2678 : }
2679 :
2680 812 : spdk_free(ctx->buf);
2681 812 : free(ctx);
2682 812 : }
2683 :
2684 : static void
2685 60 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
2686 : {
2687 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
2688 60 : spdk_bs_sequence_t *seq = ctx->seq;
2689 :
2690 60 : bs_sequence_finish(seq, bserrno);
2691 :
2692 60 : free(ctx);
2693 60 : }
2694 :
2695 : static void
2696 4 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
2697 : {
2698 4 : spdk_spin_lock(&ctx->blob->bs->used_lock);
2699 4 : bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
2700 4 : if (ctx->new_extent_page != 0) {
2701 2 : bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
2702 : }
2703 4 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
2704 4 : }
2705 :
2706 : static void
2707 4 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
2708 : {
2709 4 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2710 :
2711 4 : if (bserrno) {
2712 0 : SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
2713 : }
2714 :
2715 4 : blob_insert_cluster_revert(ctx);
2716 4 : bs_sequence_finish(ctx->seq, bserrno);
2717 4 : }
2718 :
2719 : static void
2720 4 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
2721 : {
2722 4 : struct spdk_bs_cpl cpl;
2723 : spdk_bs_batch_t *batch;
2724 4 : struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
2725 :
2726 : /*
2727 : * We allocated a cluster and we copied data to it. But now, we realized that we don't need
2728 : * this cluster and we want to release it. We must ensure that we clear the data on this
2729 : * cluster.
2730 : * The cluster may later be re-allocated by a thick-provisioned blob for example. When
2731 : * reading from this thick-provisioned blob before writing data, we should read zeroes.
2732 : */
2733 :
2734 4 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2735 4 : cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
2736 4 : cpl.u.blob_basic.cb_arg = ctx;
2737 :
2738 4 : batch = bs_batch_open(ch, &cpl, ctx->blob);
2739 4 : if (!batch) {
2740 0 : blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
2741 0 : return;
2742 : }
2743 :
2744 4 : bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2745 4 : bs_cluster_to_lba(ctx->blob->bs, 1));
2746 4 : bs_batch_close(batch);
2747 : }
2748 :
2749 : static void
2750 812 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
2751 : {
2752 812 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2753 :
2754 812 : if (bserrno) {
2755 4 : if (bserrno == -EEXIST) {
2756 : /* The metadata insert failed because another thread
2757 : * allocated the cluster first. Clear and free our cluster
2758 : * but continue without error. */
2759 4 : blob_insert_cluster_clear(ctx);
2760 4 : return;
2761 : }
2762 :
2763 0 : blob_insert_cluster_revert(ctx);
2764 : }
2765 :
2766 808 : bs_sequence_finish(ctx->seq, bserrno);
2767 : }
2768 :
2769 : static void
2770 408 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2771 : {
2772 408 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2773 : uint32_t cluster_number;
2774 :
2775 408 : if (bserrno) {
2776 : /* The write failed, so jump to the final completion handler */
2777 0 : bs_sequence_finish(seq, bserrno);
2778 0 : return;
2779 : }
2780 :
2781 408 : cluster_number = bs_page_to_cluster(ctx->blob->bs, ctx->page);
2782 :
2783 408 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2784 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2785 : }
2786 :
2787 : static void
2788 280 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
2789 : {
2790 280 : struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
2791 :
2792 280 : if (bserrno != 0) {
2793 : /* The read failed, so jump to the final completion handler */
2794 0 : bs_sequence_finish(seq, bserrno);
2795 0 : return;
2796 : }
2797 :
2798 : /* Write whole cluster */
2799 280 : bs_sequence_write_dev(seq, ctx->buf,
2800 280 : bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
2801 280 : bs_cluster_to_lba(ctx->blob->bs, 1),
2802 : blob_write_copy_cpl, ctx);
2803 : }
2804 :
2805 : static bool
2806 796 : blob_can_copy(struct spdk_blob *blob, uint32_t cluster_start_page, uint64_t *base_lba)
2807 : {
2808 796 : uint64_t lba = bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page);
2809 :
2810 1146 : return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
2811 350 : blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
2812 : }
2813 :
2814 : static void
2815 128 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
2816 : {
2817 128 : struct spdk_blob *blob = ctx->blob;
2818 128 : uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
2819 :
2820 128 : bs_sequence_copy_dev(ctx->seq,
2821 128 : bs_cluster_to_lba(blob->bs, ctx->new_cluster),
2822 : src_lba,
2823 : lba_count,
2824 : blob_write_copy_cpl, ctx);
2825 128 : }
2826 :
2827 : static void
2828 812 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
2829 : struct spdk_io_channel *_ch,
2830 : uint64_t io_unit, spdk_bs_user_op_t *op)
2831 : {
2832 812 : struct spdk_bs_cpl cpl;
2833 : struct spdk_bs_channel *ch;
2834 : struct spdk_blob_copy_cluster_ctx *ctx;
2835 : uint32_t cluster_start_page;
2836 : uint32_t cluster_number;
2837 : bool is_zeroes;
2838 : bool can_copy;
2839 : bool is_valid_range;
2840 812 : uint64_t copy_src_lba;
2841 : int rc;
2842 :
2843 812 : ch = spdk_io_channel_get_ctx(_ch);
2844 :
2845 812 : if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
2846 : /* There are already operations pending. Queue this user op
2847 : * and return because it will be re-executed when the outstanding
2848 : * cluster allocation completes. */
2849 0 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2850 0 : return;
2851 : }
2852 :
2853 : /* Round the io_unit offset down to the first page in the cluster */
2854 812 : cluster_start_page = bs_io_unit_to_cluster_start(blob, io_unit);
2855 :
2856 : /* Calculate which index in the metadata cluster array the corresponding
2857 : * cluster is supposed to be at. */
2858 812 : cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
2859 :
2860 812 : ctx = calloc(1, sizeof(*ctx));
2861 812 : if (!ctx) {
2862 0 : bs_user_op_abort(op, -ENOMEM);
2863 0 : return;
2864 : }
2865 :
2866 812 : assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
2867 :
2868 812 : ctx->blob = blob;
2869 812 : ctx->page = cluster_start_page;
2870 812 : ctx->new_cluster_page = ch->new_cluster_page;
2871 812 : memset(ctx->new_cluster_page, 0, SPDK_BS_PAGE_SIZE);
2872 :
2873 : /* Check if the cluster that we intend to do CoW for is valid for
2874 : * the backing dev. For a zeroes backing dev, it is always valid.
2875 : * For other backing devs, e.g. a snapshot, it could be invalid if
2876 : * the blob has been resized after the snapshot was taken. */
2877 812 : is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
2878 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2879 812 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2880 :
2881 812 : can_copy = is_valid_range && blob_can_copy(blob, cluster_start_page, ©_src_lba);
2882 :
2883 1608 : is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
2884 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2885 796 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
2886 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
2887 280 : ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
2888 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
2889 280 : if (!ctx->buf) {
2890 0 : SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
2891 : blob->bs->cluster_sz);
2892 0 : free(ctx);
2893 0 : bs_user_op_abort(op, -ENOMEM);
2894 0 : return;
2895 : }
2896 : }
2897 :
2898 812 : spdk_spin_lock(&blob->bs->used_lock);
2899 812 : rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
2900 : false);
2901 812 : spdk_spin_unlock(&blob->bs->used_lock);
2902 812 : if (rc != 0) {
2903 0 : spdk_free(ctx->buf);
2904 0 : free(ctx);
2905 0 : bs_user_op_abort(op, rc);
2906 0 : return;
2907 : }
2908 :
2909 812 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
2910 812 : cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
2911 812 : cpl.u.blob_basic.cb_arg = ctx;
2912 :
2913 812 : ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
2914 812 : if (!ctx->seq) {
2915 0 : spdk_spin_lock(&blob->bs->used_lock);
2916 0 : bs_release_cluster(blob->bs, ctx->new_cluster);
2917 0 : spdk_spin_unlock(&blob->bs->used_lock);
2918 0 : spdk_free(ctx->buf);
2919 0 : free(ctx);
2920 0 : bs_user_op_abort(op, -ENOMEM);
2921 0 : return;
2922 : }
2923 :
2924 : /* Queue the user op to block other incoming operations */
2925 812 : TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
2926 :
2927 812 : if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
2928 408 : if (can_copy) {
2929 128 : blob_copy(ctx, op, copy_src_lba);
2930 : } else {
2931 : /* Read cluster from backing device */
2932 280 : bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
2933 : bs_dev_page_to_lba(blob->back_bs_dev, cluster_start_page),
2934 280 : bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
2935 : blob_write_copy, ctx);
2936 : }
2937 :
2938 : } else {
2939 404 : blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
2940 : ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
2941 : }
2942 : }
2943 :
2944 : static inline bool
2945 40206 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
2946 : uint64_t *lba, uint64_t *lba_count)
2947 : {
2948 40206 : *lba_count = length;
2949 :
2950 40206 : if (!bs_io_unit_is_allocated(blob, io_unit)) {
2951 2992 : assert(blob->back_bs_dev != NULL);
2952 2992 : *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
2953 2992 : *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
2954 2992 : return false;
2955 : } else {
2956 37214 : *lba = bs_blob_io_unit_to_lba(blob, io_unit);
2957 37214 : return true;
2958 : }
2959 : }
2960 :
2961 : struct op_split_ctx {
2962 : struct spdk_blob *blob;
2963 : struct spdk_io_channel *channel;
2964 : uint64_t io_unit_offset;
2965 : uint64_t io_units_remaining;
2966 : void *curr_payload;
2967 : enum spdk_blob_op_type op_type;
2968 : spdk_bs_sequence_t *seq;
2969 : bool in_submit_ctx;
2970 : bool completed_in_submit_ctx;
2971 : bool done;
2972 : };
2973 :
2974 : static void
2975 774 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
2976 : {
2977 774 : struct op_split_ctx *ctx = cb_arg;
2978 774 : struct spdk_blob *blob = ctx->blob;
2979 774 : struct spdk_io_channel *ch = ctx->channel;
2980 774 : enum spdk_blob_op_type op_type = ctx->op_type;
2981 : uint8_t *buf;
2982 : uint64_t offset;
2983 : uint64_t length;
2984 : uint64_t op_length;
2985 :
2986 774 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
2987 178 : bs_sequence_finish(ctx->seq, bserrno);
2988 178 : if (ctx->in_submit_ctx) {
2989 : /* Defer freeing of the ctx object, since it will be
2990 : * accessed when this unwinds back to the submission
2991 : * context.
2992 : */
2993 40 : ctx->done = true;
2994 : } else {
2995 138 : free(ctx);
2996 : }
2997 178 : return;
2998 : }
2999 :
3000 596 : if (ctx->in_submit_ctx) {
3001 : /* If this split operation completed in the context
3002 : * of its submission, mark the flag and return immediately
3003 : * to avoid recursion.
3004 : */
3005 68 : ctx->completed_in_submit_ctx = true;
3006 68 : return;
3007 : }
3008 :
3009 : while (true) {
3010 596 : ctx->completed_in_submit_ctx = false;
3011 :
3012 596 : offset = ctx->io_unit_offset;
3013 596 : length = ctx->io_units_remaining;
3014 596 : buf = ctx->curr_payload;
3015 596 : op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
3016 : offset));
3017 :
3018 : /* Update length and payload for next operation */
3019 596 : ctx->io_units_remaining -= op_length;
3020 596 : ctx->io_unit_offset += op_length;
3021 596 : if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
3022 528 : ctx->curr_payload += op_length * blob->bs->io_unit_size;
3023 : }
3024 :
3025 596 : assert(!ctx->in_submit_ctx);
3026 596 : ctx->in_submit_ctx = true;
3027 :
3028 596 : switch (op_type) {
3029 418 : case SPDK_BLOB_READ:
3030 418 : spdk_blob_io_read(blob, ch, buf, offset, op_length,
3031 : blob_request_submit_op_split_next, ctx);
3032 418 : break;
3033 110 : case SPDK_BLOB_WRITE:
3034 110 : spdk_blob_io_write(blob, ch, buf, offset, op_length,
3035 : blob_request_submit_op_split_next, ctx);
3036 110 : break;
3037 36 : case SPDK_BLOB_UNMAP:
3038 36 : spdk_blob_io_unmap(blob, ch, offset, op_length,
3039 : blob_request_submit_op_split_next, ctx);
3040 36 : break;
3041 32 : case SPDK_BLOB_WRITE_ZEROES:
3042 32 : spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
3043 : blob_request_submit_op_split_next, ctx);
3044 32 : break;
3045 0 : case SPDK_BLOB_READV:
3046 : case SPDK_BLOB_WRITEV:
3047 0 : SPDK_ERRLOG("readv/writev not valid\n");
3048 0 : bs_sequence_finish(ctx->seq, -EINVAL);
3049 0 : free(ctx);
3050 0 : return;
3051 : }
3052 :
3053 : #ifndef __clang_analyzer__
3054 : /* scan-build reports a false positive around accessing the ctx here. It
3055 : * forms a path that recursively calls this function, but then says
3056 : * "assuming ctx->in_submit_ctx is false", when that isn't possible.
3057 : * This path does free(ctx), returns to here, and reports a use-after-free
3058 : * bug. Wrapping this bit of code so that scan-build doesn't see it
3059 : * works around the scan-build bug.
3060 : */
3061 596 : assert(ctx->in_submit_ctx);
3062 596 : ctx->in_submit_ctx = false;
3063 :
3064 : /* If the operation completed immediately, loop back and submit the
3065 : * next operation. Otherwise we can return and the next split
3066 : * operation will get submitted when this current operation is
3067 : * later completed asynchronously.
3068 : */
3069 596 : if (ctx->completed_in_submit_ctx) {
3070 68 : continue;
3071 528 : } else if (ctx->done) {
3072 40 : free(ctx);
3073 : }
3074 : #endif
3075 528 : break;
3076 : }
3077 : }
3078 :
3079 : static void
3080 178 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
3081 : void *payload, uint64_t offset, uint64_t length,
3082 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3083 : {
3084 : struct op_split_ctx *ctx;
3085 : spdk_bs_sequence_t *seq;
3086 178 : struct spdk_bs_cpl cpl;
3087 :
3088 178 : assert(blob != NULL);
3089 :
3090 178 : ctx = calloc(1, sizeof(struct op_split_ctx));
3091 178 : if (ctx == NULL) {
3092 0 : cb_fn(cb_arg, -ENOMEM);
3093 0 : return;
3094 : }
3095 :
3096 178 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3097 178 : cpl.u.blob_basic.cb_fn = cb_fn;
3098 178 : cpl.u.blob_basic.cb_arg = cb_arg;
3099 :
3100 178 : seq = bs_sequence_start_blob(ch, &cpl, blob);
3101 178 : if (!seq) {
3102 0 : free(ctx);
3103 0 : cb_fn(cb_arg, -ENOMEM);
3104 0 : return;
3105 : }
3106 :
3107 178 : ctx->blob = blob;
3108 178 : ctx->channel = ch;
3109 178 : ctx->curr_payload = payload;
3110 178 : ctx->io_unit_offset = offset;
3111 178 : ctx->io_units_remaining = length;
3112 178 : ctx->op_type = op_type;
3113 178 : ctx->seq = seq;
3114 :
3115 178 : blob_request_submit_op_split_next(ctx, 0);
3116 : }
3117 :
3118 : static void
3119 60 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
3120 : {
3121 60 : struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
3122 :
3123 60 : if (bserrno) {
3124 0 : bs_sequence_finish(ctx->seq, bserrno);
3125 0 : free(ctx);
3126 0 : return;
3127 : }
3128 :
3129 60 : blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
3130 : ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
3131 : }
3132 :
3133 : static void
3134 37834 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
3135 : void *payload, uint64_t offset, uint64_t length,
3136 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3137 : {
3138 37834 : struct spdk_bs_cpl cpl;
3139 37834 : uint64_t lba;
3140 37834 : uint64_t lba_count;
3141 : bool is_allocated;
3142 :
3143 37834 : assert(blob != NULL);
3144 :
3145 37834 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3146 37834 : cpl.u.blob_basic.cb_fn = cb_fn;
3147 37834 : cpl.u.blob_basic.cb_arg = cb_arg;
3148 :
3149 37834 : if (blob->frozen_refcnt) {
3150 : /* This blob I/O is frozen */
3151 : spdk_bs_user_op_t *op;
3152 4 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3153 :
3154 4 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3155 4 : if (!op) {
3156 0 : cb_fn(cb_arg, -ENOMEM);
3157 0 : return;
3158 : }
3159 :
3160 4 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3161 :
3162 4 : return;
3163 : }
3164 :
3165 37830 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3166 :
3167 37830 : switch (op_type) {
3168 16887 : case SPDK_BLOB_READ: {
3169 : spdk_bs_batch_t *batch;
3170 :
3171 16887 : batch = bs_batch_open(_ch, &cpl, blob);
3172 16887 : if (!batch) {
3173 0 : cb_fn(cb_arg, -ENOMEM);
3174 0 : return;
3175 : }
3176 :
3177 16887 : if (is_allocated) {
3178 : /* Read from the blob */
3179 15799 : bs_batch_read_dev(batch, payload, lba, lba_count);
3180 : } else {
3181 : /* Read from the backing block device */
3182 1088 : bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
3183 : }
3184 :
3185 16887 : bs_batch_close(batch);
3186 16887 : break;
3187 : }
3188 20851 : case SPDK_BLOB_WRITE:
3189 : case SPDK_BLOB_WRITE_ZEROES: {
3190 20851 : if (is_allocated) {
3191 : /* Write to the blob */
3192 : spdk_bs_batch_t *batch;
3193 :
3194 20507 : if (lba_count == 0) {
3195 0 : cb_fn(cb_arg, 0);
3196 0 : return;
3197 : }
3198 :
3199 20507 : batch = bs_batch_open(_ch, &cpl, blob);
3200 20507 : if (!batch) {
3201 0 : cb_fn(cb_arg, -ENOMEM);
3202 0 : return;
3203 : }
3204 :
3205 20507 : if (op_type == SPDK_BLOB_WRITE) {
3206 20475 : bs_batch_write_dev(batch, payload, lba, lba_count);
3207 : } else {
3208 32 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
3209 : }
3210 :
3211 20507 : bs_batch_close(batch);
3212 : } else {
3213 : /* Queue this operation and allocate the cluster */
3214 : spdk_bs_user_op_t *op;
3215 :
3216 344 : op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
3217 344 : if (!op) {
3218 0 : cb_fn(cb_arg, -ENOMEM);
3219 0 : return;
3220 : }
3221 :
3222 344 : bs_allocate_and_copy_cluster(blob, _ch, offset, op);
3223 : }
3224 20851 : break;
3225 : }
3226 92 : case SPDK_BLOB_UNMAP: {
3227 92 : struct spdk_blob_free_cluster_ctx *ctx = NULL;
3228 : spdk_bs_batch_t *batch;
3229 :
3230 : /* If the unmap is cluster-aligned and covers a whole cluster, release the cluster */
3231 160 : if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
3232 68 : bs_io_units_per_cluster(blob) == length) {
3233 60 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
3234 : uint32_t cluster_start_page;
3235 : uint32_t cluster_number;
3236 :
3237 60 : assert(offset % bs_io_units_per_cluster(blob) == 0);
3238 :
3239 : /* Round the io_unit offset down to the first page in the cluster */
3240 60 : cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
3241 :
3242 : /* Calculate which index in the metadata cluster array the corresponding
3243 : * cluster is supposed to be at. */
3244 60 : cluster_number = bs_io_unit_to_cluster_number(blob, offset);
3245 :
3246 60 : ctx = calloc(1, sizeof(*ctx));
3247 60 : if (!ctx) {
3248 0 : cb_fn(cb_arg, -ENOMEM);
3249 0 : return;
3250 : }
3251 : /* When freeing a cluster, the flow should be (in order):
3252 : * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
3253 : * old data)
3254 : * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
3255 : * cluster), update and sync metadata freeing the cluster
3256 : * 3. Once metadata update is done, complete the user unmap request
3257 : */
3258 60 : ctx->blob = blob;
3259 60 : ctx->page = cluster_start_page;
3260 60 : ctx->cluster_num = cluster_number;
3261 60 : ctx->md_page = bs_channel->new_cluster_page;
3262 60 : ctx->seq = bs_sequence_start_bs(_ch, &cpl);
3263 60 : if (!ctx->seq) {
3264 0 : free(ctx);
3265 0 : cb_fn(cb_arg, -ENOMEM);
3266 0 : return;
3267 : }
3268 :
3269 60 : if (blob->use_extent_table) {
3270 30 : ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
3271 : }
3272 :
3273 60 : cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
3274 60 : cpl.u.blob_basic.cb_arg = ctx;
3275 : }
3276 :
3277 92 : batch = bs_batch_open(_ch, &cpl, blob);
3278 92 : if (!batch) {
3279 0 : free(ctx);
3280 0 : cb_fn(cb_arg, -ENOMEM);
3281 0 : return;
3282 : }
3283 :
3284 92 : if (is_allocated) {
3285 92 : bs_batch_unmap_dev(batch, lba, lba_count);
3286 : }
3287 :
3288 92 : bs_batch_close(batch);
3289 92 : break;
3290 : }
3291 0 : case SPDK_BLOB_READV:
3292 : case SPDK_BLOB_WRITEV:
3293 0 : SPDK_ERRLOG("readv/writev not valid\n");
3294 0 : cb_fn(cb_arg, -EINVAL);
3295 0 : break;
3296 : }
3297 : }
3298 :
3299 : static void
3300 38524 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3301 : void *payload, uint64_t offset, uint64_t length,
3302 : spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
3303 : {
3304 38524 : assert(blob != NULL);
3305 :
3306 38524 : if (blob->data_ro && op_type != SPDK_BLOB_READ) {
3307 4 : cb_fn(cb_arg, -EPERM);
3308 4 : return;
3309 : }
3310 :
3311 38520 : if (length == 0) {
3312 492 : cb_fn(cb_arg, 0);
3313 492 : return;
3314 : }
3315 :
3316 38028 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3317 24 : cb_fn(cb_arg, -EINVAL);
3318 24 : return;
3319 : }
3320 38004 : if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
3321 37826 : blob_request_submit_op_single(_channel, blob, payload, offset, length,
3322 : cb_fn, cb_arg, op_type);
3323 : } else {
3324 178 : blob_request_submit_op_split(_channel, blob, payload, offset, length,
3325 : cb_fn, cb_arg, op_type);
3326 : }
3327 : }
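/*
 * Editorial sketch (not part of blobstore.c): the single-vs-split routing
 * above in compact form. All names here are hypothetical; the real code
 * delegates the boundary math to bs_num_io_units_to_cluster_boundary().
 */
static inline bool
example_io_needs_split(uint64_t offset, uint64_t length,
		       uint64_t io_units_per_cluster)
{
	/* io units left before the next cluster boundary */
	uint64_t to_boundary = io_units_per_cluster -
			       (offset % io_units_per_cluster);

	/* Requests that fit within the current cluster take the single-op
	 * path; anything longer must be split at the boundary. */
	return length > to_boundary;
}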
3328 :
3329 : struct rw_iov_ctx {
3330 : struct spdk_blob *blob;
3331 : struct spdk_io_channel *channel;
3332 : spdk_blob_op_complete cb_fn;
3333 : void *cb_arg;
3334 : bool read;
3335 : int iovcnt;
3336 : struct iovec *orig_iov;
3337 : uint64_t io_unit_offset;
3338 : uint64_t io_units_remaining;
3339 : uint64_t io_units_done;
3340 : struct spdk_blob_ext_io_opts *ext_io_opts;
3341 : struct iovec iov[0];
3342 : };
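/*
 * Note: iov[0] above is a zero-length trailing array, so one allocation holds
 * both the context and the per-request scratch iov array. The allocation
 * pattern used below in blob_request_submit_rw_iov() is:
 *
 *     ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
 */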
3343 :
3344 : static void
3345 2360 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
3346 : {
3347 2360 : assert(cb_arg == NULL);
3348 2360 : bs_sequence_finish(seq, bserrno);
3349 2360 : }
3350 :
3351 : static void
3352 744 : rw_iov_split_next(void *cb_arg, int bserrno)
3353 : {
3354 744 : struct rw_iov_ctx *ctx = cb_arg;
3355 744 : struct spdk_blob *blob = ctx->blob;
3356 : struct iovec *iov, *orig_iov;
3357 : int iovcnt;
3358 : size_t orig_iovoff;
3359 : uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
3360 : uint64_t byte_count;
3361 :
3362 744 : if (bserrno != 0 || ctx->io_units_remaining == 0) {
3363 204 : ctx->cb_fn(ctx->cb_arg, bserrno);
3364 204 : free(ctx);
3365 204 : return;
3366 : }
3367 :
3368 540 : io_unit_offset = ctx->io_unit_offset;
3369 540 : io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
3370 540 : io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
3371 : /*
3372 : * Get index and offset into the original iov array for our current position in the I/O sequence.
3373 : * byte_count tracks how many bytes remain until orig_iov and orig_iovoff
3374 : * point to the current position in the I/O sequence.
3375 : */
3376 540 : byte_count = ctx->io_units_done * blob->bs->io_unit_size;
3377 540 : orig_iov = &ctx->orig_iov[0];
3378 540 : orig_iovoff = 0;
3379 1148 : while (byte_count > 0) {
3380 608 : if (byte_count >= orig_iov->iov_len) {
3381 352 : byte_count -= orig_iov->iov_len;
3382 352 : orig_iov++;
3383 : } else {
3384 256 : orig_iovoff = byte_count;
3385 256 : byte_count = 0;
3386 : }
3387 : }
3388 :
3389 : /*
3390 : * Build an iov array for the next I/O in the sequence. byte_count will keep track of how many
3391 : * bytes of this next I/O remain to be accounted for in the new iov array.
3392 : */
3393 540 : byte_count = io_units_count * blob->bs->io_unit_size;
3394 540 : iov = &ctx->iov[0];
3395 540 : iovcnt = 0;
3396 1380 : while (byte_count > 0) {
3397 840 : assert(iovcnt < ctx->iovcnt);
3398 840 : iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
3399 840 : iov->iov_base = orig_iov->iov_base + orig_iovoff;
3400 840 : byte_count -= iov->iov_len;
3401 840 : orig_iovoff = 0;
3402 840 : orig_iov++;
3403 840 : iov++;
3404 840 : iovcnt++;
3405 : }
3406 :
3407 540 : ctx->io_unit_offset += io_units_count;
3408 540 : ctx->io_units_remaining -= io_units_count;
3409 540 : ctx->io_units_done += io_units_count;
3410 540 : iov = &ctx->iov[0];
3411 :
3412 540 : if (ctx->read) {
3413 408 : spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3414 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3415 : } else {
3416 132 : spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
3417 : io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
3418 : }
3419 : }
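/*
 * Editorial sketch of the iov re-slicing performed by rw_iov_split_next()
 * above, written as a standalone helper with hypothetical names: skip
 * src_off bytes of the source array, then emit entries until max_bytes are
 * covered, splitting one source entry at the boundary if needed.
 */
static int
example_slice_iov(const struct iovec *src, int src_cnt, size_t src_off,
		  size_t max_bytes, struct iovec *dst, int dst_cnt)
{
	int n = 0;

	/* Skip whole entries that lie before the current position. */
	while (src_cnt > 0 && src_off >= src->iov_len) {
		src_off -= src->iov_len;
		src++;
		src_cnt--;
	}

	/* Build the new array, stopping exactly at max_bytes. */
	while (max_bytes > 0 && src_cnt > 0 && n < dst_cnt) {
		size_t len = spdk_min(max_bytes, src->iov_len - src_off);

		dst[n].iov_base = (char *)src->iov_base + src_off;
		dst[n].iov_len = len;
		max_bytes -= len;
		src_off = 0;
		src++;
		src_cnt--;
		n++;
	}

	return n;
}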
3420 :
3421 : static void
3422 2588 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
3423 : struct iovec *iov, int iovcnt,
3424 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
3425 : struct spdk_blob_ext_io_opts *ext_io_opts)
3426 : {
3427 2588 : struct spdk_bs_cpl cpl;
3428 :
3429 2588 : assert(blob != NULL);
3430 :
3431 2588 : if (!read && blob->data_ro) {
3432 4 : cb_fn(cb_arg, -EPERM);
3433 4 : return;
3434 : }
3435 :
3436 2584 : if (length == 0) {
3437 0 : cb_fn(cb_arg, 0);
3438 0 : return;
3439 : }
3440 :
3441 2584 : if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
3442 0 : cb_fn(cb_arg, -EINVAL);
3443 0 : return;
3444 : }
3445 :
3446 : /*
3447 : * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
3448 : * to split a request that spans a cluster boundary. For I/Os that do not span a cluster boundary,
3449 : * there will be no noticeable difference compared to using a batch. For I/Os that do span a cluster
3450 : * boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
3451 : * to allocate a separate iov array and split the I/O such that none of the resulting
3452 : * smaller I/Os crosses a cluster boundary. These smaller I/Os will be issued sequentially (not in parallel),
3453 : * but since this case happens very infrequently, any performance impact will be negligible.
3454 : *
3455 : * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
3456 : * for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
3457 : * in a batch. That would also require creating an intermediate spdk_bs_cpl that would get called
3458 : * when the batch was completed, to allow for freeing the memory for the iov arrays.
3459 : */
3460 2584 : if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
3461 2376 : uint64_t lba_count;
3462 2376 : uint64_t lba;
3463 : bool is_allocated;
3464 :
3465 2376 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
3466 2376 : cpl.u.blob_basic.cb_fn = cb_fn;
3467 2376 : cpl.u.blob_basic.cb_arg = cb_arg;
3468 :
3469 2376 : if (blob->frozen_refcnt) {
3470 : /* This blob I/O is frozen */
3471 : enum spdk_blob_op_type op_type;
3472 : spdk_bs_user_op_t *op;
3473 0 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
3474 :
3475 0 : op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
3476 0 : op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
3477 0 : if (!op) {
3478 0 : cb_fn(cb_arg, -ENOMEM);
3479 0 : return;
3480 : }
3481 :
3482 0 : TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
3483 :
3484 0 : return;
3485 : }
3486 :
3487 2376 : is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
3488 :
3489 2376 : if (read) {
3490 : spdk_bs_sequence_t *seq;
3491 :
3492 2084 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3493 2084 : if (!seq) {
3494 0 : cb_fn(cb_arg, -ENOMEM);
3495 0 : return;
3496 : }
3497 :
3498 2084 : seq->ext_io_opts = ext_io_opts;
3499 :
3500 2084 : if (is_allocated) {
3501 540 : bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3502 : } else {
3503 1544 : bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
3504 : rw_iov_done, NULL);
3505 : }
3506 : } else {
3507 292 : if (is_allocated) {
3508 : spdk_bs_sequence_t *seq;
3509 :
3510 276 : seq = bs_sequence_start_blob(_channel, &cpl, blob);
3511 276 : if (!seq) {
3512 0 : cb_fn(cb_arg, -ENOMEM);
3513 0 : return;
3514 : }
3515 :
3516 276 : seq->ext_io_opts = ext_io_opts;
3517 :
3518 276 : bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
3519 : } else {
3520 : /* Queue this operation and allocate the cluster */
3521 : spdk_bs_user_op_t *op;
3522 :
3523 16 : op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
3524 : length);
3525 16 : if (!op) {
3526 0 : cb_fn(cb_arg, -ENOMEM);
3527 0 : return;
3528 : }
3529 :
3530 16 : op->ext_io_opts = ext_io_opts;
3531 :
3532 16 : bs_allocate_and_copy_cluster(blob, _channel, offset, op);
3533 : }
3534 : }
3535 : } else {
3536 : struct rw_iov_ctx *ctx;
3537 :
3538 208 : ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
3539 208 : if (ctx == NULL) {
3540 4 : cb_fn(cb_arg, -ENOMEM);
3541 4 : return;
3542 : }
3543 :
3544 204 : ctx->blob = blob;
3545 204 : ctx->channel = _channel;
3546 204 : ctx->cb_fn = cb_fn;
3547 204 : ctx->cb_arg = cb_arg;
3548 204 : ctx->read = read;
3549 204 : ctx->orig_iov = iov;
3550 204 : ctx->iovcnt = iovcnt;
3551 204 : ctx->io_unit_offset = offset;
3552 204 : ctx->io_units_remaining = length;
3553 204 : ctx->io_units_done = 0;
3554 204 : ctx->ext_io_opts = ext_io_opts;
3555 :
3556 204 : rw_iov_split_next(ctx, 0);
3557 : }
3558 : }
3559 :
3560 : static struct spdk_blob *
3561 7733 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
3562 : {
3563 7733 : struct spdk_blob find;
3564 :
3565 7733 : if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
3566 6948 : return NULL;
3567 : }
3568 :
3569 785 : find.id = blobid;
3570 785 : return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
3571 : }
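/*
 * Design note: the open_blobids bit array acts as an O(1) negative filter in
 * front of the RB-tree, so the common miss case (blob not open) never pays
 * for an RB_FIND() tree walk.
 */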
3572 :
3573 : static void
3574 1810 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
3575 : struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
3576 : {
3577 1810 : assert(blob != NULL);
3578 1810 : *snapshot_entry = NULL;
3579 1810 : *clone_entry = NULL;
3580 :
3581 1810 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
3582 1518 : return;
3583 : }
3584 :
3585 440 : TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
3586 380 : if ((*snapshot_entry)->id == blob->parent_id) {
3587 232 : break;
3588 : }
3589 : }
3590 :
3591 292 : if (*snapshot_entry != NULL) {
3592 276 : TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
3593 276 : if ((*clone_entry)->id == blob->id) {
3594 232 : break;
3595 : }
3596 : }
3597 :
3598 232 : assert(*clone_entry != NULL);
3599 : }
3600 : }
3601 :
3602 : static int
3603 796 : bs_channel_create(void *io_device, void *ctx_buf)
3604 : {
3605 796 : struct spdk_blob_store *bs = io_device;
3606 796 : struct spdk_bs_channel *channel = ctx_buf;
3607 : struct spdk_bs_dev *dev;
3608 796 : uint32_t max_ops = bs->max_channel_ops;
3609 : uint32_t i;
3610 :
3611 796 : dev = bs->dev;
3612 :
3613 796 : channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
3614 796 : if (!channel->req_mem) {
3615 0 : return -1;
3616 : }
3617 :
3618 796 : TAILQ_INIT(&channel->reqs);
3619 :
3620 408348 : for (i = 0; i < max_ops; i++) {
3621 407552 : TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
3622 : }
3623 :
3624 796 : channel->bs = bs;
3625 796 : channel->dev = dev;
3626 796 : channel->dev_channel = dev->create_channel(dev);
3627 :
3628 796 : if (!channel->dev_channel) {
3629 0 : SPDK_ERRLOG("Failed to create device channel.\n");
3630 0 : free(channel->req_mem);
3631 0 : return -1;
3632 : }
3633 :
3634 796 : channel->new_cluster_page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY,
3635 : SPDK_MALLOC_DMA);
3636 796 : if (!channel->new_cluster_page) {
3637 0 : SPDK_ERRLOG("Failed to allocate new cluster page\n");
3638 0 : free(channel->req_mem);
3639 0 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3640 0 : return -1;
3641 : }
3642 :
3643 796 : TAILQ_INIT(&channel->need_cluster_alloc);
3644 796 : TAILQ_INIT(&channel->queued_io);
3645 796 : RB_INIT(&channel->esnap_channels);
3646 :
3647 796 : return 0;
3648 : }
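/*
 * Design note: all max_channel_ops request sets are preallocated into the
 * per-channel freelist above, so the I/O path never allocates request
 * memory; exhausting the freelist surfaces as a NULL batch/sequence, which
 * callers map to -ENOMEM.
 */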
3649 :
3650 : static void
3651 796 : bs_channel_destroy(void *io_device, void *ctx_buf)
3652 : {
3653 796 : struct spdk_bs_channel *channel = ctx_buf;
3654 : spdk_bs_user_op_t *op;
3655 :
3656 796 : while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
3657 0 : op = TAILQ_FIRST(&channel->need_cluster_alloc);
3658 0 : TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
3659 0 : bs_user_op_abort(op, -EIO);
3660 : }
3661 :
3662 796 : while (!TAILQ_EMPTY(&channel->queued_io)) {
3663 0 : op = TAILQ_FIRST(&channel->queued_io);
3664 0 : TAILQ_REMOVE(&channel->queued_io, op, link);
3665 0 : bs_user_op_abort(op, -EIO);
3666 : }
3667 :
3668 796 : blob_esnap_destroy_bs_channel(channel);
3669 :
3670 796 : free(channel->req_mem);
3671 796 : spdk_free(channel->new_cluster_page);
3672 796 : channel->dev->destroy_channel(channel->dev, channel->dev_channel);
3673 796 : }
3674 :
3675 : static void
3676 780 : bs_dev_destroy(void *io_device)
3677 : {
3678 780 : struct spdk_blob_store *bs = io_device;
3679 : struct spdk_blob *blob, *blob_tmp;
3680 :
3681 780 : bs->dev->destroy(bs->dev);
3682 :
3683 780 : RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
3684 0 : RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
3685 0 : spdk_bit_array_clear(bs->open_blobids, blob->id);
3686 0 : blob_free(blob);
3687 : }
3688 :
3689 780 : spdk_spin_destroy(&bs->used_lock);
3690 :
3691 780 : spdk_bit_array_free(&bs->open_blobids);
3692 780 : spdk_bit_array_free(&bs->used_blobids);
3693 780 : spdk_bit_array_free(&bs->used_md_pages);
3694 780 : spdk_bit_pool_free(&bs->used_clusters);
3695 : /*
3696 : * If this function is called for any reason except a successful unload,
3697 : * the unload_cpl type will be NONE and this will be a nop.
3698 : */
3699 780 : bs_call_cpl(&bs->unload_cpl, bs->unload_err);
3700 :
3701 780 : free(bs);
3702 780 : }
3703 :
3704 : static int
3705 908 : bs_blob_list_add(struct spdk_blob *blob)
3706 : {
3707 : spdk_blob_id snapshot_id;
3708 908 : struct spdk_blob_list *snapshot_entry = NULL;
3709 908 : struct spdk_blob_list *clone_entry = NULL;
3710 :
3711 908 : assert(blob != NULL);
3712 :
3713 908 : snapshot_id = blob->parent_id;
3714 908 : if (snapshot_id == SPDK_BLOBID_INVALID ||
3715 : snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
3716 492 : return 0;
3717 : }
3718 :
3719 416 : snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
3720 416 : if (snapshot_entry == NULL) {
3721 : /* Snapshot not found */
3722 288 : snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
3723 288 : if (snapshot_entry == NULL) {
3724 0 : return -ENOMEM;
3725 : }
3726 288 : snapshot_entry->id = snapshot_id;
3727 288 : TAILQ_INIT(&snapshot_entry->clones);
3728 288 : TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
3729 : } else {
3730 204 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
3731 76 : if (clone_entry->id == blob->id) {
3732 0 : break;
3733 : }
3734 : }
3735 : }
3736 :
3737 416 : if (clone_entry == NULL) {
3738 : /* Clone not found */
3739 416 : clone_entry = calloc(1, sizeof(struct spdk_blob_list));
3740 416 : if (clone_entry == NULL) {
3741 0 : return -ENOMEM;
3742 : }
3743 416 : clone_entry->id = blob->id;
3744 416 : TAILQ_INIT(&clone_entry->clones);
3745 416 : TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
3746 416 : snapshot_entry->clone_count++;
3747 : }
3748 :
3749 416 : return 0;
3750 : }
3751 :
3752 : static void
3753 1732 : bs_blob_list_remove(struct spdk_blob *blob)
3754 : {
3755 1732 : struct spdk_blob_list *snapshot_entry = NULL;
3756 1732 : struct spdk_blob_list *clone_entry = NULL;
3757 :
3758 1732 : blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
3759 :
3760 1732 : if (snapshot_entry == NULL) {
3761 1516 : return;
3762 : }
3763 :
3764 216 : blob->parent_id = SPDK_BLOBID_INVALID;
3765 216 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3766 216 : free(clone_entry);
3767 :
3768 216 : snapshot_entry->clone_count--;
3769 : }
3770 :
3771 : static int
3772 780 : bs_blob_list_free(struct spdk_blob_store *bs)
3773 : {
3774 : struct spdk_blob_list *snapshot_entry;
3775 : struct spdk_blob_list *snapshot_entry_tmp;
3776 : struct spdk_blob_list *clone_entry;
3777 : struct spdk_blob_list *clone_entry_tmp;
3778 :
3779 924 : TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
3780 296 : TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
3781 152 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
3782 152 : free(clone_entry);
3783 : }
3784 144 : TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
3785 144 : free(snapshot_entry);
3786 : }
3787 :
3788 780 : return 0;
3789 : }
3790 :
3791 : static void
3792 780 : bs_free(struct spdk_blob_store *bs)
3793 : {
3794 780 : bs_blob_list_free(bs);
3795 :
3796 780 : bs_unregister_md_thread(bs);
3797 780 : spdk_io_device_unregister(bs, bs_dev_destroy);
3798 780 : }
3799 :
3800 : void
3801 1048 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
3802 : {
3803 :
3804 1048 : if (!opts) {
3805 0 : SPDK_ERRLOG("opts should not be NULL\n");
3806 0 : return;
3807 : }
3808 :
3809 1048 : if (!opts_size) {
3810 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
3811 0 : return;
3812 : }
3813 :
3814 1048 : memset(opts, 0, opts_size);
3815 1048 : opts->opts_size = opts_size;
3816 :
3817 : #define FIELD_OK(field) \
3818 : offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
3819 :
3820 : #define SET_FIELD(field, value) \
3821 : if (FIELD_OK(field)) { \
3822 : opts->field = value; \
3823 : } \
3824 :
3825 1048 : SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
3826 1048 : SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3827 1048 : SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
3828 1048 : SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
3829 1048 : SET_FIELD(clear_method, BS_CLEAR_WITH_UNMAP);
3830 :
3831 1048 : if (FIELD_OK(bstype)) {
3832 1048 : memset(&opts->bstype, 0, sizeof(opts->bstype));
3833 : }
3834 :
3835 1048 : SET_FIELD(iter_cb_fn, NULL);
3836 1048 : SET_FIELD(iter_cb_arg, NULL);
3837 1048 : SET_FIELD(force_recover, false);
3838 1048 : SET_FIELD(esnap_bs_dev_create, NULL);
3839 1048 : SET_FIELD(esnap_ctx, NULL);
3840 :
3841 : #undef FIELD_OK
3842 : #undef SET_FIELD
3843 : }
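/*
 * Editorial sketch: how the opts_size/FIELD_OK convention above is meant to
 * be used by callers. Only fields that fit within the caller-declared size
 * are touched, which keeps binaries built against an older, smaller struct
 * compatible with a grown one. The cluster_sz value is purely illustrative.
 */
static void
example_bs_opts_usage(void)
{
	struct spdk_bs_opts opts;

	spdk_bs_opts_init(&opts, sizeof(opts));
	opts.cluster_sz = 4 * 1024 * 1024;	/* hypothetical 4 MiB clusters */
}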
3844 :
3845 : static int
3846 484 : bs_opts_verify(struct spdk_bs_opts *opts)
3847 : {
3848 484 : if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
3849 480 : opts->max_channel_ops == 0) {
3850 4 : SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
3851 4 : return -1;
3852 : }
3853 :
3854 480 : return 0;
3855 : }
3856 :
3857 : /* START spdk_bs_load */
3858 :
3859 : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
3860 :
3861 : struct spdk_bs_load_ctx {
3862 : struct spdk_blob_store *bs;
3863 : struct spdk_bs_super_block *super;
3864 :
3865 : struct spdk_bs_md_mask *mask;
3866 : bool in_page_chain;
3867 : uint32_t page_index;
3868 : uint32_t cur_page;
3869 : struct spdk_blob_md_page *page;
3870 :
3871 : uint64_t num_extent_pages;
3872 : uint32_t *extent_page_num;
3873 : struct spdk_blob_md_page *extent_pages;
3874 : struct spdk_bit_array *used_clusters;
3875 :
3876 : spdk_bs_sequence_t *seq;
3877 : spdk_blob_op_with_handle_complete iter_cb_fn;
3878 : void *iter_cb_arg;
3879 : struct spdk_blob *blob;
3880 : spdk_blob_id blobid;
3881 :
3882 : bool force_recover;
3883 :
3884 : /* These fields are used in the spdk_bs_dump path. */
3885 : bool dumping;
3886 : FILE *fp;
3887 : spdk_bs_dump_print_xattr print_xattr_fn;
3888 : char xattr_name[4096];
3889 : };
3890 :
3891 : static int
3892 784 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
3893 : struct spdk_bs_load_ctx **_ctx)
3894 : {
3895 : struct spdk_blob_store *bs;
3896 : struct spdk_bs_load_ctx *ctx;
3897 : uint64_t dev_size;
3898 : int rc;
3899 :
3900 784 : dev_size = dev->blocklen * dev->blockcnt;
3901 784 : if (dev_size < opts->cluster_sz) {
3902 : /* Device size cannot be smaller than cluster size of blobstore */
3903 0 : SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
3904 : dev_size, opts->cluster_sz);
3905 0 : return -ENOSPC;
3906 : }
3907 784 : if (opts->cluster_sz < SPDK_BS_PAGE_SIZE) {
3908 : /* Cluster size cannot be smaller than page size */
3909 4 : SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
3910 : opts->cluster_sz, SPDK_BS_PAGE_SIZE);
3911 4 : return -EINVAL;
3912 : }
3913 780 : bs = calloc(1, sizeof(struct spdk_blob_store));
3914 780 : if (!bs) {
3915 0 : return -ENOMEM;
3916 : }
3917 :
3918 780 : ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
3919 780 : if (!ctx) {
3920 0 : free(bs);
3921 0 : return -ENOMEM;
3922 : }
3923 :
3924 780 : ctx->bs = bs;
3925 780 : ctx->iter_cb_fn = opts->iter_cb_fn;
3926 780 : ctx->iter_cb_arg = opts->iter_cb_arg;
3927 780 : ctx->force_recover = opts->force_recover;
3928 :
3929 780 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
3930 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
3931 780 : if (!ctx->super) {
3932 0 : free(ctx);
3933 0 : free(bs);
3934 0 : return -ENOMEM;
3935 : }
3936 :
3937 780 : RB_INIT(&bs->open_blobs);
3938 780 : TAILQ_INIT(&bs->snapshots);
3939 780 : bs->dev = dev;
3940 780 : bs->md_thread = spdk_get_thread();
3941 780 : assert(bs->md_thread != NULL);
3942 :
3943 : /*
3944 : * Do not use bs_lba_to_cluster() here since blockcnt may not be an
3945 : * even multiple of the cluster size.
3946 : */
3947 780 : bs->cluster_sz = opts->cluster_sz;
3948 780 : bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
3949 780 : ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
3950 780 : if (!ctx->used_clusters) {
3951 0 : spdk_free(ctx->super);
3952 0 : free(ctx);
3953 0 : free(bs);
3954 0 : return -ENOMEM;
3955 : }
3956 :
3957 780 : bs->pages_per_cluster = bs->cluster_sz / SPDK_BS_PAGE_SIZE;
3958 780 : if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
3959 780 : bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
3960 : }
3961 780 : bs->num_free_clusters = bs->total_clusters;
3962 780 : bs->io_unit_size = dev->blocklen;
3963 :
3964 780 : bs->max_channel_ops = opts->max_channel_ops;
3965 780 : bs->super_blob = SPDK_BLOBID_INVALID;
3966 780 : memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
3967 780 : bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
3968 780 : bs->esnap_ctx = opts->esnap_ctx;
3969 :
3970 : /* The metadata is assumed to be at least 1 page */
3971 780 : bs->used_md_pages = spdk_bit_array_create(1);
3972 780 : bs->used_blobids = spdk_bit_array_create(0);
3973 780 : bs->open_blobids = spdk_bit_array_create(0);
3974 :
3975 780 : spdk_spin_init(&bs->used_lock);
3976 :
3977 780 : spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
3978 : sizeof(struct spdk_bs_channel), "blobstore");
3979 780 : rc = bs_register_md_thread(bs);
3980 780 : if (rc == -1) {
3981 0 : spdk_io_device_unregister(bs, NULL);
3982 0 : spdk_spin_destroy(&bs->used_lock);
3983 0 : spdk_bit_array_free(&bs->open_blobids);
3984 0 : spdk_bit_array_free(&bs->used_blobids);
3985 0 : spdk_bit_array_free(&bs->used_md_pages);
3986 0 : spdk_bit_array_free(&ctx->used_clusters);
3987 0 : spdk_free(ctx->super);
3988 0 : free(ctx);
3989 0 : free(bs);
3990 : /* FIXME: this is a lie but don't know how to get a proper error code here */
3991 0 : return -ENOMEM;
3992 : }
3993 :
3994 780 : *_ctx = ctx;
3995 780 : *_bs = bs;
3996 780 : return 0;
3997 : }
3998 :
3999 : static void
4000 24 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
4001 : {
4002 24 : assert(bserrno != 0);
4003 :
4004 24 : spdk_free(ctx->super);
4005 24 : bs_sequence_finish(ctx->seq, bserrno);
4006 24 : bs_free(ctx->bs);
4007 24 : spdk_bit_array_free(&ctx->used_clusters);
4008 24 : free(ctx);
4009 24 : }
4010 :
4011 : static void
4012 824 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
4013 : struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
4014 : {
4015 : /* Update the values in the super block */
4016 824 : super->super_blob = bs->super_blob;
4017 824 : memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
4018 824 : super->crc = blob_md_page_calc_crc(super);
4019 824 : bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
4020 824 : bs_byte_to_lba(bs, sizeof(*super)),
4021 : cb_fn, cb_arg);
4022 824 : }
4023 :
4024 : static void
4025 760 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4026 : {
4027 760 : struct spdk_bs_load_ctx *ctx = arg;
4028 : uint64_t mask_size, lba, lba_count;
4029 :
4030 : /* Write out the used clusters mask */
4031 760 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4032 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4033 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4034 760 : if (!ctx->mask) {
4035 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4036 0 : return;
4037 : }
4038 :
4039 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
4040 760 : ctx->mask->length = ctx->bs->total_clusters;
4041 : /* We could get here through the normal unload path, or through dirty
4042 : * shutdown recovery. For the normal unload path, we use the mask from
4043 : * the bit pool. For dirty shutdown recovery, we don't have a bit pool yet -
4044 : * only the bit array from the load ctx.
4045 : */
4046 760 : if (ctx->bs->used_clusters) {
4047 654 : assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
4048 654 : spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
4049 : } else {
4050 106 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
4051 106 : spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
4052 : }
4053 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4054 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4055 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4056 : }
4057 :
4058 : static void
4059 760 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4060 : {
4061 760 : struct spdk_bs_load_ctx *ctx = arg;
4062 : uint64_t mask_size, lba, lba_count;
4063 :
4064 760 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4065 760 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4066 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4067 760 : if (!ctx->mask) {
4068 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4069 0 : return;
4070 : }
4071 :
4072 760 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
4073 760 : ctx->mask->length = ctx->super->md_len;
4074 760 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
4075 :
4076 760 : spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4077 760 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4078 760 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4079 760 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4080 : }
4081 :
4082 : static void
4083 760 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
4084 : {
4085 760 : struct spdk_bs_load_ctx *ctx = arg;
4086 : uint64_t mask_size, lba, lba_count;
4087 :
4088 760 : if (ctx->super->used_blobid_mask_len == 0) {
4089 : /*
4090 : * This is a pre-v3 on-disk format where the blobid mask does not get
4091 : * written to disk.
4092 : */
4093 24 : cb_fn(seq, arg, 0);
4094 24 : return;
4095 : }
4096 :
4097 736 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4098 736 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4099 : SPDK_MALLOC_DMA);
4100 736 : if (!ctx->mask) {
4101 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4102 0 : return;
4103 : }
4104 :
4105 736 : ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
4106 736 : ctx->mask->length = ctx->super->md_len;
4107 736 : assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
4108 :
4109 736 : spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
4110 736 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4111 736 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4112 736 : bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
4113 : }
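/*
 * On-disk layout of each mask region written by bs_write_used_clusters(),
 * bs_write_used_md() and bs_write_used_blobids() above: a small
 * spdk_bs_md_mask header holding {type, length-in-bits}, followed by the raw
 * bitmap, padded out to used_*_mask_len full pages.
 */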
4114 :
4115 : static void
4116 704 : blob_set_thin_provision(struct spdk_blob *blob)
4117 : {
4118 704 : blob_verify_md_op(blob);
4119 704 : blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
4120 704 : blob->state = SPDK_BLOB_STATE_DIRTY;
4121 704 : }
4122 :
4123 : static void
4124 2094 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
4125 : {
4126 2094 : blob_verify_md_op(blob);
4127 2094 : blob->clear_method = clear_method;
4128 2094 : blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
4129 2094 : blob->state = SPDK_BLOB_STATE_DIRTY;
4130 2094 : }
4131 :
4132 : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
4133 :
4134 : static void
4135 24 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
4136 : {
4137 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4138 : spdk_blob_id id;
4139 : int64_t page_num;
4140 :
4141 : /* Iterate to the next blob (we can't use the spdk_bs_iter_next function as our
4142 : * last blob has been removed). */
4143 24 : page_num = bs_blobid_to_page(ctx->blobid);
4144 24 : page_num++;
4145 24 : page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
4146 24 : if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
4147 24 : bs_load_iter(ctx, NULL, -ENOENT);
4148 24 : return;
4149 : }
4150 :
4151 0 : id = bs_page_to_blobid(page_num);
4152 :
4153 0 : spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
4154 : }
4155 :
4156 : static void
4157 24 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
4158 : {
4159 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4160 :
4161 24 : if (bserrno != 0) {
4162 0 : SPDK_ERRLOG("Failed to close corrupted blob\n");
4163 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4164 0 : return;
4165 : }
4166 :
4167 24 : spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
4168 : }
4169 :
4170 : static void
4171 24 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
4172 : {
4173 24 : struct spdk_bs_load_ctx *ctx = cb_arg;
4174 : uint64_t i;
4175 :
4176 24 : if (bserrno != 0) {
4177 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4178 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4179 0 : return;
4180 : }
4181 :
4182 : /* Snapshot and clone have the same copy of the cluster map and extent pages
4183 : * at this point. Clear both for the snapshot now,
4184 : * so that they won't be cleared for the clone later when we remove the snapshot.
4185 : * Also set thin provisioning to pass the data corruption check. */
4186 264 : for (i = 0; i < ctx->blob->active.num_clusters; i++) {
4187 240 : ctx->blob->active.clusters[i] = 0;
4188 : }
4189 36 : for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
4190 12 : ctx->blob->active.extent_pages[i] = 0;
4191 : }
4192 :
4193 24 : ctx->blob->active.num_allocated_clusters = 0;
4194 :
4195 24 : ctx->blob->md_ro = false;
4196 :
4197 24 : blob_set_thin_provision(ctx->blob);
4198 :
4199 24 : ctx->blobid = ctx->blob->id;
4200 :
4201 24 : spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
4202 : }
4203 :
4204 : static void
4205 12 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
4206 : {
4207 12 : struct spdk_bs_load_ctx *ctx = cb_arg;
4208 :
4209 12 : if (bserrno != 0) {
4210 0 : SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
4211 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4212 0 : return;
4213 : }
4214 :
4215 12 : ctx->blob->md_ro = false;
4216 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
4217 12 : blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
4218 12 : spdk_blob_set_read_only(ctx->blob);
4219 :
4220 12 : if (ctx->iter_cb_fn) {
4221 0 : ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
4222 : }
4223 12 : bs_blob_list_add(ctx->blob);
4224 :
4225 12 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4226 : }
4227 :
4228 : static void
4229 36 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
4230 : {
4231 36 : struct spdk_bs_load_ctx *ctx = cb_arg;
4232 :
4233 36 : if (bserrno != 0) {
4234 0 : SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
4235 0 : spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
4236 0 : return;
4237 : }
4238 :
4239 36 : if (blob->parent_id == ctx->blob->id) {
4240 : /* Power failure occurred before updating clone (snapshot delete case)
4241 : * or after updating clone (creating snapshot case) - keep snapshot */
4242 12 : spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
4243 : } else {
4244 : /* Power failure occurred after updating clone (snapshot delete case)
4245 : * or before updating clone (creating snapshot case) - remove snapshot */
4246 24 : spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
4247 : }
4248 : }
4249 :
4250 : static void
4251 720 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
4252 : {
4253 720 : struct spdk_bs_load_ctx *ctx = arg;
4254 720 : const void *value;
4255 720 : size_t len;
4256 720 : int rc = 0;
4257 :
4258 720 : if (bserrno == 0) {
4259 : /* Examine blob if it is corrupted after power failure. Fix
4260 : * the ones that can be fixed and remove any other corrupted
4261 : * ones. If it is not corrupted just process it */
4262 440 : rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
4263 440 : if (rc != 0) {
4264 420 : rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
4265 420 : if (rc != 0) {
4266 : /* Not corrupted - process it and continue with iterating through blobs */
4267 404 : if (ctx->iter_cb_fn) {
4268 34 : ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
4269 : }
4270 404 : bs_blob_list_add(blob);
4271 404 : spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
4272 404 : return;
4273 : }
4274 :
4275 : }
4276 :
4277 36 : assert(len == sizeof(spdk_blob_id));
4278 :
4279 36 : ctx->blob = blob;
4280 :
4281 : /* Open clone to check if we are able to fix this blob or should we remove it */
4282 36 : spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
4283 36 : return;
4284 280 : } else if (bserrno == -ENOENT) {
4285 280 : bserrno = 0;
4286 : } else {
4287 : /*
4288 : * This case needs to be looked at further. Same problem
4289 : * exists with applications that rely on explicit blob
4290 : * iteration. We should just skip the blob that failed
4291 : * to load and continue on to the next one.
4292 : */
4293 0 : SPDK_ERRLOG("Error in iterating blobs\n");
4294 : }
4295 :
4296 280 : ctx->iter_cb_fn = NULL;
4297 :
4298 280 : spdk_free(ctx->super);
4299 280 : spdk_free(ctx->mask);
4300 280 : bs_sequence_finish(ctx->seq, bserrno);
4301 280 : free(ctx);
4302 : }
4303 :
4304 : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
4305 :
4306 : static void
4307 280 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
4308 : {
4309 280 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
4310 280 : if (ctx->dumping) {
4311 0 : bs_dump_read_md_page(ctx->seq, ctx);
4312 0 : return;
4313 : }
4314 280 : spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
4315 : }
4316 :
4317 : static void
4318 174 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4319 : {
4320 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4321 : int rc;
4322 :
4323 : /* The type must be correct */
4324 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
4325 :
4326 : /* The length of the mask (in bits) must not be greater than
4327 : * the length of the buffer (converted to bits) */
4328 174 : assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE * 8));
4329 :
4330 : /* The length of the mask must be exactly equal to the size
4331 : * (in pages) of the metadata region */
4332 174 : assert(ctx->mask->length == ctx->super->md_len);
4333 :
4334 174 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
4335 174 : if (rc < 0) {
4336 0 : spdk_free(ctx->mask);
4337 0 : bs_load_ctx_fail(ctx, rc);
4338 0 : return;
4339 : }
4340 :
4341 174 : spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
4342 174 : bs_load_complete(ctx);
4343 : }
4344 :
4345 : static void
4346 174 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4347 : {
4348 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4349 : uint64_t lba, lba_count, mask_size;
4350 : int rc;
4351 :
4352 174 : if (bserrno != 0) {
4353 0 : bs_load_ctx_fail(ctx, bserrno);
4354 0 : return;
4355 : }
4356 :
4357 : /* The type must be correct */
4358 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
4359 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4360 174 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
4361 : struct spdk_blob_md_page) * 8));
4362 : /*
4363 : * The length of the mask must be equal to or larger than the total number of clusters. It may be
4364 : * larger than the total number of clusters due to a failed spdk_bs_grow.
4365 : */
4366 174 : assert(ctx->mask->length >= ctx->bs->total_clusters);
4367 174 : if (ctx->mask->length > ctx->bs->total_clusters) {
4368 4 : SPDK_WARNLOG("Shrinking the used_clusters mask length to total_clusters\n");
4369 4 : ctx->mask->length = ctx->bs->total_clusters;
4370 : }
4371 :
4372 174 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
4373 174 : if (rc < 0) {
4374 0 : spdk_free(ctx->mask);
4375 0 : bs_load_ctx_fail(ctx, rc);
4376 0 : return;
4377 : }
4378 :
4379 174 : spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
4380 174 : ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
4381 174 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
4382 :
4383 174 : spdk_free(ctx->mask);
4384 :
4385 : /* Read the used blobids mask */
4386 174 : mask_size = ctx->super->used_blobid_mask_len * SPDK_BS_PAGE_SIZE;
4387 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4388 : SPDK_MALLOC_DMA);
4389 174 : if (!ctx->mask) {
4390 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4391 0 : return;
4392 : }
4393 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
4394 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
4395 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4396 : bs_load_used_blobids_cpl, ctx);
4397 : }
4398 :
4399 : static void
4400 174 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4401 : {
4402 174 : struct spdk_bs_load_ctx *ctx = cb_arg;
4403 : uint64_t lba, lba_count, mask_size;
4404 : int rc;
4405 :
4406 174 : if (bserrno != 0) {
4407 0 : bs_load_ctx_fail(ctx, bserrno);
4408 0 : return;
4409 : }
4410 :
4411 : /* The type must be correct */
4412 174 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
4413 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
4414 174 : assert(ctx->mask->length <= (ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE *
4415 : 8));
4416 : /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
4417 174 : if (ctx->mask->length != ctx->super->md_len) {
4418 0 : SPDK_ERRLOG("mismatched md_len in used_pages mask: "
4419 : "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
4420 : ctx->mask->length, ctx->super->md_len);
4421 0 : assert(false);
4422 : }
4423 :
4424 174 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
4425 174 : if (rc < 0) {
4426 0 : spdk_free(ctx->mask);
4427 0 : bs_load_ctx_fail(ctx, rc);
4428 0 : return;
4429 : }
4430 :
4431 174 : spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
4432 174 : spdk_free(ctx->mask);
4433 :
4434 : /* Read the used clusters mask */
4435 174 : mask_size = ctx->super->used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
4436 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
4437 : SPDK_MALLOC_DMA);
4438 174 : if (!ctx->mask) {
4439 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4440 0 : return;
4441 : }
4442 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
4443 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
4444 174 : bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
4445 : bs_load_used_clusters_cpl, ctx);
4446 : }
4447 :
4448 : static void
4449 174 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
4450 : {
4451 : uint64_t lba, lba_count, mask_size;
4452 :
4453 : /* Read the used pages mask */
4454 174 : mask_size = ctx->super->used_page_mask_len * SPDK_BS_PAGE_SIZE;
4455 174 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
4456 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4457 174 : if (!ctx->mask) {
4458 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4459 0 : return;
4460 : }
4461 :
4462 174 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
4463 174 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
4464 174 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
4465 : bs_load_used_pages_cpl, ctx);
4466 : }
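/*
 * Clean-shutdown load chain, for orientation: bs_load_read_used_pages() ->
 * bs_load_used_pages_cpl() -> bs_load_used_clusters_cpl() ->
 * bs_load_used_blobids_cpl() -> bs_load_complete(), each completion reading
 * the next mask region from disk.
 */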
4467 :
4468 : static int
4469 246 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
4470 : {
4471 246 : struct spdk_blob_store *bs = ctx->bs;
4472 : struct spdk_blob_md_descriptor *desc;
4473 246 : size_t cur_desc = 0;
4474 :
4475 246 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4476 718 : while (cur_desc < sizeof(page->descriptors)) {
4477 718 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
4478 226 : if (desc->length == 0) {
4479 : /* If padding and length are 0, this terminates the page */
4480 226 : break;
4481 : }
4482 492 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
4483 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
4484 : unsigned int i, j;
4485 68 : unsigned int cluster_count = 0;
4486 : uint32_t cluster_idx;
4487 :
4488 68 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
4489 :
4490 136 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
4491 828 : for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
4492 760 : cluster_idx = desc_extent_rle->extents[i].cluster_idx;
4493 : /*
4494 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4495 : * in the used cluster map.
4496 : */
4497 760 : if (cluster_idx != 0) {
4498 540 : SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
4499 540 : spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
4500 540 : if (bs->num_free_clusters == 0) {
4501 0 : return -ENOSPC;
4502 : }
4503 540 : bs->num_free_clusters--;
4504 : }
4505 760 : cluster_count++;
4506 : }
4507 : }
4508 68 : if (cluster_count == 0) {
4509 0 : return -EINVAL;
4510 : }
4511 424 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4512 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
4513 : uint32_t i;
4514 52 : uint32_t cluster_count = 0;
4515 : uint32_t cluster_idx;
4516 : size_t cluster_idx_length;
4517 :
4518 52 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
4519 52 : cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
4520 :
4521 52 : if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
4522 52 : (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
4523 0 : return -EINVAL;
4524 : }
4525 :
4526 652 : for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
4527 600 : cluster_idx = desc_extent->cluster_idx[i];
4528 : /*
4529 : * cluster_idx = 0 means an unallocated cluster - don't mark that
4530 : * in the used cluster map.
4531 : */
4532 600 : if (cluster_idx != 0) {
4533 600 : if (cluster_idx < desc_extent->start_cluster_idx &&
4534 0 : cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
4535 0 : return -EINVAL;
4536 : }
4537 600 : spdk_bit_array_set(ctx->used_clusters, cluster_idx);
4538 600 : if (bs->num_free_clusters == 0) {
4539 0 : return -ENOSPC;
4540 : }
4541 600 : bs->num_free_clusters--;
4542 : }
4543 600 : cluster_count++;
4544 : }
4545 :
4546 52 : if (cluster_count == 0) {
4547 0 : return -EINVAL;
4548 : }
4549 372 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
4550 : /* Skip this item */
4551 296 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
4552 : /* Skip this item */
4553 236 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
4554 : /* Skip this item */
4555 82 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
4556 : struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
4557 82 : uint32_t num_extent_pages = ctx->num_extent_pages;
4558 : uint32_t i;
4559 : size_t extent_pages_length;
4560 : void *tmp;
4561 :
4562 82 : desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
4563 82 : extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
4564 :
4565 82 : if (desc_extent_table->length == 0 ||
4566 82 : (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
4567 0 : return -EINVAL;
4568 : }
4569 :
4570 160 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4571 78 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4572 52 : if (desc_extent_table->extent_page[i].num_pages != 1) {
4573 0 : return -EINVAL;
4574 : }
4575 52 : num_extent_pages += 1;
4576 : }
4577 : }
4578 :
4579 82 : if (num_extent_pages > 0) {
4580 52 : tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
4581 52 : if (tmp == NULL) {
4582 0 : return -ENOMEM;
4583 : }
4584 52 : ctx->extent_page_num = tmp;
4585 :
4586 : /* Extent table entries contain md page numbers for extent pages.
4587 : * Zeroes represent unallocated extent pages, which are run-length encoded.
4588 : */
4589 104 : for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
4590 52 : if (desc_extent_table->extent_page[i].page_idx != 0) {
4591 52 : ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
4592 52 : ctx->num_extent_pages += 1;
4593 : }
4594 : }
4595 : }
4596 : } else {
4597 : /* Error */
4598 0 : return -EINVAL;
4599 : }
4600 : /* Advance to the next descriptor */
4601 492 : cur_desc += sizeof(*desc) + desc->length;
4602 492 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
4603 20 : break;
4604 : }
4605 472 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
4606 : }
4607 246 : return 0;
4608 : }
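/*
 * Editorial sketch of the extent-RLE replay above, with hypothetical names:
 * each {cluster_idx, length} pair describes `length` consecutive clusters
 * starting at cluster_idx, and cluster_idx == 0 encodes a run of unallocated
 * clusters that must not be marked in the used-cluster map.
 */
static void
example_replay_extent_rle(const struct spdk_blob_md_descriptor_extent_rle *d,
			  struct spdk_bit_array *used_clusters)
{
	unsigned int i, j;

	for (i = 0; i < d->length / sizeof(d->extents[0]); i++) {
		if (d->extents[i].cluster_idx == 0) {
			continue;	/* a run of unallocated clusters */
		}
		for (j = 0; j < d->extents[i].length; j++) {
			spdk_bit_array_set(used_clusters,
					   d->extents[i].cluster_idx + j);
		}
	}
}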
4609 :
4610 : static bool
4611 1296 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
4612 : {
4613 : uint32_t crc;
4614 1296 : struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
4615 : size_t desc_len;
4616 :
4617 1296 : crc = blob_md_page_calc_crc(page);
4618 1296 : if (crc != page->crc) {
4619 0 : return false;
4620 : }
4621 :
4622 : /* Extent page should always be of sequence num 0. */
4623 1296 : if (page->sequence_num != 0) {
4624 44 : return false;
4625 : }
4626 :
4627 : /* Descriptor type must be EXTENT_PAGE. */
4628 1252 : if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
4629 154 : return false;
4630 : }
4631 :
4632 : /* Descriptor length cannot exceed the page. */
4633 1098 : desc_len = sizeof(*desc) + desc->length;
4634 1098 : if (desc_len > sizeof(page->descriptors)) {
4635 0 : return false;
4636 : }
4637 :
4638 : /* It has to be the only descriptor in the page. */
4639 1098 : if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
4640 1098 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
4641 1098 : if (desc->length != 0) {
4642 0 : return false;
4643 : }
4644 : }
4645 :
4646 1098 : return true;
4647 : }
4648 :
4649 : static bool
4650 6754 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
4651 : {
4652 : uint32_t crc;
4653 6754 : struct spdk_blob_md_page *page = ctx->page;
4654 :
4655 6754 : crc = blob_md_page_calc_crc(page);
4656 6754 : if (crc != page->crc) {
4657 6538 : return false;
4658 : }
4659 :
4660 : /* First page of a sequence should match the blobid. */
4661 216 : if (page->sequence_num == 0 &&
4662 172 : bs_page_to_blobid(ctx->cur_page) != page->id) {
4663 18 : return false;
4664 : }
4665 198 : assert(bs_load_cur_extent_page_valid(page) == false);
4666 :
4667 198 : return true;
4668 : }
4669 :
4670 : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
4671 :
4672 : static void
4673 106 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4674 : {
4675 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4676 :
4677 106 : if (bserrno != 0) {
4678 0 : bs_load_ctx_fail(ctx, bserrno);
4679 0 : return;
4680 : }
4681 :
4682 106 : bs_load_complete(ctx);
4683 : }
4684 :
4685 : static void
4686 106 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4687 : {
4688 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4689 :
4690 106 : spdk_free(ctx->mask);
4691 106 : ctx->mask = NULL;
4692 :
4693 106 : if (bserrno != 0) {
4694 0 : bs_load_ctx_fail(ctx, bserrno);
4695 0 : return;
4696 : }
4697 :
4698 106 : bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
4699 : }
4700 :
4701 : static void
4702 106 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4703 : {
4704 106 : struct spdk_bs_load_ctx *ctx = cb_arg;
4705 :
4706 106 : spdk_free(ctx->mask);
4707 106 : ctx->mask = NULL;
4708 :
4709 106 : if (bserrno != 0) {
4710 0 : bs_load_ctx_fail(ctx, bserrno);
4711 0 : return;
4712 : }
4713 :
4714 106 : bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
4715 : }
4716 :
4717 : static void
4718 106 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
4719 : {
4720 106 : bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
4721 106 : }
4722 :
4723 : static void
4724 6714 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
4725 : {
4726 : uint64_t num_md_clusters;
4727 : uint64_t i;
4728 :
4729 6714 : ctx->in_page_chain = false;
4730 :
4731 : do {
4732 6784 : ctx->page_index++;
4733 6784 : } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
4734 :
4735 6714 : if (ctx->page_index < ctx->super->md_len) {
4736 6608 : ctx->cur_page = ctx->page_index;
4737 6608 : bs_load_replay_cur_md_page(ctx);
4738 : } else {
4739 : /* Claim all of the clusters used by the metadata */
4740 106 : num_md_clusters = spdk_divide_round_up(
4741 106 : ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
4742 480 : for (i = 0; i < num_md_clusters; i++) {
4743 374 : spdk_bit_array_set(ctx->used_clusters, i);
4744 : }
4745 106 : ctx->bs->num_free_clusters -= num_md_clusters;
4746 106 : spdk_free(ctx->page);
4747 106 : bs_load_write_used_md(ctx);
4748 : }
4749 6714 : }
4750 :
4751 : static void
4752 52 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4753 : {
4754 52 : struct spdk_bs_load_ctx *ctx = cb_arg;
4755 : uint32_t page_num;
4756 : uint64_t i;
4757 :
4758 52 : if (bserrno != 0) {
4759 0 : spdk_free(ctx->extent_pages);
4760 0 : bs_load_ctx_fail(ctx, bserrno);
4761 0 : return;
4762 : }
4763 :
4764 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4765 : /* Extent pages are only read when present within the md chain.
4766 : * The md integrity is broken if that page is not a valid extent page. */
4767 52 : if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
4768 0 : spdk_free(ctx->extent_pages);
4769 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4770 0 : return;
4771 : }
4772 :
4773 52 : page_num = ctx->extent_page_num[i];
4774 52 : spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
4775 52 : if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
4776 0 : spdk_free(ctx->extent_pages);
4777 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4778 0 : return;
4779 : }
4780 : }
4781 :
4782 52 : spdk_free(ctx->extent_pages);
4783 52 : free(ctx->extent_page_num);
4784 52 : ctx->extent_page_num = NULL;
4785 52 : ctx->num_extent_pages = 0;
4786 :
4787 52 : bs_load_replay_md_chain_cpl(ctx);
4788 : }
4789 :
4790 : static void
4791 52 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
4792 : {
4793 : spdk_bs_batch_t *batch;
4794 : uint32_t page;
4795 : uint64_t lba;
4796 : uint64_t i;
4797 :
4798 52 : ctx->extent_pages = spdk_zmalloc(SPDK_BS_PAGE_SIZE * ctx->num_extent_pages, 0,
4799 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4800 52 : if (!ctx->extent_pages) {
4801 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4802 0 : return;
4803 : }
4804 :
4805 52 : batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
4806 :
4807 104 : for (i = 0; i < ctx->num_extent_pages; i++) {
4808 52 : page = ctx->extent_page_num[i];
4809 52 : assert(page < ctx->super->md_len);
4810 52 : lba = bs_md_page_to_lba(ctx->bs, page);
4811 52 : bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
4812 52 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE));
4813 : }
4814 :
4815 52 : bs_batch_close(batch);
4816 : }
4817 :
4818 : static void
4819 6754 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4820 : {
4821 6754 : struct spdk_bs_load_ctx *ctx = cb_arg;
4822 : uint32_t page_num;
4823 : struct spdk_blob_md_page *page;
4824 :
4825 6754 : if (bserrno != 0) {
4826 0 : bs_load_ctx_fail(ctx, bserrno);
4827 0 : return;
4828 : }
4829 :
4830 6754 : page_num = ctx->cur_page;
4831 6754 : page = ctx->page;
4832 6754 : if (bs_load_cur_md_page_valid(ctx) == true) {
4833 198 : if (page->sequence_num == 0 || ctx->in_page_chain == true) {
4834 194 : spdk_spin_lock(&ctx->bs->used_lock);
4835 194 : bs_claim_md_page(ctx->bs, page_num);
4836 194 : spdk_spin_unlock(&ctx->bs->used_lock);
4837 194 : if (page->sequence_num == 0) {
4838 154 : SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
4839 154 : spdk_bit_array_set(ctx->bs->used_blobids, page_num);
4840 : }
4841 194 : if (bs_load_replay_md_parse_page(ctx, page)) {
4842 0 : bs_load_ctx_fail(ctx, -EILSEQ);
4843 0 : return;
4844 : }
4845 194 : if (page->next != SPDK_INVALID_MD_PAGE) {
4846 40 : ctx->in_page_chain = true;
4847 40 : ctx->cur_page = page->next;
4848 40 : bs_load_replay_cur_md_page(ctx);
4849 40 : return;
4850 : }
4851 154 : if (ctx->num_extent_pages != 0) {
4852 52 : bs_load_replay_extent_pages(ctx);
4853 52 : return;
4854 : }
4855 : }
4856 : }
4857 6662 : bs_load_replay_md_chain_cpl(ctx);
4858 : }
4859 :
4860 : static void
4861 6754 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
4862 : {
4863 : uint64_t lba;
4864 :
4865 6754 : assert(ctx->cur_page < ctx->super->md_len);
4866 6754 : lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
4867 6754 : bs_sequence_read_dev(ctx->seq, ctx->page, lba,
4868 6754 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
4869 : bs_load_replay_md_cpl, ctx);
4870 6754 : }
4871 :
4872 : static void
4873 106 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
4874 : {
4875 106 : ctx->page_index = 0;
4876 106 : ctx->cur_page = 0;
4877 106 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
4878 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
4879 106 : if (!ctx->page) {
4880 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4881 0 : return;
4882 : }
4883 106 : bs_load_replay_cur_md_page(ctx);
4884 : }
4885 :
4886 : static void
4887 106 : bs_recover(struct spdk_bs_load_ctx *ctx)
4888 : {
4889 : int rc;
4890 :
4891 106 : SPDK_NOTICELOG("Performing recovery on blobstore\n");
4892 106 : rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
4893 106 : if (rc < 0) {
4894 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4895 0 : return;
4896 : }
4897 :
4898 106 : rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
4899 106 : if (rc < 0) {
4900 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4901 0 : return;
4902 : }
4903 :
4904 106 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4905 106 : if (rc < 0) {
4906 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4907 0 : return;
4908 : }
4909 :
4910 106 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
4911 106 : if (rc < 0) {
4912 0 : bs_load_ctx_fail(ctx, -ENOMEM);
4913 0 : return;
4914 : }
4915 :
4916 106 : ctx->bs->num_free_clusters = ctx->bs->total_clusters;
4917 106 : bs_load_replay_md(ctx);
4918 : }
4919 :
4920 : static int
4921 276 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
4922 : {
4923 : int rc;
4924 :
4925 276 : if (ctx->super->size == 0) {
4926 8 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
4927 : }
4928 :
4929 276 : if (ctx->super->io_unit_size == 0) {
4930 8 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
4931 : }
4932 :
4933 276 : ctx->bs->clean = 1;
4934 276 : ctx->bs->cluster_sz = ctx->super->cluster_size;
4935 276 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
4936 276 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
4937 276 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
4938 276 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
4939 : }
4940 276 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
4941 276 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
4942 276 : if (rc < 0) {
4943 0 : return -ENOMEM;
4944 : }
4945 276 : ctx->bs->md_start = ctx->super->md_start;
4946 276 : ctx->bs->md_len = ctx->super->md_len;
4947 276 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
4948 276 : if (rc < 0) {
4949 0 : return -ENOMEM;
4950 : }
4951 :
4952 552 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
4953 276 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
4954 276 : ctx->bs->super_blob = ctx->super->super_blob;
4955 276 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
4956 :
4957 276 : return 0;
4958 : }
4959 :
4960 : static void
4961 300 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
4962 : {
4963 300 : struct spdk_bs_load_ctx *ctx = cb_arg;
4964 : int rc;
4965 :
4966 300 : rc = bs_super_validate(ctx->super, ctx->bs);
4967 300 : if (rc != 0) {
4968 24 : bs_load_ctx_fail(ctx, rc);
4969 24 : return;
4970 : }
4971 :
4972 276 : rc = bs_parse_super(ctx);
4973 276 : if (rc < 0) {
4974 0 : bs_load_ctx_fail(ctx, rc);
4975 0 : return;
4976 : }
4977 :
4978 276 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
4979 106 : bs_recover(ctx);
4980 : } else {
4981 170 : bs_load_read_used_pages(ctx);
4982 : }
4983 : }
4984 :
4985 : static inline int
4986 308 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
4987 : {
4988 :
4989 308 : if (!src->opts_size) {
4990 0 : SPDK_ERRLOG("opts_size should not be zero value\n");
4991 0 : return -1;
4992 : }
4993 :
4994 : #define FIELD_OK(field) \
4995 : offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
4996 :
4997 : #define SET_FIELD(field) \
4998 : if (FIELD_OK(field)) { \
4999 : dst->field = src->field; \
5000 : } \
5001 :
5002 308 : SET_FIELD(cluster_sz);
5003 308 : SET_FIELD(num_md_pages);
5004 308 : SET_FIELD(max_md_ops);
5005 308 : SET_FIELD(max_channel_ops);
5006 308 : SET_FIELD(clear_method);
5007 :
5008 308 : if (FIELD_OK(bstype)) {
5009 308 : memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
5010 : }
5011 308 : SET_FIELD(iter_cb_fn);
5012 308 : SET_FIELD(iter_cb_arg);
5013 308 : SET_FIELD(force_recover);
5014 308 : SET_FIELD(esnap_bs_dev_create);
5015 308 : SET_FIELD(esnap_ctx);
5016 :
5017 308 : dst->opts_size = src->opts_size;
5018 :
5019 : /* Do not remove this statement. When adding a new field, update the
5020 : * assert below and add a corresponding SET_FIELD statement. */
5021 : SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
5022 :
5023 : #undef FIELD_OK
5024 : #undef SET_FIELD
5025 :
5026 308 : return 0;
5027 : }
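 :
 : /*
 : * The opts_size/FIELD_OK scheme above provides ABI compatibility: a caller
 : * compiled against an older, smaller struct spdk_bs_opts only has the fields
 : * it declared copied, while newer fields keep the defaults assigned by
 : * spdk_bs_opts_init(). A minimal caller-side sketch (hypothetical values):
 : *
 : *	struct spdk_bs_opts opts;
 : *
 : *	spdk_bs_opts_init(&opts, sizeof(opts));	// records opts_size
 : *	opts.cluster_sz = 4 * 1024 * 1024;	// override selected fields
 : *	snprintf(opts.bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH, "example");
 : */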
5028 :
5029 : void
5030 312 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5031 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5032 : {
5033 312 : struct spdk_blob_store *bs;
5034 312 : struct spdk_bs_cpl cpl;
5035 312 : struct spdk_bs_load_ctx *ctx;
5036 312 : struct spdk_bs_opts opts = {};
5037 : int err;
5038 :
5039 312 : SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
5040 :
5041 312 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5042 4 : SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
5043 4 : dev->destroy(dev);
5044 4 : cb_fn(cb_arg, NULL, -EINVAL);
5045 4 : return;
5046 : }
5047 :
5048 308 : spdk_bs_opts_init(&opts, sizeof(opts));
5049 308 : if (o) {
5050 122 : 		if (bs_opts_copy(o, &opts)) {
 : 			/* bs_opts_copy() fails only on invalid opts (opts_size == 0);
 : 			 * destroy the dev and complete the callback instead of
 : 			 * silently returning without ever invoking it. */
 : 			dev->destroy(dev);
 : 			cb_fn(cb_arg, NULL, -EINVAL);
5051 0 : 			return;
5052 : }
5053 : }
5054 :
5055 308 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
5056 8 : dev->destroy(dev);
5057 8 : cb_fn(cb_arg, NULL, -EINVAL);
5058 8 : return;
5059 : }
5060 :
5061 300 : err = bs_alloc(dev, &opts, &bs, &ctx);
5062 300 : if (err) {
5063 0 : dev->destroy(dev);
5064 0 : cb_fn(cb_arg, NULL, err);
5065 0 : return;
5066 : }
5067 :
5068 300 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5069 300 : cpl.u.bs_handle.cb_fn = cb_fn;
5070 300 : cpl.u.bs_handle.cb_arg = cb_arg;
5071 300 : cpl.u.bs_handle.bs = bs;
5072 :
5073 300 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5074 300 : if (!ctx->seq) {
5075 0 : spdk_free(ctx->super);
5076 0 : free(ctx);
5077 0 : bs_free(bs);
5078 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5079 0 : return;
5080 : }
5081 :
5082 : /* Read the super block */
5083 300 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5084 300 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5085 : bs_load_super_cpl, ctx);
5086 : }
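 :
 : /*
 : * Hypothetical usage sketch for the API above: load a blobstore from an
 : * existing bs_dev on the metadata thread. Errors are reported only through
 : * the completion callback.
 : *
 : *	static void
 : *	load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
 : *	{
 : *		if (bserrno != 0) {
 : *			SPDK_ERRLOG("bs load failed: %s\n", spdk_strerror(-bserrno));
 : *			return;
 : *		}
 : *		// bs is now ready, e.g. for spdk_bs_create_blob()
 : *	}
 : *
 : *	spdk_bs_load(dev, NULL, load_done, NULL);
 : */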
5087 :
5088 : /* END spdk_bs_load */
5089 :
5090 : /* START spdk_bs_dump */
5091 :
5092 : static void
5093 0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
5094 : {
5095 0 : spdk_free(ctx->super);
5096 :
5097 : /*
5098 : * We need to defer calling bs_call_cpl() until after
5099 : * dev destruction, so tuck these away for later use.
5100 : */
5101 0 : ctx->bs->unload_err = bserrno;
5102 0 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5103 0 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5104 :
5105 0 : bs_sequence_finish(seq, 0);
5106 0 : bs_free(ctx->bs);
5107 0 : free(ctx);
5108 0 : }
5109 :
5110 : static void
5111 0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5112 : {
5113 : struct spdk_blob_md_descriptor_xattr *desc_xattr;
5114 : uint32_t i;
5115 : const char *type;
5116 :
5117 0 : desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
5118 :
5119 0 : 	if (desc_xattr->length !=
5120 : 	    sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
5121 0 : 	    desc_xattr->name_length + desc_xattr->value_length) {
 : 		/* The descriptor is internally inconsistent; flag it in the dump
 : 		 * rather than silently ignoring the mismatch. */
 : 		fprintf(ctx->fp, "Warning: XATTR descriptor length mismatch\n");
5122 : 	}
5123 :
5124 0 : memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
5125 0 : ctx->xattr_name[desc_xattr->name_length] = '\0';
5126 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5127 0 : type = "XATTR";
5128 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5129 0 : type = "XATTR_INTERNAL";
5130 : } else {
5131 0 : assert(false);
5132 : type = "XATTR_?";
5133 : }
5134 0 : fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
5135 0 : fprintf(ctx->fp, " value = \"");
5136 0 : ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
5137 0 : (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
5138 0 : desc_xattr->value_length);
5139 0 : fprintf(ctx->fp, "\"\n");
5140 0 : for (i = 0; i < desc_xattr->value_length; i++) {
5141 0 : if (i % 16 == 0) {
5142 0 : fprintf(ctx->fp, " ");
5143 : }
5144 0 : fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
5145 0 : if ((i + 1) % 16 == 0) {
5146 0 : fprintf(ctx->fp, "\n");
5147 : }
5148 : }
5149 0 : if (i % 16 != 0) {
5150 0 : fprintf(ctx->fp, "\n");
5151 : }
5152 0 : }
5153 :
5154 : struct type_flag_desc {
5155 : uint64_t mask;
5156 : uint64_t val;
5157 : const char *name;
5158 : };
5159 :
5160 : static void
5161 0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
5162 : struct type_flag_desc *desc, size_t numflags)
5163 : {
5164 0 : uint64_t covered = 0;
5165 : size_t i;
5166 :
5167 0 : for (i = 0; i < numflags; i++) {
5168 0 : if ((desc[i].mask & flags) != desc[i].val) {
5169 0 : continue;
5170 : }
5171 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
5172 0 : if (desc[i].mask != desc[i].val) {
5173 0 : fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
5174 0 : desc[i].mask, desc[i].val);
5175 : }
5176 0 : fprintf(ctx->fp, "\n");
5177 0 : covered |= desc[i].mask;
5178 : }
5179 0 : if ((flags & ~covered) != 0) {
5180 0 : fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
5181 : }
5182 0 : }
5183 :
5184 : static void
5185 0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5186 : {
5187 : struct spdk_blob_md_descriptor_flags *type_desc;
5188 : #define ADD_FLAG(f) { f, f, #f }
5189 : #define ADD_MASK_VAL(m, v) { m, v, #v }
5190 : static struct type_flag_desc invalid[] = {
5191 : ADD_FLAG(SPDK_BLOB_THIN_PROV),
5192 : ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
5193 : ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
5194 : };
5195 : static struct type_flag_desc data_ro[] = {
5196 : ADD_FLAG(SPDK_BLOB_READ_ONLY),
5197 : };
5198 : static struct type_flag_desc md_ro[] = {
5199 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
5200 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
5201 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
5202 : ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
5203 : };
5204 : #undef ADD_FLAG
5205 : #undef ADD_MASK_VAL
5206 :
5207 0 : type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
5208 0 : fprintf(ctx->fp, "Flags:\n");
5209 0 : fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
5210 0 : bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
5211 : SPDK_COUNTOF(invalid));
5212 0 : fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
5213 0 : bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
5214 : SPDK_COUNTOF(data_ro));
5215 0 : fprintf(ctx->fp, "\t md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
5216 0 : bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
5217 : SPDK_COUNTOF(md_ro));
5218 0 : }
5219 :
5220 : static void
5221 0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
5222 : {
5223 : struct spdk_blob_md_descriptor_extent_table *et_desc;
5224 : uint64_t num_extent_pages;
5225 : uint32_t et_idx;
5226 :
5227 0 : et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
5228 0 : num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
5229 : sizeof(et_desc->extent_page[0]);
5230 :
5231 0 : fprintf(ctx->fp, "Extent table:\n");
5232 0 : for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
5233 0 : if (et_desc->extent_page[et_idx].page_idx == 0) {
5234 : /* Zeroes represent unallocated extent pages. */
5235 0 : continue;
5236 : }
5237 0 : fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
5238 : " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
5239 : et_desc->extent_page[et_idx].num_pages,
5240 : bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
5241 : }
5242 0 : }
5243 :
5244 : static void
5245 0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
5246 : {
5247 0 : uint32_t page_idx = ctx->cur_page;
5248 0 : struct spdk_blob_md_page *page = ctx->page;
5249 : struct spdk_blob_md_descriptor *desc;
5250 0 : size_t cur_desc = 0;
5251 : uint32_t crc;
5252 :
5253 0 : fprintf(ctx->fp, "=========\n");
5254 0 : fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
5255 0 : fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
5256 0 : fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
5257 0 : fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
5258 0 : if (page->next == SPDK_INVALID_MD_PAGE) {
5259 0 : fprintf(ctx->fp, "Next: None\n");
5260 : } else {
5261 0 : fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
5262 : }
5263 0 : fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
5264 0 : if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
5265 0 : fprintf(ctx->fp, " md");
5266 : }
5267 0 : if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
5268 0 : fprintf(ctx->fp, " blob");
5269 : }
5270 0 : fprintf(ctx->fp, "\n");
5271 :
5272 0 : crc = blob_md_page_calc_crc(page);
5273 0 : fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
5274 :
5275 0 : desc = (struct spdk_blob_md_descriptor *)page->descriptors;
5276 0 : while (cur_desc < sizeof(page->descriptors)) {
5277 0 : if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
5278 0 : if (desc->length == 0) {
5279 : /* If padding and length are 0, this terminates the page */
5280 0 : break;
5281 : }
5282 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
5283 : struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
5284 : unsigned int i;
5285 :
5286 0 : desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
5287 :
5288 0 : for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
5289 0 : if (desc_extent_rle->extents[i].cluster_idx != 0) {
5290 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5291 : desc_extent_rle->extents[i].cluster_idx);
5292 : } else {
5293 0 : fprintf(ctx->fp, "Unallocated Extent - ");
5294 : }
5295 0 : fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
5296 0 : fprintf(ctx->fp, "\n");
5297 : }
5298 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
5299 : struct spdk_blob_md_descriptor_extent_page *desc_extent;
5300 : unsigned int i;
5301 :
5302 0 : desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
5303 :
5304 0 : for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
5305 0 : if (desc_extent->cluster_idx[i] != 0) {
5306 0 : fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
5307 : desc_extent->cluster_idx[i]);
5308 : } else {
5309 0 : fprintf(ctx->fp, "Unallocated Extent");
5310 : }
5311 0 : fprintf(ctx->fp, "\n");
5312 : }
5313 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
5314 0 : bs_dump_print_xattr(ctx, desc);
5315 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
5316 0 : bs_dump_print_xattr(ctx, desc);
5317 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
5318 0 : bs_dump_print_type_flags(ctx, desc);
5319 0 : } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
5320 0 : bs_dump_print_extent_table(ctx, desc);
5321 : } else {
5322 : /* Error */
5323 0 : fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
5324 : }
5325 : /* Advance to the next descriptor */
5326 0 : cur_desc += sizeof(*desc) + desc->length;
5327 0 : if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
5328 0 : break;
5329 : }
5330 0 : desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
5331 : }
5332 0 : }
5333 :
5334 : static void
5335 0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5336 : {
5337 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5338 :
5339 0 : if (bserrno != 0) {
5340 0 : bs_dump_finish(seq, ctx, bserrno);
5341 0 : return;
5342 : }
5343 :
5344 0 : if (ctx->page->id != 0) {
5345 0 : bs_dump_print_md_page(ctx);
5346 : }
5347 :
5348 0 : ctx->cur_page++;
5349 :
5350 0 : if (ctx->cur_page < ctx->super->md_len) {
5351 0 : bs_dump_read_md_page(seq, ctx);
5352 : } else {
5353 0 : spdk_free(ctx->page);
5354 0 : bs_dump_finish(seq, ctx, 0);
5355 : }
5356 : }
5357 :
5358 : static void
5359 0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
5360 : {
5361 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5362 : uint64_t lba;
5363 :
5364 0 : assert(ctx->cur_page < ctx->super->md_len);
5365 0 : lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
5366 0 : bs_sequence_read_dev(seq, ctx->page, lba,
5367 0 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
5368 : bs_dump_read_md_page_cpl, ctx);
5369 0 : }
5370 :
5371 : static void
5372 0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5373 : {
5374 0 : struct spdk_bs_load_ctx *ctx = cb_arg;
5375 : int rc;
5376 :
5377 0 : fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
5378 0 : if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5379 : sizeof(ctx->super->signature)) != 0) {
5380 0 : fprintf(ctx->fp, "(Mismatch)\n");
5381 0 : bs_dump_finish(seq, ctx, bserrno);
5382 0 : return;
5383 : } else {
5384 0 : fprintf(ctx->fp, "(OK)\n");
5385 : }
5386 0 : fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
5387 0 : fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
5388 0 : (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
5389 0 : fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
5390 0 : fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
5391 0 : fprintf(ctx->fp, "Super Blob ID: ");
5392 0 : if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
5393 0 : fprintf(ctx->fp, "(None)\n");
5394 : } else {
5395 0 : fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
5396 : }
5397 0 : fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
5398 0 : fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
5399 0 : fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
5400 0 : fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
5401 0 : fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
5402 0 : fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
5403 0 : fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
5404 0 : fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
5405 0 : fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
5406 :
5407 0 : ctx->cur_page = 0;
5408 0 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0,
5409 : NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5410 0 : if (!ctx->page) {
5411 0 : bs_dump_finish(seq, ctx, -ENOMEM);
5412 0 : return;
5413 : }
5414 :
5415 0 : rc = bs_parse_super(ctx);
5416 0 : if (rc < 0) {
5417 0 : bs_load_ctx_fail(ctx, rc);
5418 0 : return;
5419 : }
5420 :
5421 0 : bs_load_read_used_pages(ctx);
5422 : }
5423 :
5424 : void
5425 0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
5426 : spdk_bs_op_complete cb_fn, void *cb_arg)
5427 : {
5428 0 : struct spdk_blob_store *bs;
5429 0 : struct spdk_bs_cpl cpl;
5430 0 : struct spdk_bs_load_ctx *ctx;
5431 0 : struct spdk_bs_opts opts = {};
5432 : int err;
5433 :
5434 0 : SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
5435 :
5436 0 : spdk_bs_opts_init(&opts, sizeof(opts));
5437 :
5438 0 : err = bs_alloc(dev, &opts, &bs, &ctx);
5439 0 : if (err) {
5440 0 : dev->destroy(dev);
5441 0 : cb_fn(cb_arg, err);
5442 0 : return;
5443 : }
5444 :
5445 0 : ctx->dumping = true;
5446 0 : ctx->fp = fp;
5447 0 : ctx->print_xattr_fn = print_xattr_fn;
5448 :
5449 0 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5450 0 : cpl.u.bs_basic.cb_fn = cb_fn;
5451 0 : cpl.u.bs_basic.cb_arg = cb_arg;
5452 :
5453 0 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5454 0 : if (!ctx->seq) {
5455 0 : spdk_free(ctx->super);
5456 0 : free(ctx);
5457 0 : bs_free(bs);
5458 0 : cb_fn(cb_arg, -ENOMEM);
5459 0 : return;
5460 : }
5461 :
5462 : /* Read the super block */
5463 0 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5464 0 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5465 : bs_dump_super_cpl, ctx);
5466 : }
5467 :
5468 : /* END spdk_bs_dump */
5469 :
5470 : /* START spdk_bs_init */
5471 :
5472 : static void
5473 472 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5474 : {
5475 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5476 :
5477 472 : ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
5478 472 : spdk_free(ctx->super);
5479 472 : free(ctx);
5480 :
5481 472 : bs_sequence_finish(seq, bserrno);
5482 472 : }
5483 :
5484 : static void
5485 472 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5486 : {
5487 472 : struct spdk_bs_load_ctx *ctx = cb_arg;
5488 :
5489 : /* Write super block */
5490 472 : bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
5491 472 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
5492 : bs_init_persist_super_cpl, ctx);
5493 472 : }
5494 :
5495 : void
5496 488 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
5497 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
5498 : {
5499 488 : struct spdk_bs_load_ctx *ctx;
5500 488 : struct spdk_blob_store *bs;
5501 488 : struct spdk_bs_cpl cpl;
5502 : spdk_bs_sequence_t *seq;
5503 : spdk_bs_batch_t *batch;
5504 : uint64_t num_md_lba;
5505 : uint64_t num_md_pages;
5506 : uint64_t num_md_clusters;
5507 : uint64_t max_used_cluster_mask_len;
5508 : uint32_t i;
5509 488 : struct spdk_bs_opts opts = {};
5510 : int rc;
5511 : uint64_t lba, lba_count;
5512 :
5513 488 : SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
5514 :
5515 488 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
5516 4 : SPDK_ERRLOG("unsupported dev block length of %d\n",
5517 : dev->blocklen);
5518 4 : dev->destroy(dev);
5519 4 : cb_fn(cb_arg, NULL, -EINVAL);
5520 4 : return;
5521 : }
5522 :
5523 484 : spdk_bs_opts_init(&opts, sizeof(opts));
5524 484 : if (o) {
5525 182 : 		if (bs_opts_copy(o, &opts)) {
 : 			/* As in spdk_bs_load(): fail the init cleanly on invalid opts. */
 : 			dev->destroy(dev);
 : 			cb_fn(cb_arg, NULL, -EINVAL);
5526 0 : 			return;
5527 : }
5528 : }
5529 :
5530 484 : if (bs_opts_verify(&opts) != 0) {
5531 4 : dev->destroy(dev);
5532 4 : cb_fn(cb_arg, NULL, -EINVAL);
5533 4 : return;
5534 : }
5535 :
5536 480 : rc = bs_alloc(dev, &opts, &bs, &ctx);
5537 480 : if (rc) {
5538 4 : dev->destroy(dev);
5539 4 : cb_fn(cb_arg, NULL, rc);
5540 4 : return;
5541 : }
5542 :
5543 476 : if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
5544 : /* By default, allocate 1 page per cluster.
5545 : * Technically, this over-allocates metadata
5546 : * because more metadata will reduce the number
5547 : * of usable clusters. This can be addressed with
5548 : * more complex math in the future.
5549 : */
5550 468 : bs->md_len = bs->total_clusters;
5551 : } else {
5552 8 : bs->md_len = opts.num_md_pages;
5553 : }
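 :
 : 	/* Worked example (hypothetical sizes): a 10 GiB device with 1 MiB
 : 	 * clusters gives total_clusters = 10240, so the default reserves
 : 	 * md_len = 10240 metadata pages, i.e. 10240 * 4 KiB = 40 MiB of
 : 	 * metadata region.
 : 	 */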
5554 476 : rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
5555 476 : if (rc < 0) {
5556 0 : spdk_free(ctx->super);
5557 0 : free(ctx);
5558 0 : bs_free(bs);
5559 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5560 0 : return;
5561 : }
5562 :
5563 476 : rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
5564 476 : if (rc < 0) {
5565 0 : spdk_free(ctx->super);
5566 0 : free(ctx);
5567 0 : bs_free(bs);
5568 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5569 0 : return;
5570 : }
5571 :
5572 476 : rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
5573 476 : if (rc < 0) {
5574 0 : spdk_free(ctx->super);
5575 0 : free(ctx);
5576 0 : bs_free(bs);
5577 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5578 0 : return;
5579 : }
5580 :
5581 476 : memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
5582 : sizeof(ctx->super->signature));
5583 476 : ctx->super->version = SPDK_BS_VERSION;
5584 476 : ctx->super->length = sizeof(*ctx->super);
5585 476 : ctx->super->super_blob = bs->super_blob;
5586 476 : ctx->super->clean = 0;
5587 476 : ctx->super->cluster_size = bs->cluster_sz;
5588 476 : ctx->super->io_unit_size = bs->io_unit_size;
5589 476 : memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
5590 :
5591 : /* Calculate how many pages the metadata consumes at the front
5592 : * of the disk.
5593 : */
5594 :
5595 : /* The super block uses 1 page */
5596 476 : num_md_pages = 1;
5597 :
5598 : /* The used_md_pages mask requires 1 bit per metadata page, rounded
5599 : * up to the nearest page, plus a header.
5600 : */
5601 476 : ctx->super->used_page_mask_start = num_md_pages;
5602 476 : ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5603 476 : spdk_divide_round_up(bs->md_len, 8),
5604 : SPDK_BS_PAGE_SIZE);
5605 476 : num_md_pages += ctx->super->used_page_mask_len;
5606 :
5607 : /* The used_clusters mask requires 1 bit per cluster, rounded
5608 : * up to the nearest page, plus a header.
5609 : */
5610 476 : ctx->super->used_cluster_mask_start = num_md_pages;
5611 476 : ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5612 476 : spdk_divide_round_up(bs->total_clusters, 8),
5613 : SPDK_BS_PAGE_SIZE);
5614 : 	/* If the blobstore is later extended, the used_cluster bitmap will need more space.
5615 : 	 * Reserve enough mask pages for the maximum number of clusters that the
5616 : 	 * metadata region (bs->md_len pages) could describe.
5617 : */
5618 476 : max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5619 476 : spdk_divide_round_up(bs->md_len, 8),
5620 : SPDK_BS_PAGE_SIZE);
5621 476 : max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
5622 : ctx->super->used_cluster_mask_len);
5623 476 : num_md_pages += max_used_cluster_mask_len;
5624 :
5625 : /* The used_blobids mask requires 1 bit per metadata page, rounded
5626 : * up to the nearest page, plus a header.
5627 : */
5628 476 : ctx->super->used_blobid_mask_start = num_md_pages;
5629 476 : ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
5630 476 : spdk_divide_round_up(bs->md_len, 8),
5631 : SPDK_BS_PAGE_SIZE);
5632 476 : num_md_pages += ctx->super->used_blobid_mask_len;
5633 :
5634 : /* The metadata region size was chosen above */
5635 476 : ctx->super->md_start = bs->md_start = num_md_pages;
5636 476 : ctx->super->md_len = bs->md_len;
5637 476 : num_md_pages += bs->md_len;
5638 :
5639 476 : num_md_lba = bs_page_to_lba(bs, num_md_pages);
5640 :
5641 476 : ctx->super->size = dev->blockcnt * dev->blocklen;
5642 :
5643 476 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
5644 :
5645 476 : num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
5646 476 : if (num_md_clusters > bs->total_clusters) {
5647 4 : 		SPDK_ERRLOG("Blobstore metadata cannot use more clusters than are available; "
5648 : 			    "decrease the number of pages reserved for metadata "
5649 : "or increase cluster size.\n");
5650 4 : spdk_free(ctx->super);
5651 4 : spdk_bit_array_free(&ctx->used_clusters);
5652 4 : free(ctx);
5653 4 : bs_free(bs);
5654 4 : cb_fn(cb_arg, NULL, -ENOMEM);
5655 4 : return;
5656 : }
5657 : /* Claim all of the clusters used by the metadata */
5658 75700 : for (i = 0; i < num_md_clusters; i++) {
5659 75228 : spdk_bit_array_set(ctx->used_clusters, i);
5660 : }
5661 :
5662 472 : bs->num_free_clusters -= num_md_clusters;
5663 472 : bs->total_data_clusters = bs->num_free_clusters;
5664 :
5665 472 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
5666 472 : cpl.u.bs_handle.cb_fn = cb_fn;
5667 472 : cpl.u.bs_handle.cb_arg = cb_arg;
5668 472 : cpl.u.bs_handle.bs = bs;
5669 :
5670 472 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5671 472 : if (!seq) {
5672 0 : spdk_free(ctx->super);
5673 0 : free(ctx);
5674 0 : bs_free(bs);
5675 0 : cb_fn(cb_arg, NULL, -ENOMEM);
5676 0 : return;
5677 : }
5678 :
5679 472 : batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
5680 :
5681 : /* Clear metadata space */
5682 472 : bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
5683 :
5684 472 : lba = num_md_lba;
5685 472 : lba_count = ctx->bs->dev->blockcnt - lba;
5686 472 : switch (opts.clear_method) {
5687 456 : case BS_CLEAR_WITH_UNMAP:
5688 : /* Trim data clusters */
5689 456 : bs_batch_unmap_dev(batch, lba, lba_count);
5690 456 : break;
5691 0 : case BS_CLEAR_WITH_WRITE_ZEROES:
5692 : /* Write_zeroes to data clusters */
5693 0 : bs_batch_write_zeroes_dev(batch, lba, lba_count);
5694 0 : break;
5695 16 : case BS_CLEAR_WITH_NONE:
5696 : default:
5697 16 : break;
5698 : }
5699 :
5700 472 : bs_batch_close(batch);
5701 : }
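 :
 : /*
 : * Hypothetical usage sketch: format a fresh blobstore and skip clearing the
 : * data clusters (useful on brand-new or already-zeroed devices).
 : *
 : *	static void
 : *	init_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
 : *	{
 : *		if (bserrno != 0) {
 : *			SPDK_ERRLOG("bs init failed: %s\n", spdk_strerror(-bserrno));
 : *		}
 : *	}
 : *
 : *	struct spdk_bs_opts opts;
 : *
 : *	spdk_bs_opts_init(&opts, sizeof(opts));
 : *	opts.clear_method = BS_CLEAR_WITH_NONE;
 : *	spdk_bs_init(dev, &opts, init_done, NULL);
 : */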
5702 :
5703 : /* END spdk_bs_init */
5704 :
5705 : /* START spdk_bs_destroy */
5706 :
5707 : static void
5708 4 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5709 : {
5710 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
5711 4 : struct spdk_blob_store *bs = ctx->bs;
5712 :
5713 : /*
5714 : * We need to defer calling bs_call_cpl() until after
5715 : * dev destruction, so tuck these away for later use.
5716 : */
5717 4 : bs->unload_err = bserrno;
5718 4 : memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5719 4 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5720 :
5721 4 : bs_sequence_finish(seq, bserrno);
5722 :
5723 4 : bs_free(bs);
5724 4 : free(ctx);
5725 4 : }
5726 :
5727 : void
5728 4 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
5729 : void *cb_arg)
5730 : {
5731 4 : struct spdk_bs_cpl cpl;
5732 : spdk_bs_sequence_t *seq;
5733 : struct spdk_bs_load_ctx *ctx;
5734 :
5735 4 : SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
5736 :
5737 4 : if (!RB_EMPTY(&bs->open_blobs)) {
5738 0 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5739 0 : cb_fn(cb_arg, -EBUSY);
5740 0 : return;
5741 : }
5742 :
5743 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5744 4 : cpl.u.bs_basic.cb_fn = cb_fn;
5745 4 : cpl.u.bs_basic.cb_arg = cb_arg;
5746 :
5747 4 : ctx = calloc(1, sizeof(*ctx));
5748 4 : if (!ctx) {
5749 0 : cb_fn(cb_arg, -ENOMEM);
5750 0 : return;
5751 : }
5752 :
5753 4 : ctx->bs = bs;
5754 :
5755 4 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5756 4 : if (!seq) {
5757 0 : free(ctx);
5758 0 : cb_fn(cb_arg, -ENOMEM);
5759 0 : return;
5760 : }
5761 :
5762 : /* Write zeroes to the super block */
5763 4 : bs_sequence_write_zeroes_dev(seq,
5764 : bs_page_to_lba(bs, 0),
5765 : bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
5766 : bs_destroy_trim_cpl, ctx);
5767 : }
5768 :
5769 : /* END spdk_bs_destroy */
5770 :
5771 : /* START spdk_bs_unload */
5772 :
5773 : static void
5774 654 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
5775 : {
5776 654 : spdk_bs_sequence_t *seq = ctx->seq;
5777 :
5778 654 : spdk_free(ctx->super);
5779 :
5780 : /*
5781 : * We need to defer calling bs_call_cpl() until after
5782 : * dev destruction, so tuck these away for later use.
5783 : */
5784 654 : ctx->bs->unload_err = bserrno;
5785 654 : memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
5786 654 : seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
5787 :
5788 654 : bs_sequence_finish(seq, bserrno);
5789 :
5790 654 : bs_free(ctx->bs);
5791 654 : free(ctx);
5792 654 : }
5793 :
5794 : static void
5795 654 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5796 : {
5797 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5798 :
5799 654 : bs_unload_finish(ctx, bserrno);
5800 654 : }
5801 :
5802 : static void
5803 654 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5804 : {
5805 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5806 :
5807 654 : spdk_free(ctx->mask);
5808 :
5809 654 : if (bserrno != 0) {
5810 0 : bs_unload_finish(ctx, bserrno);
5811 0 : return;
5812 : }
5813 :
5814 654 : ctx->super->clean = 1;
5815 :
5816 654 : bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
5817 : }
5818 :
5819 : static void
5820 654 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5821 : {
5822 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5823 :
5824 654 : spdk_free(ctx->mask);
5825 654 : ctx->mask = NULL;
5826 :
5827 654 : if (bserrno != 0) {
5828 0 : bs_unload_finish(ctx, bserrno);
5829 0 : return;
5830 : }
5831 :
5832 654 : bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
5833 : }
5834 :
5835 : static void
5836 654 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5837 : {
5838 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5839 :
5840 654 : spdk_free(ctx->mask);
5841 654 : ctx->mask = NULL;
5842 :
5843 654 : if (bserrno != 0) {
5844 0 : bs_unload_finish(ctx, bserrno);
5845 0 : return;
5846 : }
5847 :
5848 654 : bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
5849 : }
5850 :
5851 : static void
5852 654 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5853 : {
5854 654 : struct spdk_bs_load_ctx *ctx = cb_arg;
5855 : int rc;
5856 :
5857 654 : if (bserrno != 0) {
5858 0 : bs_unload_finish(ctx, bserrno);
5859 0 : return;
5860 : }
5861 :
5862 654 : rc = bs_super_validate(ctx->super, ctx->bs);
5863 654 : if (rc != 0) {
5864 0 : bs_unload_finish(ctx, rc);
5865 0 : return;
5866 : }
5867 :
5868 654 : bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
5869 : }
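 :
 : /*
 : * Unload completion chain: read and validate the super block, then persist
 : * the used_md_pages, used_blobids, and used_clusters masks in turn, set
 : * clean = 1, and finally rewrite the super block. The next load of a clean
 : * blobstore can trust those masks and skip the full metadata replay in
 : * bs_recover().
 : */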
5870 :
5871 : void
5872 662 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
5873 : {
5874 662 : struct spdk_bs_cpl cpl;
5875 : struct spdk_bs_load_ctx *ctx;
5876 :
5877 662 : SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
5878 :
5879 : /*
5880 : * If external snapshot channels are being destroyed while the blobstore is unloaded, the
5881 : * unload is deferred until after the channel destruction completes.
5882 : */
5883 662 : if (bs->esnap_channels_unloading != 0) {
5884 4 : if (bs->esnap_unload_cb_fn != NULL) {
5885 0 : SPDK_ERRLOG("Blobstore unload in progress\n");
5886 0 : cb_fn(cb_arg, -EBUSY);
5887 0 : return;
5888 : }
5889 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
5890 : " esnap clones are unloading\n", bs->esnap_channels_unloading);
5891 4 : bs->esnap_unload_cb_fn = cb_fn;
5892 4 : bs->esnap_unload_cb_arg = cb_arg;
5893 4 : return;
5894 : }
5895 658 : if (bs->esnap_unload_cb_fn != NULL) {
5896 4 : SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
5897 4 : assert(bs->esnap_unload_cb_fn == cb_fn);
5898 4 : assert(bs->esnap_unload_cb_arg == cb_arg);
5899 4 : bs->esnap_unload_cb_fn = NULL;
5900 4 : bs->esnap_unload_cb_arg = NULL;
5901 : }
5902 :
5903 658 : if (!RB_EMPTY(&bs->open_blobs)) {
5904 4 : SPDK_ERRLOG("Blobstore still has open blobs\n");
5905 4 : cb_fn(cb_arg, -EBUSY);
5906 4 : return;
5907 : }
5908 :
5909 654 : ctx = calloc(1, sizeof(*ctx));
5910 654 : if (!ctx) {
5911 0 : cb_fn(cb_arg, -ENOMEM);
5912 0 : return;
5913 : }
5914 :
5915 654 : ctx->bs = bs;
5916 :
5917 654 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
5918 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
5919 654 : if (!ctx->super) {
5920 0 : free(ctx);
5921 0 : cb_fn(cb_arg, -ENOMEM);
5922 0 : return;
5923 : }
5924 :
5925 654 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
5926 654 : cpl.u.bs_basic.cb_fn = cb_fn;
5927 654 : cpl.u.bs_basic.cb_arg = cb_arg;
5928 :
5929 654 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
5930 654 : if (!ctx->seq) {
5931 0 : spdk_free(ctx->super);
5932 0 : free(ctx);
5933 0 : cb_fn(cb_arg, -ENOMEM);
5934 0 : return;
5935 : }
5936 :
5937 : /* Read super block */
5938 654 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
5939 654 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
5940 : bs_unload_read_super_cpl, ctx);
5941 : }
5942 :
5943 : /* END spdk_bs_unload */
5944 :
5945 : /* START spdk_bs_set_super */
5946 :
5947 : struct spdk_bs_set_super_ctx {
5948 : struct spdk_blob_store *bs;
5949 : struct spdk_bs_super_block *super;
5950 : };
5951 :
5952 : static void
5953 8 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5954 : {
5955 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5956 :
5957 8 : if (bserrno != 0) {
5958 0 : SPDK_ERRLOG("Unable to write to super block of blobstore\n");
5959 : }
5960 :
5961 8 : spdk_free(ctx->super);
5962 :
5963 8 : bs_sequence_finish(seq, bserrno);
5964 :
5965 8 : free(ctx);
5966 8 : }
5967 :
5968 : static void
5969 8 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
5970 : {
5971 8 : struct spdk_bs_set_super_ctx *ctx = cb_arg;
5972 : int rc;
5973 :
5974 8 : if (bserrno != 0) {
5975 0 : SPDK_ERRLOG("Unable to read super block of blobstore\n");
5976 0 : spdk_free(ctx->super);
5977 0 : bs_sequence_finish(seq, bserrno);
5978 0 : free(ctx);
5979 0 : return;
5980 : }
5981 :
5982 8 : rc = bs_super_validate(ctx->super, ctx->bs);
5983 8 : if (rc != 0) {
5984 0 : SPDK_ERRLOG("Not a valid super block\n");
5985 0 : spdk_free(ctx->super);
5986 0 : bs_sequence_finish(seq, rc);
5987 0 : free(ctx);
5988 0 : return;
5989 : }
5990 :
5991 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
5992 : }
5993 :
5994 : void
5995 8 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
5996 : spdk_bs_op_complete cb_fn, void *cb_arg)
5997 : {
5998 8 : struct spdk_bs_cpl cpl;
5999 : spdk_bs_sequence_t *seq;
6000 : struct spdk_bs_set_super_ctx *ctx;
6001 :
6002 8 : SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
6003 :
6004 8 : ctx = calloc(1, sizeof(*ctx));
6005 8 : if (!ctx) {
6006 0 : cb_fn(cb_arg, -ENOMEM);
6007 0 : return;
6008 : }
6009 :
6010 8 : ctx->bs = bs;
6011 :
6012 8 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
6013 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
6014 8 : if (!ctx->super) {
6015 0 : free(ctx);
6016 0 : cb_fn(cb_arg, -ENOMEM);
6017 0 : return;
6018 : }
6019 :
6020 8 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
6021 8 : cpl.u.bs_basic.cb_fn = cb_fn;
6022 8 : cpl.u.bs_basic.cb_arg = cb_arg;
6023 :
6024 8 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
6025 8 : if (!seq) {
6026 0 : spdk_free(ctx->super);
6027 0 : free(ctx);
6028 0 : cb_fn(cb_arg, -ENOMEM);
6029 0 : return;
6030 : }
6031 :
6032 8 : bs->super_blob = blobid;
6033 :
6034 : /* Read super block */
6035 8 : bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
6036 8 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
6037 : bs_set_super_read_cpl, ctx);
6038 : }
6039 :
6040 : /* END spdk_bs_set_super */
6041 :
6042 : void
6043 12 : spdk_bs_get_super(struct spdk_blob_store *bs,
6044 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6045 : {
6046 12 : if (bs->super_blob == SPDK_BLOBID_INVALID) {
6047 4 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
6048 : } else {
6049 8 : cb_fn(cb_arg, bs->super_blob, 0);
6050 : }
6051 12 : }
6052 :
6053 : uint64_t
6054 132 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
6055 : {
6056 132 : return bs->cluster_sz;
6057 : }
6058 :
6059 : uint64_t
6060 68 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
6061 : {
6062 68 : return SPDK_BS_PAGE_SIZE;
6063 : }
6064 :
6065 : uint64_t
6066 738 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
6067 : {
6068 738 : return bs->io_unit_size;
6069 : }
6070 :
6071 : uint64_t
6072 540 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
6073 : {
6074 540 : return bs->num_free_clusters;
6075 : }
6076 :
6077 : uint64_t
6078 92 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
6079 : {
6080 92 : return bs->total_data_clusters;
6081 : }
6082 :
6083 : static int
6084 780 : bs_register_md_thread(struct spdk_blob_store *bs)
6085 : {
6086 780 : bs->md_channel = spdk_get_io_channel(bs);
6087 780 : if (!bs->md_channel) {
6088 0 : SPDK_ERRLOG("Failed to get IO channel.\n");
6089 0 : return -1;
6090 : }
6091 :
6092 780 : return 0;
6093 : }
6094 :
6095 : static int
6096 780 : bs_unregister_md_thread(struct spdk_blob_store *bs)
6097 : {
6098 780 : spdk_put_io_channel(bs->md_channel);
6099 :
6100 780 : return 0;
6101 : }
6102 :
6103 : spdk_blob_id
6104 562 : spdk_blob_get_id(struct spdk_blob *blob)
6105 : {
6106 562 : assert(blob != NULL);
6107 :
6108 562 : return blob->id;
6109 : }
6110 :
6111 : uint64_t
6112 24 : spdk_blob_get_num_pages(struct spdk_blob *blob)
6113 : {
6114 24 : assert(blob != NULL);
6115 :
6116 24 : return bs_cluster_to_page(blob->bs, blob->active.num_clusters);
6117 : }
6118 :
6119 : uint64_t
6120 24 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
6121 : {
6122 24 : assert(blob != NULL);
6123 :
6124 24 : return spdk_blob_get_num_pages(blob) * bs_io_unit_per_page(blob->bs);
6125 : }
6126 :
6127 : uint64_t
6128 569 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
6129 : {
6130 569 : assert(blob != NULL);
6131 :
6132 569 : return blob->active.num_clusters;
6133 : }
6134 :
6135 : uint64_t
6136 330 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
6137 : {
6138 330 : assert(blob != NULL);
6139 :
6140 330 : return blob->active.num_allocated_clusters;
6141 : }
6142 :
6143 : static uint64_t
6144 24 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
6145 : {
6146 24 : uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
6147 :
6148 44 : while (offset < blob_io_unit_num) {
6149 40 : if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
6150 20 : return offset;
6151 : }
6152 :
6153 20 : offset += bs_num_io_units_to_cluster_boundary(blob, offset);
6154 : }
6155 :
6156 4 : return UINT64_MAX;
6157 : }
6158 :
6159 : uint64_t
6160 12 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6161 : {
6162 12 : return blob_find_io_unit(blob, offset, true);
6163 : }
6164 :
6165 : uint64_t
6166 12 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
6167 : {
6168 12 : return blob_find_io_unit(blob, offset, false);
6169 : }
6170 :
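 :
 : /*
 : * Hypothetical sketch using the two helpers above to walk only the allocated
 : * ranges of a thin-provisioned blob (UINT64_MAX means no further match):
 : *
 : *	uint64_t start = 0, hole;
 : *
 : *	while ((start = spdk_blob_get_next_allocated_io_unit(blob, start)) != UINT64_MAX) {
 : *		hole = spdk_blob_get_next_unallocated_io_unit(blob, start);
 : *		// io units from start up to hole (or the blob's end) are allocated
 : *		if (hole == UINT64_MAX) {
 : *			break;
 : *		}
 : *		start = hole;
 : *	}
 : */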
6171 : /* START spdk_bs_create_blob */
6172 :
6173 : static void
6174 1878 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
6175 : {
6176 1878 : struct spdk_blob *blob = cb_arg;
6177 1878 : uint32_t page_idx = bs_blobid_to_page(blob->id);
6178 :
6179 1878 : if (bserrno != 0) {
6180 0 : spdk_spin_lock(&blob->bs->used_lock);
6181 0 : spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
6182 0 : bs_release_md_page(blob->bs, page_idx);
6183 0 : spdk_spin_unlock(&blob->bs->used_lock);
6184 : }
6185 :
6186 1878 : blob_free(blob);
6187 :
6188 1878 : bs_sequence_finish(seq, bserrno);
6189 1878 : }
6190 :
6191 : static int
6192 3776 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
6193 : bool internal)
6194 : {
6195 : uint64_t i;
6196 3776 : size_t value_len = 0;
6197 : int rc;
6198 3776 : const void *value = NULL;
6199 3776 : if (xattrs->count > 0 && xattrs->get_value == NULL) {
6200 8 : return -EINVAL;
6201 : }
6202 4084 : for (i = 0; i < xattrs->count; i++) {
6203 320 : xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
6204 320 : if (value == NULL || value_len == 0) {
6205 4 : return -EINVAL;
6206 : }
6207 316 : rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
6208 316 : if (rc < 0) {
6209 0 : return rc;
6210 : }
6211 : }
6212 3764 : return 0;
6213 : }
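 :
 : /*
 : * Sketch of the xattr callback contract enforced above (hypothetical names):
 : * for each entry in xattrs->names, get_value() must produce a non-NULL value
 : * with a non-zero length, otherwise creation fails with -EINVAL.
 : *
 : *	static void
 : *	my_get_value(void *ctx, const char *name, const void **value, size_t *value_len)
 : *	{
 : *		*value = "demo";		// assumed application-provided data
 : *		*value_len = sizeof("demo");
 : *	}
 : *
 : *	char *names[] = { "name" };
 : *
 : *	opts.xattrs.count = 1;
 : *	opts.xattrs.names = names;
 : *	opts.xattrs.get_value = my_get_value;
 : */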
6214 :
6215 : static void
6216 1862 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
6217 : {
6218 : #define FIELD_OK(field) \
6219 : offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
6220 :
6221 : #define SET_FIELD(field) \
6222 : if (FIELD_OK(field)) { \
6223 : dst->field = src->field; \
6224 : } \
6225 :
6226 1862 : SET_FIELD(num_clusters);
6227 1862 : SET_FIELD(thin_provision);
6228 1862 : SET_FIELD(clear_method);
6229 :
6230 1862 : if (FIELD_OK(xattrs)) {
6231 1862 : memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
6232 : }
6233 :
6234 1862 : SET_FIELD(use_extent_table);
6235 1862 : SET_FIELD(esnap_id);
6236 1862 : SET_FIELD(esnap_id_len);
6237 :
6238 1862 : dst->opts_size = src->opts_size;
6239 :
6240 : /* Do not remove this statement. When adding a new field, update the
6241 : * assert below and add a corresponding SET_FIELD statement. */
6242 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
6243 :
6244 : #undef FIELD_OK
6245 : #undef SET_FIELD
6246 1862 : }
6247 :
6248 : static void
6249 1894 : bs_create_blob(struct spdk_blob_store *bs,
6250 : const struct spdk_blob_opts *opts,
6251 : const struct spdk_blob_xattr_opts *internal_xattrs,
6252 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6253 : {
6254 : struct spdk_blob *blob;
6255 : uint32_t page_idx;
6256 1894 : struct spdk_bs_cpl cpl;
6257 1894 : struct spdk_blob_opts opts_local;
6258 1894 : struct spdk_blob_xattr_opts internal_xattrs_default;
6259 : spdk_bs_sequence_t *seq;
6260 : spdk_blob_id id;
6261 : int rc;
6262 :
6263 1894 : assert(spdk_get_thread() == bs->md_thread);
6264 :
6265 1894 : spdk_spin_lock(&bs->used_lock);
6266 1894 : page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
6267 1894 : if (page_idx == UINT32_MAX) {
6268 0 : spdk_spin_unlock(&bs->used_lock);
6269 0 : cb_fn(cb_arg, 0, -ENOMEM);
6270 0 : return;
6271 : }
6272 1894 : spdk_bit_array_set(bs->used_blobids, page_idx);
6273 1894 : bs_claim_md_page(bs, page_idx);
6274 1894 : spdk_spin_unlock(&bs->used_lock);
6275 :
6276 1894 : id = bs_page_to_blobid(page_idx);
6277 :
6278 1894 : SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
6279 :
6280 1894 : spdk_blob_opts_init(&opts_local, sizeof(opts_local));
6281 1894 : if (opts) {
6282 1862 : blob_opts_copy(opts, &opts_local);
6283 : }
6284 :
6285 1894 : blob = blob_alloc(bs, id);
6286 1894 : if (!blob) {
6287 0 : rc = -ENOMEM;
6288 0 : goto error;
6289 : }
6290 :
6291 1894 : blob->use_extent_table = opts_local.use_extent_table;
6292 1894 : if (blob->use_extent_table) {
6293 968 : blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
6294 : }
6295 :
6296 1894 : if (!internal_xattrs) {
6297 1622 : blob_xattrs_init(&internal_xattrs_default);
6298 1622 : internal_xattrs = &internal_xattrs_default;
6299 : }
6300 :
6301 1894 : rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
6302 1894 : if (rc < 0) {
6303 12 : goto error;
6304 : }
6305 :
6306 1882 : rc = blob_set_xattrs(blob, internal_xattrs, true);
6307 1882 : if (rc < 0) {
6308 0 : goto error;
6309 : }
6310 :
6311 1882 : if (opts_local.thin_provision) {
6312 356 : blob_set_thin_provision(blob);
6313 : }
6314 :
6315 1882 : blob_set_clear_method(blob, opts_local.clear_method);
6316 :
6317 1882 : if (opts_local.esnap_id != NULL) {
6318 60 : if (opts_local.esnap_id_len > UINT16_MAX) {
6319 0 : 			SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
6320 : opts_local.esnap_id_len);
6321 0 : rc = -EINVAL;
6322 0 : goto error;
6323 :
6324 : }
6325 60 : blob_set_thin_provision(blob);
6326 60 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6327 60 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
6328 60 : opts_local.esnap_id, opts_local.esnap_id_len, true);
6329 60 : if (rc != 0) {
6330 0 : goto error;
6331 : }
6332 : }
6333 :
6334 1882 : rc = blob_resize(blob, opts_local.num_clusters);
6335 1882 : if (rc < 0) {
6336 4 : goto error;
6337 : }
6338 1878 : cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6339 1878 : cpl.u.blobid.cb_fn = cb_fn;
6340 1878 : cpl.u.blobid.cb_arg = cb_arg;
6341 1878 : cpl.u.blobid.blobid = blob->id;
6342 :
6343 1878 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
6344 1878 : if (!seq) {
6345 0 : rc = -ENOMEM;
6346 0 : goto error;
6347 : }
6348 :
6349 1878 : blob_persist(seq, blob, bs_create_blob_cpl, blob);
6350 1878 : return;
6351 :
6352 16 : error:
6353 16 : 	SPDK_ERRLOG("Failed to create blob: %s, size in clusters: %lu\n",
6354 : spdk_strerror(rc), opts_local.num_clusters);
6355 16 : if (blob != NULL) {
6356 16 : blob_free(blob);
6357 : }
6358 16 : spdk_spin_lock(&bs->used_lock);
6359 16 : spdk_bit_array_clear(bs->used_blobids, page_idx);
6360 16 : bs_release_md_page(bs, page_idx);
6361 16 : spdk_spin_unlock(&bs->used_lock);
6362 16 : cb_fn(cb_arg, 0, rc);
6363 : }
6364 :
6365 : void
6366 16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
6367 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6368 : {
6369 16 : bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
6370 16 : }
6371 :
6372 : void
6373 1598 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
6374 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6375 : {
6376 1598 : bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
6377 1598 : }
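 :
 : /*
 : * Hypothetical usage sketch: create a 100-cluster thin-provisioned blob and
 : * receive its id in the completion callback.
 : *
 : *	static void
 : *	create_done(void *cb_arg, spdk_blob_id blobid, int bserrno)
 : *	{
 : *		if (bserrno == 0) {
 : *			// continue with spdk_bs_open_blob(bs, blobid, ...)
 : *		}
 : *	}
 : *
 : *	struct spdk_blob_opts opts;
 : *
 : *	spdk_blob_opts_init(&opts, sizeof(opts));
 : *	opts.num_clusters = 100;
 : *	opts.thin_provision = true;
 : *	spdk_bs_create_blob_ext(bs, &opts, create_done, NULL);
 : */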
6378 :
6379 : /* END spdk_bs_create_blob */
6380 :
6381 : /* START blob_cleanup */
6382 :
6383 : struct spdk_clone_snapshot_ctx {
6384 : struct spdk_bs_cpl cpl;
6385 : int bserrno;
6386 : bool frozen;
6387 :
6388 : struct spdk_io_channel *channel;
6389 :
6390 : /* Current cluster for inflate operation */
6391 : uint64_t cluster;
6392 :
6393 : /* For inflation force allocation of all unallocated clusters and remove
6394 : 	/* For inflation, force allocation of all unallocated clusters and remove
6395 : 	 * thin-provisioning. Otherwise, only decouple the parent and keep the clone thin. */
6396 :
6397 : struct {
6398 : spdk_blob_id id;
6399 : struct spdk_blob *blob;
6400 : bool md_ro;
6401 : } original;
6402 : struct {
6403 : spdk_blob_id id;
6404 : struct spdk_blob *blob;
6405 : } new;
6406 :
6407 : /* xattrs specified for snapshot/clones only. They have no impact on
6408 : * the original blobs xattrs. */
6409 : 	 * the original blob's xattrs. */
6410 : };
6411 :
6412 : static void
6413 346 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
6414 : {
6415 346 : struct spdk_clone_snapshot_ctx *ctx = cb_arg;
6416 346 : struct spdk_bs_cpl *cpl = &ctx->cpl;
6417 :
6418 346 : if (bserrno != 0) {
6419 6 : if (ctx->bserrno != 0) {
6420 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6421 : } else {
6422 6 : ctx->bserrno = bserrno;
6423 : }
6424 : }
6425 :
6426 346 : switch (cpl->type) {
6427 282 : case SPDK_BS_CPL_TYPE_BLOBID:
6428 282 : cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
6429 282 : break;
6430 64 : case SPDK_BS_CPL_TYPE_BLOB_BASIC:
6431 64 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
6432 64 : break;
6433 0 : default:
6434 0 : SPDK_UNREACHABLE();
6435 : break;
6436 : }
6437 :
6438 346 : free(ctx);
6439 346 : }
6440 :
6441 : static void
6442 332 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
6443 : {
6444 332 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6445 332 : struct spdk_blob *origblob = ctx->original.blob;
6446 :
6447 332 : if (bserrno != 0) {
6448 0 : if (ctx->bserrno != 0) {
6449 0 : SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
6450 : } else {
6451 0 : ctx->bserrno = bserrno;
6452 : }
6453 : }
6454 :
6455 332 : ctx->original.id = origblob->id;
6456 332 : origblob->locked_operation_in_progress = false;
6457 :
6458 : /* Revert md_ro to original state */
6459 332 : origblob->md_ro = ctx->original.md_ro;
6460 :
6461 332 : spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
6462 332 : }
6463 :
6464 : static void
6465 332 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
6466 : {
6467 332 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6468 332 : struct spdk_blob *origblob = ctx->original.blob;
6469 :
6470 332 : if (bserrno != 0) {
6471 24 : if (ctx->bserrno != 0) {
6472 4 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6473 : } else {
6474 20 : ctx->bserrno = bserrno;
6475 : }
6476 : }
6477 :
6478 332 : if (ctx->frozen) {
6479 : /* Unfreeze any outstanding I/O */
6480 212 : blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
6481 : } else {
6482 120 : bs_snapshot_unfreeze_cpl(ctx, 0);
6483 : }
6484 :
6485 332 : }
6486 :
6487 : static void
6488 4 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
6489 : {
6490 4 : struct spdk_blob *newblob = ctx->new.blob;
6491 :
6492 4 : if (bserrno != 0) {
6493 4 : if (ctx->bserrno != 0) {
6494 0 : SPDK_ERRLOG("Cleanup error %d\n", bserrno);
6495 : } else {
6496 4 : ctx->bserrno = bserrno;
6497 : }
6498 : }
6499 :
6500 4 : ctx->new.id = newblob->id;
6501 4 : spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6502 4 : }
6503 :
6504 : /* END blob_cleanup */
6505 :
6506 : /* START spdk_bs_create_snapshot */
6507 :
6508 : static void
6509 220 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
6510 : {
6511 : uint64_t *cluster_temp;
6512 : uint64_t num_allocated_clusters_temp;
6513 : uint32_t *extent_page_temp;
6514 :
6515 220 : cluster_temp = blob1->active.clusters;
6516 220 : blob1->active.clusters = blob2->active.clusters;
6517 220 : blob2->active.clusters = cluster_temp;
6518 :
6519 220 : num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
6520 220 : blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
6521 220 : blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
6522 :
6523 220 : extent_page_temp = blob1->active.extent_pages;
6524 220 : blob1->active.extent_pages = blob2->active.extent_pages;
6525 220 : blob2->active.extent_pages = extent_page_temp;
6526 220 : }
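 :
 : /*
 : * The swap above is the heart of snapshot creation: the new snapshot blob
 : * takes ownership of the origin's allocated clusters and extent pages, while
 : * the origin inherits the new blob's zeroed map and becomes a thin clone
 : * backed by the snapshot. On a later metadata-sync failure the swap is
 : * simply reversed to restore the original state.
 : */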
6527 :
6528 : /* Copies an internal xattr */
6529 : static int
6530 28 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
6531 : {
6532 28 : const void *val = NULL;
6533 28 : size_t len;
6534 : int bserrno;
6535 :
6536 28 : bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
6537 28 : if (bserrno != 0) {
6538 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
6539 0 : return bserrno;
6540 : }
6541 :
6542 28 : bserrno = blob_set_xattr(toblob, name, val, len, true);
6543 28 : if (bserrno != 0) {
6544 0 : SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
6545 : name, toblob->id);
6546 0 : return bserrno;
6547 : }
6548 28 : return 0;
6549 : }
6550 :
6551 : static void
6552 208 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
6553 : {
6554 208 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6555 208 : struct spdk_blob *origblob = ctx->original.blob;
6556 208 : struct spdk_blob *newblob = ctx->new.blob;
6557 :
6558 208 : if (bserrno != 0) {
6559 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6560 4 : if (blob_is_esnap_clone(newblob)) {
6561 0 : bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6562 0 : origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
6563 : }
6564 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6565 4 : return;
6566 : }
6567 :
6568 : /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
6569 204 : bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
6570 204 : if (bserrno != 0) {
6571 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6572 0 : return;
6573 : }
6574 :
6575 204 : bs_blob_list_add(ctx->original.blob);
6576 :
6577 204 : spdk_blob_set_read_only(newblob);
6578 :
6579 : /* sync snapshot metadata */
6580 204 : spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
6581 : }
6582 :
6583 : static void
6584 212 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
6585 : {
6586 212 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6587 212 : struct spdk_blob *origblob = ctx->original.blob;
6588 212 : struct spdk_blob *newblob = ctx->new.blob;
6589 :
6590 212 : if (bserrno != 0) {
6591 : /* return cluster map back to original */
6592 4 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6593 :
6594 : /* Newblob md sync failed. Valid clusters are only present in origblob.
6595 : 		 * Since I/O is frozen on origblob, no changes to the zeroed-out cluster map should have occurred.
6596 : 		 * Newblob needs to be reverted to its thin-provisioned state at creation to close properly. */
6597 4 : blob_set_thin_provision(newblob);
6598 4 : assert(spdk_mem_all_zero(newblob->active.clusters,
6599 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6600 4 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6601 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6602 :
6603 4 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6604 4 : return;
6605 : }
6606 :
6607 : /* Set internal xattr for snapshot id */
6608 208 : bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
6609 208 : if (bserrno != 0) {
6610 : /* return cluster map back to original */
6611 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6612 0 : blob_set_thin_provision(newblob);
6613 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6614 0 : return;
6615 : }
6616 :
6617 : /* Create new back_bs_dev for snapshot */
6618 208 : origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
6619 208 : if (origblob->back_bs_dev == NULL) {
6620 : /* return cluster map back to original */
6621 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6622 0 : blob_set_thin_provision(newblob);
6623 0 : bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
6624 0 : return;
6625 : }
6626 :
6627 : /* Remove the xattr that references an external snapshot */
6628 208 : if (blob_is_esnap_clone(origblob)) {
6629 16 : origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
6630 16 : bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
6631 16 : if (bserrno != 0) {
6632 0 : if (bserrno == -ENOENT) {
6633 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
6634 : " xattr to remove\n", origblob->id);
6635 0 : assert(false);
6636 : } else {
6637 : /* return cluster map back to original */
6638 0 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6639 0 : blob_set_thin_provision(newblob);
6640 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6641 0 : return;
6642 : }
6643 : }
6644 : }
6645 :
6646 208 : bs_blob_list_remove(origblob);
6647 208 : origblob->parent_id = newblob->id;
6648 : /* set clone blob as thin provisioned */
6649 208 : blob_set_thin_provision(origblob);
6650 :
6651 208 : bs_blob_list_add(newblob);
6652 :
6653 : /* sync clone metadata */
6654 208 : spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
6655 : }
6656 :
6657 : static void
6658 212 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
6659 : {
6660 212 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6661 212 : struct spdk_blob *origblob = ctx->original.blob;
6662 212 : struct spdk_blob *newblob = ctx->new.blob;
6663 : int bserrno;
6664 :
6665 212 : if (rc != 0) {
6666 0 : bs_clone_snapshot_newblob_cleanup(ctx, rc);
6667 0 : return;
6668 : }
6669 :
6670 212 : ctx->frozen = true;
6671 :
6672 212 : if (blob_is_esnap_clone(origblob)) {
 6673             :                 /* Clean up any channels associated with the original blob id because future
 6674             :                  * I/O will be performed using the snapshot's blob id.
6675 : */
6676 16 : blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
6677 : }
6678 212 : if (newblob->back_bs_dev) {
6679 212 : blob_back_bs_destroy(newblob);
6680 : }
6681 : /* set new back_bs_dev for snapshot */
6682 212 : newblob->back_bs_dev = origblob->back_bs_dev;
6683 : /* Set invalid flags from origblob */
6684 212 : newblob->invalid_flags = origblob->invalid_flags;
6685 :
6686 : /* inherit parent from original blob if set */
6687 212 : newblob->parent_id = origblob->parent_id;
6688 212 : switch (origblob->parent_id) {
6689 16 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
6690 16 : bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
6691 16 : if (bserrno != 0) {
6692 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6693 0 : return;
6694 : }
6695 16 : break;
6696 144 : case SPDK_BLOBID_INVALID:
6697 144 : break;
6698 52 : default:
6699 : /* Set internal xattr for snapshot id */
6700 52 : bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
6701 52 : &origblob->parent_id, sizeof(spdk_blob_id), true);
6702 52 : if (bserrno != 0) {
6703 0 : bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
6704 0 : return;
6705 : }
6706 : }
6707 :
6708 : /* swap cluster maps */
6709 212 : bs_snapshot_swap_cluster_maps(newblob, origblob);
6710 :
6711 : /* Set the clear method on the new blob to match the original. */
6712 212 : blob_set_clear_method(newblob, origblob->clear_method);
6713 :
6714 : /* sync snapshot metadata */
6715 212 : spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
6716 : }
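                  :
                  : /*
                  :  * Added commentary: after bs_snapshot_freeze_cpl() the relationship between
                  :  * the two blobs (arrows point at the parent) changes from
                  :  *
                  :  *   origblob -> [previous parent, esnap, or none]
                  :  *
                  :  * to
                  :  *
                  :  *   origblob -> newblob (snapshot) -> [previous parent, esnap, or none]
                  :  *
                  :  * The snapshot takes over the original's cluster map, back_bs_dev, clear
                  :  * method and parent; the original keeps its id and open handles and becomes
                  :  * a thin-provisioned clone of the snapshot once bs_snapshot_newblob_sync_cpl()
                  :  * completes.
                  :  */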
6717 :
6718 : static void
6719 216 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6720 : {
6721 216 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6722 216 : struct spdk_blob *origblob = ctx->original.blob;
6723 216 : struct spdk_blob *newblob = _blob;
6724 :
6725 216 : if (bserrno != 0) {
6726 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6727 4 : return;
6728 : }
6729 :
6730 212 : ctx->new.blob = newblob;
6731 212 : assert(spdk_blob_is_thin_provisioned(newblob));
6732 212 : assert(spdk_mem_all_zero(newblob->active.clusters,
6733 : newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
6734 212 : assert(spdk_mem_all_zero(newblob->active.extent_pages,
6735 : newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
6736 :
6737 212 : blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
6738 : }
6739 :
6740 : static void
6741 220 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6742 : {
6743 220 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6744 220 : struct spdk_blob *origblob = ctx->original.blob;
6745 :
6746 220 : if (bserrno != 0) {
6747 4 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6748 4 : return;
6749 : }
6750 :
6751 216 : ctx->new.id = blobid;
6752 216 : ctx->cpl.u.blobid.blobid = blobid;
6753 :
6754 216 : spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
6755 : }
6756 :
6757 :
6758 : static void
6759 220 : bs_xattr_snapshot(void *arg, const char *name,
6760 : const void **value, size_t *value_len)
6761 : {
6762 220 : assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
6763 :
6764 220 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6765 220 : *value = &blob->id;
6766 220 : *value_len = sizeof(blob->id);
6767 220 : }
6768 :
6769 : static void
6770 230 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6771 : {
6772 230 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6773 230 : struct spdk_blob_opts opts;
6774 230 : struct spdk_blob_xattr_opts internal_xattrs;
6775 230 : char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
6776 :
6777 230 : if (bserrno != 0) {
6778 6 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6779 6 : return;
6780 : }
6781 :
6782 224 : ctx->original.blob = _blob;
6783 :
6784 224 : if (_blob->data_ro || _blob->md_ro) {
 6785           4 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read-only blob with id 0x%"
6786 : PRIx64 "\n", _blob->id);
6787 4 : ctx->bserrno = -EINVAL;
6788 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6789 4 : return;
6790 : }
6791 :
6792 220 : if (_blob->locked_operation_in_progress) {
6793 0 : SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
6794 0 : ctx->bserrno = -EBUSY;
6795 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6796 0 : return;
6797 : }
6798 :
6799 220 : _blob->locked_operation_in_progress = true;
6800 :
6801 220 : spdk_blob_opts_init(&opts, sizeof(opts));
6802 220 : blob_xattrs_init(&internal_xattrs);
6803 :
 6804             :         /* Make the new blob the same size as the original blob,
 6805             :          * but do not allocate clusters */
6806 220 : opts.thin_provision = true;
6807 220 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6808 220 : opts.use_extent_table = _blob->use_extent_table;
6809 :
6810 : /* If there are any xattrs specified for snapshot, set them now */
6811 220 : if (ctx->xattrs) {
6812 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6813 : }
6814 : /* Set internal xattr SNAPSHOT_IN_PROGRESS */
6815 220 : internal_xattrs.count = 1;
6816 220 : internal_xattrs.ctx = _blob;
6817 220 : internal_xattrs.names = xattrs_names;
6818 220 : internal_xattrs.get_value = bs_xattr_snapshot;
6819 :
6820 220 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6821 : bs_snapshot_newblob_create_cpl, ctx);
6822 : }
6823 :
6824 : void
6825 230 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
6826 : const struct spdk_blob_xattr_opts *snapshot_xattrs,
6827 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6828 : {
6829 230 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6830 :
6831 230 : if (!ctx) {
6832 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6833 0 : return;
6834 : }
6835 230 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6836 230 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6837 230 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6838 230 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6839 230 : ctx->bserrno = 0;
6840 230 : ctx->frozen = false;
6841 230 : ctx->original.id = blobid;
6842 230 : ctx->xattrs = snapshot_xattrs;
6843 :
6844 230 : spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
6845 : }
6846 : /* END spdk_bs_create_snapshot */
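                  :
                  : /*
                  :  * Usage sketch (added commentary, not part of the implementation): snapshot
                  :  * creation is asynchronous and reports the new snapshot's id through an
                  :  * spdk_blob_op_with_id_complete callback. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_snapshot_done(void *cb_arg, spdk_blob_id snapshot_id, int bserrno)
                  : {
                  : 	if (bserrno != 0) {
                  : 		SPDK_ERRLOG("snapshot creation failed: %d\n", bserrno);
                  : 		return;
                  : 	}
                  : 	/* The original blob is now a thin-provisioned clone of snapshot_id. */
                  : 	SPDK_NOTICELOG("created snapshot 0x%" PRIx64 "\n", snapshot_id);
                  : }
                  :
                  : /* No extra xattrs are stored on the snapshot, hence NULL for snapshot_xattrs. */
                  : spdk_bs_create_snapshot(bs, blobid, NULL, example_snapshot_done, NULL);
                  : #endif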
6847 :
6848 : /* START spdk_bs_create_clone */
6849 :
6850 : static void
6851 48 : bs_xattr_clone(void *arg, const char *name,
6852 : const void **value, size_t *value_len)
6853 : {
6854 48 : assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
6855 :
6856 48 : struct spdk_blob *blob = (struct spdk_blob *)arg;
6857 48 : *value = &blob->id;
6858 48 : *value_len = sizeof(blob->id);
6859 48 : }
6860 :
6861 : static void
6862 48 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6863 : {
6864 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6865 48 : struct spdk_blob *clone = _blob;
6866 :
6867 48 : ctx->new.blob = clone;
6868 48 : bs_blob_list_add(clone);
6869 :
6870 48 : spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
6871 48 : }
6872 :
6873 : static void
6874 48 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
6875 : {
6876 48 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6877 :
6878 48 : ctx->cpl.u.blobid.blobid = blobid;
6879 48 : spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
6880 48 : }
6881 :
6882 : static void
6883 52 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
6884 : {
6885 52 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6886 52 : struct spdk_blob_opts opts;
6887 52 : struct spdk_blob_xattr_opts internal_xattrs;
6888 52 : char *xattr_names[] = { BLOB_SNAPSHOT };
6889 :
6890 52 : if (bserrno != 0) {
6891 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
6892 0 : return;
6893 : }
6894 :
6895 52 : ctx->original.blob = _blob;
6896 52 : ctx->original.md_ro = _blob->md_ro;
6897 :
6898 52 : if (!_blob->data_ro || !_blob->md_ro) {
 6899           4 :                 SPDK_DEBUGLOG(blob, "Cannot create clone from a blob that is not read-only\n");
6900 4 : ctx->bserrno = -EINVAL;
6901 4 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6902 4 : return;
6903 : }
6904 :
6905 48 : if (_blob->locked_operation_in_progress) {
6906 0 : SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
6907 0 : ctx->bserrno = -EBUSY;
6908 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
6909 0 : return;
6910 : }
6911 :
6912 48 : _blob->locked_operation_in_progress = true;
6913 :
6914 48 : spdk_blob_opts_init(&opts, sizeof(opts));
6915 48 : blob_xattrs_init(&internal_xattrs);
6916 :
6917 48 : opts.thin_provision = true;
6918 48 : opts.num_clusters = spdk_blob_get_num_clusters(_blob);
6919 48 : opts.use_extent_table = _blob->use_extent_table;
6920 48 : if (ctx->xattrs) {
6921 4 : memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
6922 : }
6923 :
6924 : /* Set internal xattr BLOB_SNAPSHOT */
6925 48 : internal_xattrs.count = 1;
6926 48 : internal_xattrs.ctx = _blob;
6927 48 : internal_xattrs.names = xattr_names;
6928 48 : internal_xattrs.get_value = bs_xattr_clone;
6929 :
6930 48 : bs_create_blob(_blob->bs, &opts, &internal_xattrs,
6931 : bs_clone_newblob_create_cpl, ctx);
6932 : }
6933 :
6934 : void
6935 52 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
6936 : const struct spdk_blob_xattr_opts *clone_xattrs,
6937 : spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
6938 : {
6939 52 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
6940 :
6941 52 : if (!ctx) {
6942 0 : cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
6943 0 : return;
6944 : }
6945 :
6946 52 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
6947 52 : ctx->cpl.u.blobid.cb_fn = cb_fn;
6948 52 : ctx->cpl.u.blobid.cb_arg = cb_arg;
6949 52 : ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
6950 52 : ctx->bserrno = 0;
6951 52 : ctx->xattrs = clone_xattrs;
6952 52 : ctx->original.id = blobid;
6953 :
6954 52 : spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
6955 : }
6956 :
6957 : /* END spdk_bs_create_clone */
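                  :
                  : /*
                  :  * Usage sketch (added commentary): unlike snapshot creation, clone creation
                  :  * requires the source blob to already be read-only, as enforced in
                  :  * bs_clone_origblob_open_cpl() above. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_clone_done(void *cb_arg, spdk_blob_id clone_id, int bserrno)
                  : {
                  : 	if (bserrno != 0) {
                  : 		SPDK_ERRLOG("clone creation failed: %d\n", bserrno);
                  : 		return;
                  : 	}
                  : 	/* clone_id is a thin-provisioned, writable clone of the snapshot. */
                  : }
                  :
                  : spdk_bs_create_clone(bs, snapshot_id, NULL, example_clone_done, NULL);
                  : #endif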
6958 :
6959 : /* START spdk_bs_inflate_blob */
6960 :
6961 : static void
6962 12 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
6963 : {
6964 12 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
6965 12 : struct spdk_blob *_blob = ctx->original.blob;
6966 :
6967 12 : if (bserrno != 0) {
6968 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6969 0 : return;
6970 : }
6971 :
6972 : /* Temporarily override md_ro flag for MD modification */
6973 12 : _blob->md_ro = false;
6974 :
6975 12 : bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
6976 12 : if (bserrno != 0) {
6977 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
6978 0 : return;
6979 : }
6980 :
6981 12 : assert(_parent != NULL);
6982 :
6983 12 : bs_blob_list_remove(_blob);
6984 12 : _blob->parent_id = _parent->id;
6985 :
6986 12 : blob_back_bs_destroy(_blob);
6987 12 : _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
6988 12 : bs_blob_list_add(_blob);
6989 :
6990 12 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
6991 : }
6992 :
6993 : static void
6994 4 : bs_inflate_blob_set_esnap_refs(struct spdk_clone_snapshot_ctx *ctx)
6995 : {
6996 4 : struct spdk_blob *_blob = ctx->original.blob;
6997 4 : struct spdk_blob *_parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
6998 : int bserrno;
6999 :
7000 4 : assert(_parent != NULL);
7001 4 : assert(_parent->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT);
7002 :
7003 : /* Temporarily override md_ro flag for MD modification */
7004 4 : _blob->md_ro = false;
7005 :
7006 4 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7007 4 : bserrno = bs_snapshot_copy_xattr(_blob, _parent, BLOB_EXTERNAL_SNAPSHOT_ID);
7008 4 : if (bserrno != 0) {
7009 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
7010 0 : return;
7011 : }
7012 :
7013 4 : bs_blob_list_remove(_blob);
7014 :
7015 4 : _blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
7016 4 : _blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
7017 :
7018 4 : blob_back_bs_destroy(_blob);
7019 4 : _blob->back_bs_dev = _parent->back_bs_dev;
7020 :
7021 4 : LIST_INSERT_AFTER(_parent, _blob, back_bs_dev_link);
7022 :
7023 4 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
7024 : }
7025 :
7026 : static void
7027 60 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
7028 : {
7029 60 : struct spdk_blob *_blob = ctx->original.blob;
7030 : struct spdk_blob *_parent;
7031 :
7032 60 : if (ctx->allocate_all) {
7033 : /* remove thin provisioning */
7034 32 : bs_blob_list_remove(_blob);
7035 32 : if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
7036 8 : blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
7037 8 : _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
7038 : } else {
7039 24 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7040 : }
7041 32 : _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
7042 32 : blob_back_bs_destroy(_blob);
7043 32 : _blob->parent_id = SPDK_BLOBID_INVALID;
7044 : } else {
7045 : /* For now, esnap clones always have allocate_all set. */
7046 28 : assert(!blob_is_esnap_clone(_blob));
7047 :
7048 28 : _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
7049 28 : switch (_parent->parent_id) {
7050 12 : case SPDK_BLOBID_INVALID:
7051 12 : bs_blob_list_remove(_blob);
7052 12 : _blob->parent_id = SPDK_BLOBID_INVALID;
7053 12 : blob_back_bs_destroy(_blob);
7054 12 : _blob->back_bs_dev = bs_create_zeroes_dev();
7055 12 : break;
7056 4 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
7057 4 : bs_inflate_blob_set_esnap_refs(ctx);
7058 4 : return;
7059 12 : default:
7060 : /* We must change the parent of the inflated blob */
7061 12 : spdk_bs_open_blob(_blob->bs, _parent->parent_id,
7062 : bs_inflate_blob_set_parent_cpl, ctx);
7063 12 : return;
7064 : }
7065 : }
7066 :
7067 : /* Temporarily override md_ro flag for MD modification */
7068 44 : _blob->md_ro = false;
7069 44 : blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
7070 44 : _blob->state = SPDK_BLOB_STATE_DIRTY;
7071 :
7072 44 : spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
7073 : }
7074 :
7075 : /* Check if cluster needs allocation */
7076 : static inline bool
7077 1280 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
7078 : {
7079 : struct spdk_blob_bs_dev *b;
7080 :
7081 1280 : assert(blob != NULL);
7082 :
7083 1280 : if (blob->active.clusters[cluster] != 0) {
7084 : /* Cluster is already allocated */
7085 32 : return false;
7086 : }
7087 :
7088 1248 : if (blob->parent_id == SPDK_BLOBID_INVALID) {
 7089             :                 /* Blob has no parent blob */
7090 80 : return allocate_all;
7091 : }
7092 :
7093 1168 : if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
7094 64 : return true;
7095 : }
7096 :
7097 1104 : b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
7098 1104 : return (allocate_all || b->blob->active.clusters[cluster] != 0);
7099 : }
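                  :
                  : /*
                  :  * Added commentary: the checks above reduce to this decision table.
                  :  *
                  :  *   cluster already allocated in the blob  -> false
                  :  *   no parent (SPDK_BLOBID_INVALID)        -> allocate_all
                  :  *   external snapshot parent               -> true (esnap contents are opaque)
                  :  *   blob parent                            -> allocate_all, or the parent
                  :  *                                             has the cluster allocated
                  :  */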
7100 :
7101 : static void
7102 512 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
7103 : {
7104 512 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7105 512 : struct spdk_blob *_blob = ctx->original.blob;
7106 512 : struct spdk_bs_cpl cpl;
7107 : spdk_bs_user_op_t *op;
7108 : uint64_t offset;
7109 :
7110 512 : if (bserrno != 0) {
7111 0 : bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
7112 0 : return;
7113 : }
7114 :
7115 700 : for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
7116 640 : if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
7117 452 : break;
7118 : }
7119 : }
7120 :
7121 512 : if (ctx->cluster < _blob->active.num_clusters) {
7122 452 : offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
7123 :
 7124             :                 /* We may safely advance the cluster index before copying */
7125 452 : ctx->cluster++;
7126 :
7127 : /* Use a dummy 0B read as a context for cluster copy */
7128 452 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7129 452 : cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
7130 452 : cpl.u.blob_basic.cb_arg = ctx;
7131 :
7132 452 : op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
7133 : NULL, 0, offset, 0);
7134 452 : if (!op) {
7135 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
7136 0 : return;
7137 : }
7138 :
7139 452 : bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
7140 : } else {
7141 60 : bs_inflate_blob_done(ctx);
7142 : }
7143 : }
7144 :
7145 : static void
7146 64 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7147 : {
7148 64 : struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
7149 : uint64_t clusters_needed;
7150 : uint64_t i;
7151 :
7152 64 : if (bserrno != 0) {
7153 0 : bs_clone_snapshot_cleanup_finish(ctx, bserrno);
7154 0 : return;
7155 : }
7156 :
7157 64 : ctx->original.blob = _blob;
7158 64 : ctx->original.md_ro = _blob->md_ro;
7159 :
7160 64 : if (_blob->locked_operation_in_progress) {
7161 0 : SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
7162 0 : ctx->bserrno = -EBUSY;
7163 0 : spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
7164 0 : return;
7165 : }
7166 :
7167 64 : _blob->locked_operation_in_progress = true;
7168 :
7169 64 : switch (_blob->parent_id) {
7170 8 : case SPDK_BLOBID_INVALID:
7171 8 : if (!ctx->allocate_all) {
7172 : /* This blob has no parent, so we cannot decouple it. */
7173 4 : SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
7174 4 : bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
7175 4 : return;
7176 : }
7177 4 : break;
7178 8 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
7179 : /*
 7180             :                  * It would be better to rely on back_bs_dev->is_zeroes() to determine which
 7181             :                  * clusters require allocation. Until there is a blobstore consumer that
 7182             :                  * uses esnaps with an spdk_bs_dev implementing a useful is_zeroes(), it is
 7183             :                  * not worth the effort.
7184 : */
7185 8 : ctx->allocate_all = true;
7186 8 : break;
7187 48 : default:
7188 48 : break;
7189 : }
7190 :
7191 60 : if (spdk_blob_is_thin_provisioned(_blob) == false) {
 7192             :                 /* This is not a thin-provisioned blob. There is no need to inflate. */
7193 0 : bs_clone_snapshot_origblob_cleanup(ctx, 0);
7194 0 : return;
7195 : }
7196 :
7197 : /* Do two passes - one to verify that we can obtain enough clusters
7198 : * and another to actually claim them.
7199 : */
7200 60 : clusters_needed = 0;
7201 700 : for (i = 0; i < _blob->active.num_clusters; i++) {
7202 640 : if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
7203 452 : clusters_needed++;
7204 : }
7205 : }
7206 :
7207 60 : if (clusters_needed > _blob->bs->num_free_clusters) {
7208 : /* Not enough free clusters. Cannot satisfy the request. */
7209 0 : bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
7210 0 : return;
7211 : }
7212 :
7213 60 : ctx->cluster = 0;
7214 60 : bs_inflate_blob_touch_next(ctx, 0);
7215 : }
7216 :
7217 : static void
7218 64 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7219 : spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
7220 : {
7221 64 : struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
7222 :
7223 64 : if (!ctx) {
7224 0 : cb_fn(cb_arg, -ENOMEM);
7225 0 : return;
7226 : }
7227 64 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7228 64 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7229 64 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7230 64 : ctx->bserrno = 0;
7231 64 : ctx->original.id = blobid;
7232 64 : ctx->channel = channel;
7233 64 : ctx->allocate_all = allocate_all;
7234 :
7235 64 : spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
7236 : }
7237 :
7238 : void
7239 28 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7240 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7241 : {
7242 28 : bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
7243 28 : }
7244 :
7245 : void
7246 36 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7247 : spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
7248 : {
7249 36 : bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
7250 36 : }
7251 : /* END spdk_bs_inflate_blob */
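                  :
                  : /*
                  :  * Usage sketch (added commentary): both entry points share bs_inflate_blob()
                  :  * and differ only in allocate_all. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_inflate_done(void *cb_arg, int bserrno)
                  : {
                  : 	SPDK_NOTICELOG("inflate/decouple finished: %d\n", bserrno);
                  : }
                  :
                  : /* Allocate every cluster and detach the blob from its parent entirely. */
                  : spdk_bs_inflate_blob(bs, channel, blobid, example_inflate_done, NULL);
                  :
                  : /* Copy only clusters backed by the immediate parent, then re-parent the
                  :  * blob to its grandparent (or to the zeroes device if there is none). */
                  : spdk_bs_blob_decouple_parent(bs, channel, blobid, example_inflate_done, NULL);
                  : #endif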
7252 :
7253 : /* START spdk_bs_blob_shallow_copy */
7254 :
7255 : struct shallow_copy_ctx {
7256 : struct spdk_bs_cpl cpl;
7257 : int bserrno;
7258 :
7259 : /* Blob source for copy */
7260 : struct spdk_blob_store *bs;
7261 : spdk_blob_id blobid;
7262 : struct spdk_blob *blob;
7263 : struct spdk_io_channel *blob_channel;
7264 :
7265 : /* Destination device for copy */
7266 : struct spdk_bs_dev *ext_dev;
7267 : struct spdk_io_channel *ext_channel;
7268 :
7269 : /* Current cluster for copy operation */
7270 : uint64_t cluster;
7271 :
7272 : /* Buffer for blob reading */
7273 : uint8_t *read_buff;
7274 :
7275 : /* Struct for external device writing */
7276 : struct spdk_bs_dev_cb_args ext_args;
7277 :
7278 : /* Actual number of copied clusters */
7279 : uint64_t copied_clusters_count;
7280 :
7281 : /* Status callback for updates about the ongoing operation */
7282 : spdk_blob_shallow_copy_status status_cb;
7283 :
7284 : /* Argument passed to function status_cb */
7285 : void *status_cb_arg;
7286 : };
7287 :
7288 : static void
7289 16 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
7290 : {
7291 16 : struct shallow_copy_ctx *ctx = cb_arg;
7292 16 : struct spdk_bs_cpl *cpl = &ctx->cpl;
7293 :
7294 16 : if (bserrno != 0) {
7295 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
7296 0 : ctx->bserrno = bserrno;
7297 : }
7298 :
7299 16 : ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
7300 16 : spdk_free(ctx->read_buff);
7301 :
7302 16 : cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
7303 :
7304 16 : free(ctx);
7305 16 : }
7306 :
7307 : static void
7308 8 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
7309 : {
7310 8 : struct shallow_copy_ctx *ctx = cb_arg;
7311 8 : struct spdk_blob *_blob = ctx->blob;
7312 :
7313 8 : if (bserrno != 0) {
7314 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
7315 0 : ctx->bserrno = bserrno;
7316 0 : _blob->locked_operation_in_progress = false;
7317 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7318 0 : return;
7319 : }
7320 :
7321 8 : ctx->cluster++;
7322 8 : if (ctx->status_cb) {
7323 8 : ctx->copied_clusters_count++;
7324 8 : ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
7325 : }
7326 :
7327 8 : bs_shallow_copy_cluster_find_next(ctx);
7328 : }
7329 :
7330 : static void
7331 8 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
7332 : {
7333 8 : struct shallow_copy_ctx *ctx = cb_arg;
7334 8 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7335 8 : struct spdk_blob *_blob = ctx->blob;
7336 :
7337 8 : if (bserrno != 0) {
7338 0 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
7339 0 : ctx->bserrno = bserrno;
7340 0 : _blob->locked_operation_in_progress = false;
7341 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7342 0 : return;
7343 : }
7344 :
7345 8 : ctx->ext_args.channel = ctx->ext_channel;
7346 8 : ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
7347 8 : ctx->ext_args.cb_arg = ctx;
7348 :
7349 8 : ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
7350 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7351 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7352 : &ctx->ext_args);
7353 : }
7354 :
7355 : static void
7356 12 : bs_shallow_copy_cluster_find_next(void *cb_arg)
7357 : {
7358 12 : struct shallow_copy_ctx *ctx = cb_arg;
7359 12 : struct spdk_blob *_blob = ctx->blob;
7360 :
7361 20 : while (ctx->cluster < _blob->active.num_clusters) {
7362 16 : if (_blob->active.clusters[ctx->cluster] != 0) {
7363 8 : break;
7364 : }
7365 :
7366 8 : ctx->cluster++;
7367 : }
7368 :
7369 12 : if (ctx->cluster < _blob->active.num_clusters) {
7370 8 : blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
7371 8 : bs_cluster_to_lba(_blob->bs, ctx->cluster),
7372 8 : bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
7373 : bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
7374 : } else {
7375 4 : _blob->locked_operation_in_progress = false;
7376 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7377 : }
7378 12 : }
7379 :
7380 : static void
7381 16 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
7382 : {
7383 16 : struct shallow_copy_ctx *ctx = cb_arg;
7384 16 : struct spdk_bs_dev *ext_dev = ctx->ext_dev;
7385 : uint32_t blob_block_size;
7386 : uint64_t blob_total_size;
7387 :
7388 16 : if (bserrno != 0) {
7389 0 : SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
7390 0 : ctx->bserrno = bserrno;
7391 0 : bs_shallow_copy_cleanup_finish(ctx, 0);
7392 0 : return;
7393 : }
7394 :
7395 16 : if (!spdk_blob_is_read_only(_blob)) {
7396 4 : SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
7397 4 : ctx->bserrno = -EPERM;
7398 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7399 4 : return;
7400 : }
7401 :
7402 12 : blob_block_size = _blob->bs->dev->blocklen;
7403 12 : blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
7404 :
7405 12 : if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
 7406           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must be at least as large as the blob\n",
7407 : _blob->id);
7408 4 : ctx->bserrno = -EINVAL;
7409 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7410 4 : return;
7411 : }
7412 :
7413 8 : if (blob_block_size % ext_dev->blocklen != 0) {
 7414           4 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not "
 7415             :                             "compatible with blobstore block size\n", _blob->id);
7416 4 : ctx->bserrno = -EINVAL;
7417 4 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7418 4 : return;
7419 : }
7420 :
7421 4 : ctx->blob = _blob;
7422 :
7423 4 : if (_blob->locked_operation_in_progress) {
7424 0 : SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
7425 0 : ctx->bserrno = -EBUSY;
7426 0 : spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
7427 0 : return;
7428 : }
7429 :
7430 4 : _blob->locked_operation_in_progress = true;
7431 :
7432 4 : ctx->cluster = 0;
7433 4 : bs_shallow_copy_cluster_find_next(ctx);
7434 : }
7435 :
7436 : int
7437 16 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
7438 : spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
7439 : spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
7440 : spdk_blob_op_complete cb_fn, void *cb_arg)
7441 : {
7442 : struct shallow_copy_ctx *ctx;
7443 : struct spdk_io_channel *ext_channel;
7444 :
7445 16 : ctx = calloc(1, sizeof(*ctx));
7446 16 : if (!ctx) {
7447 0 : return -ENOMEM;
7448 : }
7449 :
7450 16 : ctx->bs = bs;
7451 16 : ctx->blobid = blobid;
7452 16 : ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
7453 16 : ctx->cpl.u.bs_basic.cb_fn = cb_fn;
7454 16 : ctx->cpl.u.bs_basic.cb_arg = cb_arg;
7455 16 : ctx->bserrno = 0;
7456 16 : ctx->blob_channel = channel;
7457 16 : ctx->status_cb = status_cb_fn;
7458 16 : ctx->status_cb_arg = status_cb_arg;
7459 16 : ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
7460 : SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
7461 16 : if (!ctx->read_buff) {
7462 0 : free(ctx);
7463 0 : return -ENOMEM;
7464 : }
7465 :
7466 16 : ext_channel = ext_dev->create_channel(ext_dev);
7467 16 : if (!ext_channel) {
7468 0 : spdk_free(ctx->read_buff);
7469 0 : free(ctx);
7470 0 : return -ENOMEM;
7471 : }
7472 16 : ctx->ext_dev = ext_dev;
7473 16 : ctx->ext_channel = ext_channel;
7474 :
7475 16 : spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
7476 :
7477 16 : return 0;
7478 : }
7479 : /* END spdk_bs_blob_shallow_copy */
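                  :
                  : /*
                  :  * Usage sketch (added commentary): the source blob must be read-only and the
                  :  * destination spdk_bs_dev must be at least as large as the blob. Only
                  :  * clusters actually allocated in the blob are copied; the status callback is
                  :  * invoked once per copied cluster. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_copy_status(uint64_t copied_clusters, void *cb_arg)
                  : {
                  : 	SPDK_NOTICELOG("copied %" PRIu64 " clusters so far\n", copied_clusters);
                  : }
                  :
                  : static void
                  : example_copy_done(void *cb_arg, int bserrno)
                  : {
                  : 	SPDK_NOTICELOG("shallow copy finished: %d\n", bserrno);
                  : }
                  :
                  : int rc = spdk_bs_blob_shallow_copy(bs, channel, blobid, ext_dev,
                  : 				   example_copy_status, NULL,
                  : 				   example_copy_done, NULL);
                  : if (rc != 0) {
                  : 	/* Setup failed synchronously (e.g. -ENOMEM); no callback will fire. */
                  : }
                  : #endif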
7480 :
7481 : /* START spdk_bs_blob_set_parent */
7482 :
7483 : struct set_parent_ctx {
7484 : struct spdk_blob_store *bs;
7485 : int bserrno;
7486 : spdk_bs_op_complete cb_fn;
7487 : void *cb_arg;
7488 :
7489 : struct spdk_blob *blob;
7490 : bool blob_md_ro;
7491 :
7492 : struct blob_parent parent;
7493 : };
7494 :
7495 : static void
7496 24 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
7497 : {
7498 24 : struct set_parent_ctx *ctx = cb_arg;
7499 :
7500 24 : assert(ctx != NULL);
7501 :
7502 24 : if (bserrno != 0) {
7503 0 : SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
7504 0 : if (ctx->bserrno == 0) {
7505 0 : ctx->bserrno = bserrno;
7506 : }
7507 : }
7508 :
7509 24 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7510 :
7511 24 : free(ctx);
7512 24 : }
7513 :
7514 : static void
7515 20 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
7516 : {
7517 20 : struct set_parent_ctx *ctx = cb_arg;
7518 :
7519 20 : if (ctx->bserrno != 0) {
7520 8 : spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
7521 8 : return;
7522 : }
7523 :
7524 12 : if (bserrno != 0) {
7525 0 : SPDK_ERRLOG("blob close error %d\n", bserrno);
7526 0 : ctx->bserrno = bserrno;
7527 : }
7528 :
7529 12 : bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
7530 : }
7531 :
7532 : static void
7533 12 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
7534 : {
7535 12 : struct set_parent_ctx *ctx = cb_arg;
7536 12 : struct spdk_blob *blob = ctx->blob;
7537 12 : struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
7538 :
7539 12 : if (bserrno != 0 && ctx->bserrno == 0) {
7540 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7541 0 : ctx->bserrno = bserrno;
7542 : }
7543 :
7544 : /* Revert md_ro to original state */
7545 12 : blob->md_ro = ctx->blob_md_ro;
7546 :
7547 12 : blob->locked_operation_in_progress = false;
7548 12 : snapshot->locked_operation_in_progress = false;
7549 :
7550 12 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7551 12 : }
7552 :
7553 : static void
7554 12 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
7555 : {
7556 12 : struct set_parent_ctx *ctx = cb_arg;
7557 12 : struct spdk_blob *blob = ctx->blob;
7558 :
7559 12 : if (bserrno != 0) {
7560 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7561 0 : ctx->bserrno = bserrno;
7562 0 : bs_set_parent_close_blob(ctx, bserrno);
7563 0 : return;
7564 : }
7565 :
7566 12 : spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
7567 : }
7568 :
7569 : static int
7570 12 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7571 : {
7572 : int rc;
7573 :
7574 12 : bs_blob_list_remove(blob);
7575 :
7576 12 : rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
7577 12 : if (rc != 0) {
7578 0 : SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
7579 0 : return rc;
7580 : }
7581 12 : blob->parent_id = parent->u.snapshot.id;
7582 :
7583 12 : if (blob_is_esnap_clone(blob)) {
7584 : /* Remove the xattr that references the external snapshot */
7585 4 : blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
7586 4 : blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
7587 : }
7588 :
7589 12 : bs_blob_list_add(blob);
7590 :
7591 12 : return 0;
7592 : }
7593 :
7594 : static void
7595 20 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
7596 : {
7597 20 : struct set_parent_ctx *ctx = cb_arg;
7598 20 : struct spdk_blob *blob = ctx->blob;
7599 : struct spdk_bs_dev *back_bs_dev;
7600 :
7601 20 : if (bserrno != 0) {
7602 0 : SPDK_ERRLOG("snapshot open error %d\n", bserrno);
7603 0 : ctx->bserrno = bserrno;
7604 0 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7605 0 : return;
7606 : }
7607 :
7608 20 : ctx->parent.u.snapshot.blob = snapshot;
7609 20 : ctx->parent.u.snapshot.id = snapshot->id;
7610 :
7611 20 : if (!spdk_blob_is_snapshot(snapshot)) {
7612 4 : SPDK_ERRLOG("parent blob is not a snapshot\n");
7613 4 : ctx->bserrno = -EINVAL;
7614 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7615 4 : return;
7616 : }
7617 :
7618 16 : if (blob->active.num_clusters != snapshot->active.num_clusters) {
 7619           4 :                 SPDK_ERRLOG("parent blob's cluster count differs from the child's\n");
7620 4 : ctx->bserrno = -EINVAL;
7621 4 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7622 4 : return;
7623 : }
7624 :
7625 12 : if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
7626 0 : SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
7627 0 : ctx->bserrno = -EBUSY;
7628 0 : spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
7629 0 : return;
7630 : }
7631 :
7632 12 : blob->locked_operation_in_progress = true;
7633 12 : snapshot->locked_operation_in_progress = true;
7634 :
7635 : /* Temporarily override md_ro flag for MD modification */
7636 12 : blob->md_ro = false;
7637 :
7638 12 : back_bs_dev = bs_create_blob_bs_dev(snapshot);
7639 :
7640 12 : blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
7641 : bs_set_parent_set_back_bs_dev_done,
7642 : ctx);
7643 : }
7644 :
7645 : static void
7646 24 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7647 : {
7648 24 : struct set_parent_ctx *ctx = cb_arg;
7649 :
7650 24 : if (bserrno != 0) {
7651 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7652 0 : ctx->bserrno = bserrno;
7653 0 : bs_set_parent_cleanup_finish(ctx, 0);
7654 0 : return;
7655 : }
7656 :
7657 24 : if (!spdk_blob_is_thin_provisioned(blob)) {
7658 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7659 4 : ctx->bserrno = -EINVAL;
7660 4 : spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
7661 4 : return;
7662 : }
7663 :
7664 20 : ctx->blob = blob;
7665 20 : ctx->blob_md_ro = blob->md_ro;
7666 :
7667 20 : spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
7668 : }
7669 :
7670 : void
7671 36 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7672 : spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
7673 : {
7674 : struct set_parent_ctx *ctx;
7675 :
7676 36 : if (snapshot_id == SPDK_BLOBID_INVALID) {
7677 4 : SPDK_ERRLOG("snapshot id not valid\n");
7678 4 : cb_fn(cb_arg, -EINVAL);
7679 4 : return;
7680 : }
7681 :
7682 32 : if (blob_id == snapshot_id) {
7683 4 : SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
7684 4 : cb_fn(cb_arg, -EINVAL);
7685 4 : return;
7686 : }
7687 :
7688 28 : if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
7689 4 : SPDK_NOTICELOG("snapshot is already the parent of blob\n");
7690 4 : cb_fn(cb_arg, -EEXIST);
7691 4 : return;
7692 : }
7693 :
7694 24 : ctx = calloc(1, sizeof(*ctx));
7695 24 : if (!ctx) {
7696 0 : cb_fn(cb_arg, -ENOMEM);
7697 0 : return;
7698 : }
7699 :
7700 24 : ctx->bs = bs;
7701 24 : ctx->parent.u.snapshot.id = snapshot_id;
7702 24 : ctx->cb_fn = cb_fn;
7703 24 : ctx->cb_arg = cb_arg;
7704 24 : ctx->bserrno = 0;
7705 :
7706 24 : spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
7707 : }
7708 : /* END spdk_bs_blob_set_parent */
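                  :
                  : /*
                  :  * Usage sketch (added commentary): the child must be thin-provisioned and the
                  :  * new parent must be a snapshot with the same number of clusters, as enforced
                  :  * in the open completions above. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_set_parent_done(void *cb_arg, int bserrno)
                  : {
                  : 	SPDK_NOTICELOG("set parent finished: %d\n", bserrno);
                  : }
                  :
                  : spdk_bs_blob_set_parent(bs, blob_id, snapshot_id, example_set_parent_done, NULL);
                  : #endif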
7709 :
7710 : /* START spdk_bs_blob_set_external_parent */
7711 :
7712 : static void
7713 16 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
7714 : {
7715 16 : struct set_parent_ctx *ctx = cb_arg;
7716 :
7717 16 : if (bserrno != 0) {
7718 0 : SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
7719 0 : if (ctx->bserrno == 0) {
7720 0 : ctx->bserrno = bserrno;
7721 : }
7722 : }
7723 :
7724 16 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
7725 :
7726 16 : free(ctx->parent.u.esnap.id);
7727 16 : free(ctx);
7728 16 : }
7729 :
7730 : static void
7731 8 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
7732 : {
7733 8 : struct set_parent_ctx *ctx = cb_arg;
7734 8 : struct spdk_blob *blob = ctx->blob;
7735 :
7736 8 : if (bserrno != 0 && ctx->bserrno == 0) {
7737 0 : SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
7738 0 : ctx->bserrno = bserrno;
7739 : }
7740 :
7741 : /* Revert md_ro to original state */
7742 8 : blob->md_ro = ctx->blob_md_ro;
7743 :
7744 8 : blob->locked_operation_in_progress = false;
7745 :
7746 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7747 8 : }
7748 :
7749 : static void
7750 8 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
7751 : {
7752 8 : struct set_parent_ctx *ctx = cb_arg;
7753 8 : struct spdk_blob *blob = ctx->blob;
7754 :
7755 8 : if (bserrno != 0) {
7756 0 : SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
7757 0 : ctx->bserrno = bserrno;
7758 0 : bs_set_external_parent_close_blob(ctx, bserrno);
7759 0 : return;
7760 : }
7761 :
7762 8 : spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
7763 : }
7764 :
7765 : static int
7766 8 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
7767 : {
7768 : int rc;
7769 :
7770 8 : bs_blob_list_remove(blob);
7771 :
7772 8 : if (spdk_blob_is_clone(blob)) {
7773 : /* Remove the xattr that references the snapshot */
7774 0 : blob->parent_id = SPDK_BLOBID_INVALID;
7775 0 : blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
7776 : }
7777 :
7778 8 : rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
7779 8 : parent->u.esnap.id_len, true);
7780 8 : if (rc != 0) {
7781 0 : SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
7782 0 : return rc;
7783 : }
7784 8 : blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
7785 8 : blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
7786 :
7787 8 : bs_blob_list_add(blob);
7788 :
7789 8 : return 0;
7790 : }
7791 :
7792 : static void
7793 16 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
7794 : {
7795 16 : struct set_parent_ctx *ctx = cb_arg;
7796 16 : const void *esnap_id;
7797 16 : size_t esnap_id_len;
7798 : int rc;
7799 :
7800 16 : if (bserrno != 0) {
7801 0 : SPDK_ERRLOG("blob open error %d\n", bserrno);
7802 0 : ctx->bserrno = bserrno;
7803 0 : bs_set_parent_cleanup_finish(ctx, 0);
7804 0 : return;
7805 : }
7806 :
7807 16 : ctx->blob = blob;
7808 16 : ctx->blob_md_ro = blob->md_ro;
7809 :
7810 16 : rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
7811 16 : if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
7812 4 : memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
7813 4 : SPDK_ERRLOG("external snapshot is already the parent of blob\n");
7814 4 : ctx->bserrno = -EEXIST;
7815 4 : goto error;
7816 : }
7817 :
7818 12 : if (!spdk_blob_is_thin_provisioned(blob)) {
7819 4 : SPDK_ERRLOG("blob is not thin-provisioned\n");
7820 4 : ctx->bserrno = -EINVAL;
7821 4 : goto error;
7822 : }
7823 :
7824 8 : if (blob->locked_operation_in_progress) {
7825 0 : SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
7826 0 : ctx->bserrno = -EBUSY;
7827 0 : goto error;
7828 : }
7829 :
7830 8 : blob->locked_operation_in_progress = true;
7831 :
7832 : /* Temporarily override md_ro flag for MD modification */
7833 8 : blob->md_ro = false;
7834 :
7835 8 : blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
7836 : &ctx->parent, bs_set_external_parent_unfrozen, ctx);
7837 8 : return;
7838 :
7839 8 : error:
7840 8 : spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
7841 : }
7842 :
7843 : void
7844 24 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
7845 : struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
7846 : uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
7847 : {
7848 : struct set_parent_ctx *ctx;
7849 : uint64_t esnap_dev_size, cluster_sz;
7850 :
7851 24 : if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
7852 4 : SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
7853 4 : cb_fn(cb_arg, -EINVAL);
7854 4 : return;
7855 : }
7856 :
7857 20 : esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
7858 20 : cluster_sz = spdk_bs_get_cluster_size(bs);
7859 20 : if ((esnap_dev_size % cluster_sz) != 0) {
7860 4 : SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
7861 : "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
7862 4 : cb_fn(cb_arg, -EINVAL);
7863 4 : return;
7864 : }
7865 :
7866 16 : ctx = calloc(1, sizeof(*ctx));
7867 16 : if (!ctx) {
7868 0 : cb_fn(cb_arg, -ENOMEM);
7869 0 : return;
7870 : }
7871 :
7872 16 : ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
7873 16 : if (!ctx->parent.u.esnap.id) {
7874 0 : free(ctx);
7875 0 : cb_fn(cb_arg, -ENOMEM);
7876 0 : return;
7877 : }
7878 :
7879 16 : ctx->bs = bs;
7880 16 : ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
7881 16 : memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
7882 16 : ctx->parent.u.esnap.id_len = esnap_id_len;
7883 16 : ctx->cb_fn = cb_fn;
7884 16 : ctx->cb_arg = cb_arg;
7885 16 : ctx->bserrno = 0;
7886 :
7887 16 : spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
7888 : }
7889 : /* END spdk_bs_blob_set_external_parent */
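                  :
                  : /*
                  :  * Usage sketch (added commentary): the esnap device size must be an integer
                  :  * multiple of the blobstore cluster size and the blob must be
                  :  * thin-provisioned. The id is an opaque byte string used to reopen the esnap
                  :  * device later. The names and the id value below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_set_esnap_done(void *cb_arg, int bserrno)
                  : {
                  : 	SPDK_NOTICELOG("set external parent finished: %d\n", bserrno);
                  : }
                  :
                  : const char esnap_uuid[] = "11111111-2222-3333-4444-555555555555";
                  :
                  : spdk_bs_blob_set_external_parent(bs, blob_id, esnap_bs_dev, esnap_uuid,
                  : 				 sizeof(esnap_uuid), example_set_esnap_done, NULL);
                  : #endif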
7890 :
7891 : /* START spdk_blob_resize */
7892 : struct spdk_bs_resize_ctx {
7893 : spdk_blob_op_complete cb_fn;
7894 : void *cb_arg;
7895 : struct spdk_blob *blob;
7896 : uint64_t sz;
7897 : int rc;
7898 : };
7899 :
7900 : static void
7901 202 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
7902 : {
7903 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7904 :
7905 202 : if (rc != 0) {
7906 0 : SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
7907 : }
7908 :
7909 202 : if (ctx->rc != 0) {
 7910           4 :                 SPDK_ERRLOG("Resize failed, ctx->rc=%d\n", ctx->rc);
7911 4 : rc = ctx->rc;
7912 : }
7913 :
7914 202 : ctx->blob->locked_operation_in_progress = false;
7915 :
7916 202 : ctx->cb_fn(ctx->cb_arg, rc);
7917 202 : free(ctx);
7918 202 : }
7919 :
7920 : static void
7921 202 : bs_resize_freeze_cpl(void *cb_arg, int rc)
7922 : {
7923 202 : struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
7924 :
7925 202 : if (rc != 0) {
7926 0 : ctx->blob->locked_operation_in_progress = false;
7927 0 : ctx->cb_fn(ctx->cb_arg, rc);
7928 0 : free(ctx);
7929 0 : return;
7930 : }
7931 :
7932 202 : ctx->rc = blob_resize(ctx->blob, ctx->sz);
7933 :
7934 202 : blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
7935 : }
7936 :
7937 : void
7938 216 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
7939 : {
7940 : struct spdk_bs_resize_ctx *ctx;
7941 :
7942 216 : blob_verify_md_op(blob);
7943 :
7944 216 : SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
7945 :
7946 216 : if (blob->md_ro) {
7947 4 : cb_fn(cb_arg, -EPERM);
7948 4 : return;
7949 : }
7950 :
7951 212 : if (sz == blob->active.num_clusters) {
7952 10 : cb_fn(cb_arg, 0);
7953 10 : return;
7954 : }
7955 :
7956 202 : if (blob->locked_operation_in_progress) {
7957 0 : cb_fn(cb_arg, -EBUSY);
7958 0 : return;
7959 : }
7960 :
7961 202 : ctx = calloc(1, sizeof(*ctx));
7962 202 : if (!ctx) {
7963 0 : cb_fn(cb_arg, -ENOMEM);
7964 0 : return;
7965 : }
7966 :
7967 202 : blob->locked_operation_in_progress = true;
7968 202 : ctx->cb_fn = cb_fn;
7969 202 : ctx->cb_arg = cb_arg;
7970 202 : ctx->blob = blob;
7971 202 : ctx->sz = sz;
7972 202 : blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
7973 : }
7974 :
7975 : /* END spdk_blob_resize */
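                  :
                  : /*
                  :  * Usage sketch (added commentary): sz is expressed in clusters, not bytes,
                  :  * and the call completes immediately when sz already matches the active
                  :  * cluster count. The names below are hypothetical.
                  :  */
                  : #if 0
                  : static void
                  : example_resize_done(void *cb_arg, int bserrno)
                  : {
                  : 	SPDK_NOTICELOG("resize finished: %d\n", bserrno);
                  : }
                  :
                  : spdk_blob_resize(blob, 64, example_resize_done, NULL); /* grow to 64 clusters */
                  : #endif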
7976 :
7977 :
7978 : /* START spdk_bs_delete_blob */
7979 :
7980 : static void
7981 1492 : bs_delete_close_cpl(void *cb_arg, int bserrno)
7982 : {
7983 1492 : spdk_bs_sequence_t *seq = cb_arg;
7984 :
7985 1492 : bs_sequence_finish(seq, bserrno);
7986 1492 : }
7987 :
7988 : static void
7989 1492 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
7990 : {
7991 1492 : struct spdk_blob *blob = cb_arg;
7992 :
7993 1492 : if (bserrno != 0) {
7994 : /*
 7995             :                  * We already removed this blob from the blobstore's open blob tree, so
7996 : * we need to free it here since this is the last reference
7997 : * to it.
7998 : */
7999 0 : blob_free(blob);
8000 0 : bs_delete_close_cpl(seq, bserrno);
8001 0 : return;
8002 : }
8003 :
8004 : /*
8005 : * This will immediately decrement the ref_count and call
8006 : * the completion routine since the metadata state is clean.
8007 : * By calling spdk_blob_close, we reduce the number of call
8008 : * points into code that touches the blob->open_ref count
8009 : * and the blobstore's blob list.
8010 : */
8011 1492 : spdk_blob_close(blob, bs_delete_close_cpl, seq);
8012 : }
8013 :
8014 : struct delete_snapshot_ctx {
8015 : struct spdk_blob_list *parent_snapshot_entry;
8016 : struct spdk_blob *snapshot;
8017 : struct spdk_blob_md_page *page;
8018 : bool snapshot_md_ro;
8019 : struct spdk_blob *clone;
8020 : bool clone_md_ro;
8021 : spdk_blob_op_with_handle_complete cb_fn;
8022 : void *cb_arg;
8023 : int bserrno;
8024 : uint32_t next_extent_page;
8025 : };
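                  :
                  : /*
                  :  * Added commentary: deleting a snapshot that still has exactly one clone runs
                  :  * through the callbacks below, roughly in this order:
                  :  *
                  :  *   delete_snapshot_open_clone_cb            open the clone
                  :  *   delete_snapshot_freeze_io_cb             freeze clone I/O, set
                  :  *                                            SNAPSHOT_PENDING_REMOVAL on the snapshot
                  :  *   delete_snapshot_sync_snapshot_xattr_cpl  merge the snapshot's cluster map
                  :  *                                            into the clone
                  :  *   delete_snapshot_update_extent_pages      merge extent pages
                  :  *   delete_snapshot_sync_clone_cpl           persist the clone, clear shared
                  :  *                                            clusters from the snapshot
                  :  *   delete_snapshot_sync_snapshot_cpl        persist the snapshot, fix up the
                  :  *                                            clone lists, unfreeze I/O
                  :  *
                  :  * Each step has a cleanup path that restores the previous state on error.
                  :  */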
8026 :
8027 : static void
8028 110 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
8029 : {
8030 110 : struct delete_snapshot_ctx *ctx = cb_arg;
8031 :
8032 110 : if (bserrno != 0) {
8033 0 : SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
8034 : }
8035 :
8036 110 : assert(ctx != NULL);
8037 :
8038 110 : if (bserrno != 0 && ctx->bserrno == 0) {
8039 0 : ctx->bserrno = bserrno;
8040 : }
8041 :
8042 110 : ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
8043 110 : spdk_free(ctx->page);
8044 110 : free(ctx);
8045 110 : }
8046 :
8047 : static void
8048 22 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
8049 : {
8050 22 : struct delete_snapshot_ctx *ctx = cb_arg;
8051 :
8052 22 : if (bserrno != 0) {
8053 0 : ctx->bserrno = bserrno;
8054 0 : SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
8055 : }
8056 :
8057 22 : if (ctx->bserrno != 0) {
8058 22 : assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
8059 22 : RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
8060 22 : spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
8061 : }
8062 :
8063 22 : ctx->snapshot->locked_operation_in_progress = false;
8064 22 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8065 :
8066 22 : spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
8067 22 : }
8068 :
8069 : static void
8070 12 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
8071 : {
8072 12 : struct delete_snapshot_ctx *ctx = cb_arg;
8073 :
8074 12 : ctx->clone->locked_operation_in_progress = false;
8075 12 : ctx->clone->md_ro = ctx->clone_md_ro;
8076 :
8077 12 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8078 12 : }
8079 :
8080 : static void
8081 48 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
8082 : {
8083 48 : struct delete_snapshot_ctx *ctx = cb_arg;
8084 :
8085 48 : if (bserrno) {
8086 0 : ctx->bserrno = bserrno;
8087 0 : delete_snapshot_cleanup_clone(ctx, 0);
8088 0 : return;
8089 : }
8090 :
8091 48 : ctx->clone->locked_operation_in_progress = false;
8092 48 : spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
8093 : }
8094 :
8095 : static void
8096 52 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
8097 : {
8098 52 : struct delete_snapshot_ctx *ctx = cb_arg;
8099 52 : struct spdk_blob_list *parent_snapshot_entry = NULL;
8100 52 : struct spdk_blob_list *snapshot_entry = NULL;
8101 52 : struct spdk_blob_list *clone_entry = NULL;
8102 52 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8103 :
8104 52 : if (bserrno) {
8105 4 : SPDK_ERRLOG("Failed to sync MD on blob\n");
8106 4 : ctx->bserrno = bserrno;
8107 4 : delete_snapshot_cleanup_clone(ctx, 0);
8108 4 : return;
8109 : }
8110 :
8111 : /* Get snapshot entry for the snapshot we want to remove */
8112 48 : snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
8113 :
8114 48 : assert(snapshot_entry != NULL);
8115 :
8116 : /* Remove clone entry in this snapshot (at this point there can be only one clone) */
8117 48 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8118 48 : assert(clone_entry != NULL);
8119 48 : TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
8120 48 : snapshot_entry->clone_count--;
8121 48 : assert(TAILQ_EMPTY(&snapshot_entry->clones));
8122 :
8123 48 : switch (ctx->snapshot->parent_id) {
8124 40 : case SPDK_BLOBID_INVALID:
8125 : case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
8126 : /* No parent snapshot - just remove clone entry */
8127 40 : free(clone_entry);
8128 40 : break;
8129 8 : default:
 8130             :                 /* This snapshot is itself a clone of another snapshot, so we need to
 8131             :                  * update the parent snapshot (remove the current clone entry and add a new
 8132             :                  * one inherited from the snapshot that is being removed) */
8133 :
8134 : /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
8135 : * snapshot that we are removing */
8136 8 : blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
8137 : &snapshot_clone_entry);
8138 :
8139 : /* Switch clone entry in parent snapshot */
8140 8 : TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
8141 8 : TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
8142 8 : free(snapshot_clone_entry);
8143 : }
8144 :
8145 : /* Restore md_ro flags */
8146 48 : ctx->clone->md_ro = ctx->clone_md_ro;
8147 48 : ctx->snapshot->md_ro = ctx->snapshot_md_ro;
8148 :
8149 48 : blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
8150 : }
8151 :
8152 : static void
8153 56 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
8154 : {
8155 56 : struct delete_snapshot_ctx *ctx = cb_arg;
8156 : uint64_t i;
8157 :
8158 56 : ctx->snapshot->md_ro = false;
8159 :
8160 56 : if (bserrno) {
8161 4 : SPDK_ERRLOG("Failed to sync MD on clone\n");
8162 4 : ctx->bserrno = bserrno;
8163 :
8164 : /* Restore snapshot to previous state */
8165 4 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8166 4 : if (bserrno != 0) {
8167 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8168 0 : return;
8169 : }
8170 :
8171 4 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8172 4 : return;
8173 : }
8174 :
8175 : /* Clear cluster map entries for snapshot */
8176 552 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8177 500 : if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
8178 492 : if (ctx->snapshot->active.clusters[i] != 0) {
8179 328 : ctx->snapshot->active.num_allocated_clusters--;
8180 : }
8181 492 : ctx->snapshot->active.clusters[i] = 0;
8182 : }
8183 : }
8184 78 : for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
8185 52 : i < ctx->clone->active.num_extent_pages; i++) {
8186 26 : if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
8187 24 : ctx->snapshot->active.extent_pages[i] = 0;
8188 : }
8189 : }
8190 :
8191 52 : blob_set_thin_provision(ctx->snapshot);
8192 52 : ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
8193 :
8194 52 : if (ctx->parent_snapshot_entry != NULL) {
8195 8 : ctx->snapshot->back_bs_dev = NULL;
8196 : }
8197 :
8198 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
8199 : }
8200 :
8201 : static void
8202 56 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
8203 : {
8204 : int bserrno;
8205 :
8206 : /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
8207 56 : blob_back_bs_destroy(ctx->clone);
8208 :
8209 : /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
8210 56 : if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
8211 8 : bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
8212 : BLOB_EXTERNAL_SNAPSHOT_ID);
8213 8 : if (bserrno != 0) {
8214 0 : ctx->bserrno = bserrno;
8215 :
8216 : /* Restore snapshot to previous state */
8217 0 : bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
8218 0 : if (bserrno != 0) {
8219 0 : delete_snapshot_cleanup_clone(ctx, bserrno);
8220 0 : return;
8221 : }
8222 :
8223 0 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
8224 0 : return;
8225 : }
8226 8 : ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
8227 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8228 : /* Do not delete the external snapshot along with this snapshot */
8229 8 : ctx->snapshot->back_bs_dev = NULL;
8230 8 : ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
8231 48 : } else if (ctx->parent_snapshot_entry != NULL) {
8232 : /* ...to parent snapshot */
8233 8 : ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
8234 8 : ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
8235 8 : blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
8236 : sizeof(spdk_blob_id),
8237 : true);
8238 : } else {
8239 : /* ...to blobid invalid and zeroes dev */
8240 40 : ctx->clone->parent_id = SPDK_BLOBID_INVALID;
8241 40 : ctx->clone->back_bs_dev = bs_create_zeroes_dev();
8242 40 : blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
8243 : }
8244 :
8245 56 : spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
8246 : }
8247 :
8248 : static void
8249 58 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
8250 : {
8251 58 : struct delete_snapshot_ctx *ctx = cb_arg;
8252 : uint32_t *extent_page;
8253 : uint64_t i;
8254 :
8255 84 : for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
8256 54 : i < ctx->clone->active.num_extent_pages; i++) {
8257 28 : if (ctx->snapshot->active.extent_pages[i] == 0) {
8258 : /* No extent page to use from snapshot */
8259 8 : continue;
8260 : }
8261 :
8262 20 : extent_page = &ctx->clone->active.extent_pages[i];
8263 20 : if (*extent_page == 0) {
8264 : /* Copy extent page from snapshot when clone did not have a matching one */
8265 18 : *extent_page = ctx->snapshot->active.extent_pages[i];
8266 18 : continue;
8267 : }
8268 :
8269 : /* Clone and snapshot both contain partially filled matching extent pages.
 8270             :                  * Update the clone extent page in place with a cluster map containing the mix of both. */
8271 2 : ctx->next_extent_page = i + 1;
8272 2 : memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
8273 :
8274 2 : blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
8275 : delete_snapshot_update_extent_pages, ctx);
8276 2 : return;
8277 : }
8278 56 : delete_snapshot_update_extent_pages_cpl(ctx);
8279 : }
8280 :
8281 : static void
8282 60 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
8283 : {
8284 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8285 : uint64_t i;
8286 :
8287 : /* Temporarily override md_ro flag for clone for MD modification */
8288 60 : ctx->clone_md_ro = ctx->clone->md_ro;
8289 60 : ctx->clone->md_ro = false;
8290 :
8291 60 : if (bserrno) {
8292 4 : SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
8293 4 : ctx->bserrno = bserrno;
8294 4 : delete_snapshot_cleanup_clone(ctx, 0);
8295 4 : return;
8296 : }
8297 :
8298 : /* Copy snapshot map to clone map (only unallocated clusters in clone) */
8299 596 : for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
8300 540 : if (ctx->clone->active.clusters[i] == 0) {
8301 532 : ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
8302 532 : if (ctx->clone->active.clusters[i] != 0) {
8303 368 : ctx->clone->active.num_allocated_clusters++;
8304 : }
8305 : }
8306 : }
8307 56 : ctx->next_extent_page = 0;
8308 56 : delete_snapshot_update_extent_pages(ctx, 0);
8309 : }
8310 :
8311 : static void
8312 8 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
8313 : {
8314 8 : struct delete_snapshot_ctx *ctx = cb_arg;
8315 :
8316 8 : if (bserrno != 0) {
8317 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
8318 : blob->id, bserrno);
8319 : /* That error should not stop us from syncing metadata. */
8320 : }
8321 :
8322 8 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8323 8 : }
8324 :
8325 : static void
8326 60 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
8327 : {
8328 60 : struct delete_snapshot_ctx *ctx = cb_arg;
8329 :
8330 60 : if (bserrno) {
8331 0 : SPDK_ERRLOG("Failed to freeze I/O on clone\n");
8332 0 : ctx->bserrno = bserrno;
8333 0 : delete_snapshot_cleanup_clone(ctx, 0);
8334 0 : return;
8335 : }
8336 :
8337 : /* Temporarily override md_ro flag for snapshot for MD modification */
8338 60 : ctx->snapshot_md_ro = ctx->snapshot->md_ro;
8339 60 : ctx->snapshot->md_ro = false;
8340 :
8341 : /* Mark the blob as pending removal for power-failure safety; store the clone id for recovery */
8342 60 : ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
8343 : sizeof(spdk_blob_id), true);
8344 60 : if (ctx->bserrno != 0) {
8345 0 : delete_snapshot_cleanup_clone(ctx, 0);
8346 0 : return;
8347 : }
8348 :
8349 60 : if (blob_is_esnap_clone(ctx->snapshot)) {
8350 8 : blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
8351 : delete_snapshot_esnap_channels_destroyed_cb,
8352 : ctx);
8353 8 : return;
8354 : }
8355 :
8356 52 : spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
8357 : }
8358 :
8359 : static void
8360 70 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
8361 : {
8362 70 : struct delete_snapshot_ctx *ctx = cb_arg;
8363 :
8364 70 : if (bserrno) {
8365 10 : SPDK_ERRLOG("Failed to open clone\n");
8366 10 : ctx->bserrno = bserrno;
8367 10 : delete_snapshot_cleanup_snapshot(ctx, 0);
8368 10 : return;
8369 : }
8370 :
8371 60 : ctx->clone = clone;
8372 :
8373 60 : if (clone->locked_operation_in_progress) {
8374 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
8375 0 : ctx->bserrno = -EBUSY;
8376 0 : spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
8377 0 : return;
8378 : }
8379 :
8380 60 : clone->locked_operation_in_progress = true;
8381 :
8382 60 : blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
8383 : }
8384 :
8385 : static void
8386 70 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
8387 : {
8388 70 : struct spdk_blob_list *snapshot_entry = NULL;
8389 70 : struct spdk_blob_list *clone_entry = NULL;
8390 70 : struct spdk_blob_list *snapshot_clone_entry = NULL;
8391 :
8392 : /* Get snapshot entry for the snapshot we want to remove */
8393 70 : snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
8394 :
8395 70 : assert(snapshot_entry != NULL);
8396 :
8397 : /* Get clone of the snapshot (at this point there can be only one clone) */
8398 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8399 70 : assert(snapshot_entry->clone_count == 1);
8400 70 : assert(clone_entry != NULL);
8401 :
8402 : /* Get the snapshot entry of the parent snapshot, and the clone entry within
8403 : * that snapshot for the snapshot that we are removing */
8404 70 : blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
8405 : &snapshot_clone_entry);
8406 :
8407 70 : spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
8408 70 : }
8409 :
8410 : static void
8411 1554 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
8412 : {
8413 1554 : spdk_bs_sequence_t *seq = cb_arg;
8414 1554 : struct spdk_blob_list *snapshot_entry = NULL;
8415 : uint32_t page_num;
8416 :
8417 1554 : if (bserrno) {
8418 62 : SPDK_ERRLOG("Failed to remove blob\n");
8419 62 : bs_sequence_finish(seq, bserrno);
8420 62 : return;
8421 : }
8422 :
8423 : /* Remove snapshot from the list */
8424 1492 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8425 1492 : if (snapshot_entry != NULL) {
8426 144 : TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
8427 144 : free(snapshot_entry);
8428 : }
8429 :
8430 1492 : page_num = bs_blobid_to_page(blob->id);
8431 1492 : spdk_bit_array_clear(blob->bs->used_blobids, page_num);
8432 1492 : blob->state = SPDK_BLOB_STATE_DIRTY;
8433 1492 : blob->active.num_pages = 0;
8434 1492 : blob_resize(blob, 0);
8435 :
8436 1492 : blob_persist(seq, blob, bs_delete_persist_cpl, blob);
8437 : }
8438 :
8439 : static int
8440 1554 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
8441 : {
8442 1554 : struct spdk_blob_list *snapshot_entry = NULL;
8443 1554 : struct spdk_blob_list *clone_entry = NULL;
8444 1554 : struct spdk_blob *clone = NULL;
8445 1554 : bool has_one_clone = false;
8446 :
8447 : /* Check if this is a snapshot with clones */
8448 1554 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
8449 1554 : if (snapshot_entry != NULL) {
8450 194 : if (snapshot_entry->clone_count > 1) {
8451 24 : SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
8452 24 : return -EBUSY;
8453 170 : } else if (snapshot_entry->clone_count == 1) {
8454 70 : has_one_clone = true;
8455 : }
8456 : }
8457 :
8458 : /* Check if someone has this blob open (besides this delete context):
8459 : * - open_ref == 1 - only this context opened the blob, so it is ok to remove it
8460 : * - open_ref <= 2 && has_one_clone == true - the clone is holding the snapshot
8461 : * open, and that is ok because we will update it accordingly */
8462 1530 : if (blob->open_ref <= 2 && has_one_clone) {
8463 70 : clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
8464 70 : assert(clone_entry != NULL);
8465 70 : clone = blob_lookup(blob->bs, clone_entry->id);
8466 :
8467 70 : if (blob->open_ref == 2 && clone == NULL) {
8468 : /* Clone is closed and someone else opened this blob */
8469 0 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8470 0 : return -EBUSY;
8471 : }
8472 :
8473 70 : *update_clone = true;
8474 70 : return 0;
8475 : }
8476 :
8477 1460 : if (blob->open_ref > 1) {
8478 16 : SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
8479 16 : return -EBUSY;
8480 : }
8481 :
8482 1444 : assert(has_one_clone == false);
8483 1444 : *update_clone = false;
8484 1444 : return 0;
8485 : }
8486 :
8487 : static void
8488 0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
8489 : {
8490 0 : spdk_bs_sequence_t *seq = cb_arg;
8491 :
8492 0 : bs_sequence_finish(seq, -ENOMEM);
8493 0 : }
8494 :
8495 : static void
8496 1564 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
8497 : {
8498 1564 : spdk_bs_sequence_t *seq = cb_arg;
8499 : struct delete_snapshot_ctx *ctx;
8500 1564 : bool update_clone = false;
8501 :
8502 1564 : if (bserrno != 0) {
8503 10 : bs_sequence_finish(seq, bserrno);
8504 10 : return;
8505 : }
8506 :
8507 1554 : blob_verify_md_op(blob);
8508 :
8509 1554 : ctx = calloc(1, sizeof(*ctx));
8510 1554 : if (ctx == NULL) {
8511 0 : spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
8512 0 : return;
8513 : }
8514 :
8515 1554 : ctx->snapshot = blob;
8516 1554 : ctx->cb_fn = bs_delete_blob_finish;
8517 1554 : ctx->cb_arg = seq;
8518 :
8519 : /* Check if blob can be removed and if it is a snapshot with clone on top of it */
8520 1554 : ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
8521 1554 : if (ctx->bserrno) {
8522 40 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8523 40 : return;
8524 : }
8525 :
8526 1514 : if (blob->locked_operation_in_progress) {
8527 0 : SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
8528 0 : ctx->bserrno = -EBUSY;
8529 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8530 0 : return;
8531 : }
8532 :
8533 1514 : blob->locked_operation_in_progress = true;
8534 :
8535 : /*
8536 : * Remove the blob from the blob_store list now, to ensure it does not
8537 : * get returned after this point by blob_lookup().
8538 : */
8539 1514 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
8540 1514 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
8541 :
8542 1514 : if (update_clone) {
8543 70 : ctx->page = spdk_zmalloc(SPDK_BS_PAGE_SIZE, 0, NULL, SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
8544 70 : if (!ctx->page) {
8545 0 : ctx->bserrno = -ENOMEM;
8546 0 : spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
8547 0 : return;
8548 : }
8549 : /* This blob is a snapshot with an active clone - update the clone first */
8550 70 : update_clone_on_snapshot_deletion(blob, ctx);
8551 : } else {
8552 : /* This blob does not have any clones - just remove it */
8553 1444 : bs_blob_list_remove(blob);
8554 1444 : bs_delete_blob_finish(seq, blob, 0);
8555 1444 : free(ctx);
8556 : }
8557 : }
8558 :
8559 : void
8560 1564 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8561 : spdk_blob_op_complete cb_fn, void *cb_arg)
8562 : {
8563 1564 : struct spdk_bs_cpl cpl;
8564 : spdk_bs_sequence_t *seq;
8565 :
8566 1564 : SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
8567 :
8568 1564 : assert(spdk_get_thread() == bs->md_thread);
8569 :
8570 1564 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8571 1564 : cpl.u.blob_basic.cb_fn = cb_fn;
8572 1564 : cpl.u.blob_basic.cb_arg = cb_arg;
8573 :
8574 1564 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8575 1564 : if (!seq) {
8576 0 : cb_fn(cb_arg, -ENOMEM);
8577 0 : return;
8578 : }
8579 :
8580 1564 : spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
8581 : }
8582 :
8583 : /* END spdk_bs_delete_blob */
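     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * deleting a blob. Per the assert above, spdk_bs_delete_blob() must be
     :  * called from the blobstore md thread. Names prefixed with "my_" are
     :  * hypothetical.
     :  *
     :  *   static void
     :  *   my_delete_done(void *cb_arg, int bserrno)
     :  *   {
     :  *           if (bserrno != 0) {
     :  *                   SPDK_ERRLOG("blob delete failed: %d\n", bserrno);
     :  *           }
     :  *   }
     :  *
     :  *   spdk_bs_delete_blob(my_bs, my_blobid, my_delete_done, NULL);
     :  */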
8584 :
8585 : /* START spdk_bs_open_blob */
8586 :
8587 : static void
8588 3474 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8589 : {
8590 3474 : struct spdk_blob *blob = cb_arg;
8591 : struct spdk_blob *existing;
8592 :
8593 3474 : if (bserrno != 0) {
8594 64 : blob_free(blob);
8595 64 : seq->cpl.u.blob_handle.blob = NULL;
8596 64 : bs_sequence_finish(seq, bserrno);
8597 64 : return;
8598 : }
8599 :
8600 3410 : existing = blob_lookup(blob->bs, blob->id);
8601 3410 : if (existing) {
8602 4 : blob_free(blob);
8603 4 : existing->open_ref++;
8604 4 : seq->cpl.u.blob_handle.blob = existing;
8605 4 : bs_sequence_finish(seq, 0);
8606 4 : return;
8607 : }
8608 :
8609 3406 : blob->open_ref++;
8610 :
8611 3406 : spdk_bit_array_set(blob->bs->open_blobids, blob->id);
8612 3406 : RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
8613 :
8614 3406 : bs_sequence_finish(seq, bserrno);
8615 : }
8616 :
8617 : static inline void
8618 4 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
8619 : {
8620 : #define FIELD_OK(field) \
8621 : offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
8622 :
8623 : #define SET_FIELD(field) \
8624 : if (FIELD_OK(field)) { \
8625 : dst->field = src->field; \
8626 : } \
8627 :
8628 4 : SET_FIELD(clear_method);
8629 4 : SET_FIELD(esnap_ctx);
8630 :
8631 4 : dst->opts_size = src->opts_size;
8632 :
8633 : /* Do not remove this statement: update the assert below if you add a new
8634 : * field, and also add a corresponding SET_FIELD statement */
8635 : SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
8636 :
8637 : #undef FIELD_OK
8638 : #undef SET_FIELD
8639 4 : }
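     : /*
     :  * Annotation (not part of the original file): the FIELD_OK/SET_FIELD
     :  * pattern above copies a field only when it lies entirely within the
     :  * caller's declared opts_size, i.e. within the version of the struct the
     :  * caller was compiled against. For example, a caller whose struct ended
     :  * right after clear_method would get esnap_ctx skipped, leaving the
     :  * default that spdk_blob_open_opts_init() put into opts_local. That is
     :  * what keeps old binaries compatible when new fields are appended.
     :  */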
8640 :
8641 : static void
8642 4279 : bs_open_blob(struct spdk_blob_store *bs,
8643 : spdk_blob_id blobid,
8644 : struct spdk_blob_open_opts *opts,
8645 : spdk_blob_op_with_handle_complete cb_fn,
8646 : void *cb_arg)
8647 : {
8648 : struct spdk_blob *blob;
8649 4279 : struct spdk_bs_cpl cpl;
8650 4279 : struct spdk_blob_open_opts opts_local;
8651 : spdk_bs_sequence_t *seq;
8652 : uint32_t page_num;
8653 :
8654 4279 : SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
8655 4279 : assert(spdk_get_thread() == bs->md_thread);
8656 :
8657 4279 : page_num = bs_blobid_to_page(blobid);
8658 4279 : if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
8659 : /* Invalid blobid */
8660 48 : cb_fn(cb_arg, NULL, -ENOENT);
8661 48 : return;
8662 : }
8663 :
8664 4231 : blob = blob_lookup(bs, blobid);
8665 4231 : if (blob) {
8666 757 : blob->open_ref++;
8667 757 : cb_fn(cb_arg, blob, 0);
8668 757 : return;
8669 : }
8670 :
8671 3474 : blob = blob_alloc(bs, blobid);
8672 3474 : if (!blob) {
8673 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8674 0 : return;
8675 : }
8676 :
8677 3474 : spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
8678 3474 : if (opts) {
8679 4 : blob_open_opts_copy(opts, &opts_local);
8680 : }
8681 :
8682 3474 : blob->clear_method = opts_local.clear_method;
8683 :
8684 3474 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
8685 3474 : cpl.u.blob_handle.cb_fn = cb_fn;
8686 3474 : cpl.u.blob_handle.cb_arg = cb_arg;
8687 3474 : cpl.u.blob_handle.blob = blob;
8688 3474 : cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
8689 :
8690 3474 : seq = bs_sequence_start_bs(bs->md_channel, &cpl);
8691 3474 : if (!seq) {
8692 0 : blob_free(blob);
8693 0 : cb_fn(cb_arg, NULL, -ENOMEM);
8694 0 : return;
8695 : }
8696 :
8697 3474 : blob_load(seq, blob, bs_open_blob_cpl, blob);
8698 : }
8699 :
8700 : void
8701 4275 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
8702 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8703 : {
8704 4275 : bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
8705 4275 : }
8706 :
8707 : void
8708 4 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
8709 : struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
8710 : {
8711 4 : bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
8712 4 : }
8713 :
8714 : /* END spdk_bs_open_blob */
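     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * opening a blob with explicit options. Re-opening an already-open blob
     :  * just bumps its open_ref and returns the same handle. Names prefixed
     :  * with "my_" are hypothetical.
     :  *
     :  *   static void
     :  *   my_open_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
     :  *   {
     :  *           if (bserrno == 0) {
     :  *                   // blob stays valid until the matching spdk_blob_close()
     :  *           }
     :  *   }
     :  *
     :  *   struct spdk_blob_open_opts opts;
     :  *
     :  *   spdk_blob_open_opts_init(&opts, sizeof(opts));
     :  *   opts.clear_method = BLOB_CLEAR_WITH_DEFAULT;
     :  *   spdk_bs_open_blob_ext(my_bs, my_blobid, &opts, my_open_done, NULL);
     :  */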
8715 :
8716 : /* START spdk_blob_set_read_only */
8717 : int
8718 236 : spdk_blob_set_read_only(struct spdk_blob *blob)
8719 : {
8720 236 : blob_verify_md_op(blob);
8721 :
8722 236 : blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
8723 :
8724 236 : blob->state = SPDK_BLOB_STATE_DIRTY;
8725 236 : return 0;
8726 : }
8727 : /* END spdk_blob_set_read_only */
8728 :
8729 : /* START spdk_blob_sync_md */
8730 :
8731 : static void
8732 1607 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8733 : {
8734 1607 : struct spdk_blob *blob = cb_arg;
8735 :
8736 1607 : if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
8737 404 : blob->data_ro = true;
8738 404 : blob->md_ro = true;
8739 : }
8740 :
8741 1607 : bs_sequence_finish(seq, bserrno);
8742 1607 : }
8743 :
8744 : static void
8745 1607 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8746 : {
8747 1607 : struct spdk_bs_cpl cpl;
8748 : spdk_bs_sequence_t *seq;
8749 :
8750 1607 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8751 1607 : cpl.u.blob_basic.cb_fn = cb_fn;
8752 1607 : cpl.u.blob_basic.cb_arg = cb_arg;
8753 :
8754 1607 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8755 1607 : if (!seq) {
8756 0 : cb_fn(cb_arg, -ENOMEM);
8757 0 : return;
8758 : }
8759 :
8760 1607 : blob_persist(seq, blob, blob_sync_md_cpl, blob);
8761 : }
8762 :
8763 : void
8764 1097 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
8765 : {
8766 1097 : blob_verify_md_op(blob);
8767 :
8768 1097 : SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
8769 :
8770 1097 : if (blob->md_ro) {
8771 4 : assert(blob->state == SPDK_BLOB_STATE_CLEAN);
8772 4 : cb_fn(cb_arg, 0);
8773 4 : return;
8774 : }
8775 :
8776 1093 : blob_sync_md(blob, cb_fn, cb_arg);
8777 : }
8778 :
8779 : /* END spdk_blob_sync_md */
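     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * metadata mutations such as spdk_blob_set_xattr() only mark the blob
     :  * dirty in memory; spdk_blob_sync_md() is what persists them. Names
     :  * prefixed with "my_" are hypothetical.
     :  *
     :  *   static void
     :  *   my_sync_done(void *cb_arg, int bserrno)
     :  *   {
     :  *           // metadata is on disk only if bserrno == 0
     :  *   }
     :  *
     :  *   if (spdk_blob_set_xattr(my_blob, "name", "value", strlen("value") + 1) == 0) {
     :  *           spdk_blob_sync_md(my_blob, my_sync_done, NULL);
     :  *   }
     :  */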
8780 :
8781 : struct spdk_blob_cluster_op_ctx {
8782 : struct spdk_thread *thread;
8783 : struct spdk_blob *blob;
8784 : uint32_t cluster_num; /* cluster index in blob */
8785 : uint32_t cluster; /* cluster on disk */
8786 : uint32_t extent_page; /* extent page on disk */
8787 : struct spdk_blob_md_page *page; /* preallocated extent page */
8788 : int rc;
8789 : spdk_blob_op_complete cb_fn;
8790 : void *cb_arg;
8791 : };
8792 :
8793 : static void
8794 876 : blob_op_cluster_msg_cpl(void *arg)
8795 : {
8796 876 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8797 :
8798 876 : ctx->cb_fn(ctx->cb_arg, ctx->rc);
8799 876 : free(ctx);
8800 876 : }
8801 :
8802 : static void
8803 846 : blob_op_cluster_msg_cb(void *arg, int bserrno)
8804 : {
8805 846 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8806 :
8807 846 : ctx->rc = bserrno;
8808 846 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8809 846 : }
8810 :
8811 : static void
8812 82 : blob_insert_new_ep_cb(void *arg, int bserrno)
8813 : {
8814 82 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8815 : uint32_t *extent_page;
8816 :
8817 82 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8818 82 : *extent_page = ctx->extent_page;
8819 82 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8820 82 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8821 82 : }
8822 :
8823 : struct spdk_blob_write_extent_page_ctx {
8824 : struct spdk_blob_store *bs;
8825 :
8826 : uint32_t extent;
8827 : struct spdk_blob_md_page *page;
8828 : };
8829 :
8830 : static void
8831 26 : blob_free_cluster_msg_cb(void *arg, int bserrno)
8832 : {
8833 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8834 :
8835 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8836 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
8837 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8838 :
8839 26 : ctx->rc = bserrno;
8840 26 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8841 26 : }
8842 :
8843 : static void
8844 26 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
8845 : {
8846 26 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8847 :
8848 26 : if (bserrno != 0 || ctx->blob->bs->clean == 0) {
8849 26 : blob_free_cluster_msg_cb(ctx, bserrno);
8850 26 : return;
8851 : }
8852 :
8853 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8854 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8855 : }
8856 :
8857 : static void
8858 0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
8859 : {
8860 0 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8861 :
8862 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8863 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8864 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8865 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8866 0 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8867 0 : blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
8868 0 : }
8869 :
8870 : static void
8871 434 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8872 : {
8873 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8874 :
8875 434 : free(ctx);
8876 434 : bs_sequence_finish(seq, bserrno);
8877 434 : }
8878 :
8879 : static void
8880 434 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
8881 : {
8882 434 : struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
8883 :
8884 434 : if (bserrno != 0) {
8885 0 : blob_persist_extent_page_cpl(seq, ctx, bserrno);
8886 0 : return;
8887 : }
8888 434 : bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
8889 434 : bs_byte_to_lba(ctx->bs, SPDK_BS_PAGE_SIZE),
8890 : blob_persist_extent_page_cpl, ctx);
8891 : }
8892 :
8893 : static void
8894 434 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
8895 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
8896 : {
8897 : struct spdk_blob_write_extent_page_ctx *ctx;
8898 : spdk_bs_sequence_t *seq;
8899 434 : struct spdk_bs_cpl cpl;
8900 :
8901 434 : ctx = calloc(1, sizeof(*ctx));
8902 434 : if (!ctx) {
8903 0 : cb_fn(cb_arg, -ENOMEM);
8904 0 : return;
8905 : }
8906 434 : ctx->bs = blob->bs;
8907 434 : ctx->extent = extent;
8908 434 : ctx->page = page;
8909 :
8910 434 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
8911 434 : cpl.u.blob_basic.cb_fn = cb_fn;
8912 434 : cpl.u.blob_basic.cb_arg = cb_arg;
8913 :
8914 434 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
8915 434 : if (!seq) {
8916 0 : free(ctx);
8917 0 : cb_fn(cb_arg, -ENOMEM);
8918 0 : return;
8919 : }
8920 :
8921 434 : assert(page);
8922 434 : page->next = SPDK_INVALID_MD_PAGE;
8923 434 : page->id = blob->id;
8924 434 : page->sequence_num = 0;
8925 :
8926 434 : blob_serialize_extent_page(blob, cluster_num, page);
8927 :
8928 434 : page->crc = blob_md_page_calc_crc(page);
8929 :
8930 434 : assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
8931 :
8932 434 : bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
8933 : }
8934 :
8935 : static void
8936 816 : blob_insert_cluster_msg(void *arg)
8937 : {
8938 816 : struct spdk_blob_cluster_op_ctx *ctx = arg;
8939 : uint32_t *extent_page;
8940 :
8941 816 : ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
8942 816 : if (ctx->rc != 0) {
8943 4 : spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
8944 4 : return;
8945 : }
8946 :
8947 812 : if (ctx->blob->use_extent_table == false) {
8948 : /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
8949 406 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
8950 406 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
8951 406 : return;
8952 : }
8953 :
8954 406 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
8955 406 : if (*extent_page == 0) {
8956 : /* Extent page requires allocation.
8957 : * It was already claimed in the used_md_pages map and placed in ctx. */
8958 82 : assert(ctx->extent_page != 0);
8959 82 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8960 82 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
8961 : blob_insert_new_ep_cb, ctx);
8962 : } else {
8963 : /* It is possible for the original thread to have allocated an extent page
8964 : * for a different cluster in the same extent page. In that case proceed with
8965 : * updating the existing extent page, but release the additional one. */
8966 324 : if (ctx->extent_page != 0) {
8967 0 : spdk_spin_lock(&ctx->blob->bs->used_lock);
8968 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
8969 0 : bs_release_md_page(ctx->blob->bs, ctx->extent_page);
8970 0 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
8971 0 : ctx->extent_page = 0;
8972 : }
8973 : /* Extent page already allocated.
8974 : * Every cluster allocation requires just an update of a single extent page. */
8975 324 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
8976 : blob_op_cluster_msg_cb, ctx);
8977 : }
8978 : }
8979 :
8980 : static void
8981 816 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
8982 : uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
8983 : spdk_blob_op_complete cb_fn, void *cb_arg)
8984 : {
8985 : struct spdk_blob_cluster_op_ctx *ctx;
8986 :
8987 816 : ctx = calloc(1, sizeof(*ctx));
8988 816 : if (ctx == NULL) {
8989 0 : cb_fn(cb_arg, -ENOMEM);
8990 0 : return;
8991 : }
8992 :
8993 816 : ctx->thread = spdk_get_thread();
8994 816 : ctx->blob = blob;
8995 816 : ctx->cluster_num = cluster_num;
8996 816 : ctx->cluster = cluster;
8997 816 : ctx->extent_page = extent_page;
8998 816 : ctx->page = page;
8999 816 : ctx->cb_fn = cb_fn;
9000 816 : ctx->cb_arg = cb_arg;
9001 :
9002 816 : spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
9003 : }
9004 :
9005 : static void
9006 60 : blob_free_cluster_msg(void *arg)
9007 : {
9008 60 : struct spdk_blob_cluster_op_ctx *ctx = arg;
9009 : uint32_t *extent_page;
9010 : uint32_t start_cluster_idx;
9011 60 : bool free_extent_page = true;
9012 : size_t i;
9013 :
9014 60 : ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
9015 :
9016 : /* There were concurrent unmaps of the same cluster; only the first one releases the cluster */
9017 60 : if (ctx->cluster == 0) {
9018 8 : blob_op_cluster_msg_cb(ctx, 0);
9019 8 : return;
9020 : }
9021 :
9022 52 : ctx->blob->active.clusters[ctx->cluster_num] = 0;
9023 52 : if (ctx->cluster != 0) {
9024 52 : ctx->blob->active.num_allocated_clusters--;
9025 : }
9026 :
9027 52 : if (ctx->blob->use_extent_table == false) {
9028 : /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
9029 26 : spdk_spin_lock(&ctx->blob->bs->used_lock);
9030 26 : bs_release_cluster(ctx->blob->bs, ctx->cluster);
9031 26 : spdk_spin_unlock(&ctx->blob->bs->used_lock);
9032 26 : ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
9033 26 : blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
9034 26 : return;
9035 : }
9036 :
9037 26 : extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
9038 :
9039 : /* There shouldn't be parallel release operations on same cluster */
9040 26 : assert(*extent_page == ctx->extent_page);
9041 :
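     : /* Annotation: round cluster_num down to the first cluster covered by the
     :  * same extent page. For example, assuming SPDK_EXTENTS_PER_EP were 512,
     :  * cluster_num 1000 would yield start_cluster_idx 512, i.e. the page
     :  * covering clusters 512-1023. */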
9042 26 : start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
9043 48 : for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
9044 48 : if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
9045 26 : free_extent_page = false;
9046 26 : break;
9047 : }
9048 : }
9049 :
9050 26 : if (free_extent_page) {
9051 0 : assert(ctx->extent_page != 0);
9052 0 : assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
9053 0 : ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
9054 0 : blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
9055 : blob_free_cluster_free_ep_cb, ctx);
9056 : } else {
9057 26 : blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
9058 : blob_free_cluster_update_ep_cb, ctx);
9059 : }
9060 : }
9061 :
9062 :
9063 : static void
9064 60 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
9065 : struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
9066 : {
9067 : struct spdk_blob_cluster_op_ctx *ctx;
9068 :
9069 60 : ctx = calloc(1, sizeof(*ctx));
9070 60 : if (ctx == NULL) {
9071 0 : cb_fn(cb_arg, -ENOMEM);
9072 0 : return;
9073 : }
9074 :
9075 60 : ctx->thread = spdk_get_thread();
9076 60 : ctx->blob = blob;
9077 60 : ctx->cluster_num = cluster_num;
9078 60 : ctx->extent_page = extent_page;
9079 60 : ctx->page = page;
9080 60 : ctx->cb_fn = cb_fn;
9081 60 : ctx->cb_arg = cb_arg;
9082 :
9083 60 : spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
9084 : }
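     : /*
     :  * Threading note (annotation): cluster map updates must happen on the
     :  * blobstore md thread, while allocation and release are triggered from
     :  * arbitrary I/O threads. Both helpers above record the calling thread in
     :  * ctx->thread, hop to bs->md_thread via spdk_thread_send_msg()
     :  * (blob_insert_cluster_msg / blob_free_cluster_msg), and complete by
     :  * hopping back in blob_op_cluster_msg_cpl(), so the user callback always
     :  * runs on the thread that initiated the operation.
     :  */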
9085 :
9086 : /* START spdk_blob_close */
9087 :
9088 : static void
9089 4167 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9090 : {
9091 4167 : struct spdk_blob *blob = cb_arg;
9092 :
9093 4167 : if (bserrno == 0) {
9094 4167 : blob->open_ref--;
9095 4167 : if (blob->open_ref == 0) {
9096 : /*
9097 : * Blobs with active.num_pages == 0 are deleted blobs.
9098 : * These blobs are removed from the blob_store list
9099 : * when the deletion process starts - so don't try to
9100 : * remove them again.
9101 : */
9102 3406 : if (blob->active.num_pages > 0) {
9103 1914 : spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
9104 1914 : RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
9105 : }
9106 3406 : blob_free(blob);
9107 : }
9108 : }
9109 :
9110 4167 : bs_sequence_finish(seq, bserrno);
9111 4167 : }
9112 :
9113 : static void
9114 120 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
9115 : {
9116 120 : spdk_bs_sequence_t *seq = cb_arg;
9117 :
9118 120 : if (bserrno != 0) {
9119 0 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
9120 : blob->id, bserrno);
9121 0 : bs_sequence_finish(seq, bserrno);
9122 0 : return;
9123 : }
9124 :
9125 120 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
9126 : blob->id, spdk_thread_get_name(spdk_get_thread()));
9127 :
9128 : /* Sync metadata */
9129 120 : blob_persist(seq, blob, blob_close_cpl, blob);
9130 : }
9131 :
9132 : void
9133 4167 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
9134 : {
9135 4167 : struct spdk_bs_cpl cpl;
9136 : spdk_bs_sequence_t *seq;
9137 :
9138 4167 : blob_verify_md_op(blob);
9139 :
9140 4167 : SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
9141 :
9142 4167 : if (blob->open_ref == 0) {
9143 0 : cb_fn(cb_arg, -EBADF);
9144 0 : return;
9145 : }
9146 :
9147 4167 : cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
9148 4167 : cpl.u.blob_basic.cb_fn = cb_fn;
9149 4167 : cpl.u.blob_basic.cb_arg = cb_arg;
9150 :
9151 4167 : seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
9152 4167 : if (!seq) {
9153 0 : cb_fn(cb_arg, -ENOMEM);
9154 0 : return;
9155 : }
9156 :
9157 4167 : if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
9158 120 : blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
9159 120 : return;
9160 : }
9161 :
9162 : /* Sync metadata */
9163 4047 : blob_persist(seq, blob, blob_close_cpl, blob);
9164 : }
9165 :
9166 : /* END spdk_blob_close */
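     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * dropping the last reference persists the blob's metadata and frees the
     :  * in-memory blob, so the handle must not be used after the callback.
     :  *
     :  *   static void
     :  *   my_close_done(void *cb_arg, int bserrno)
     :  *   {
     :  *           // if this was the last reference, the blob pointer is now invalid
     :  *   }
     :  *
     :  *   spdk_blob_close(my_blob, my_close_done, NULL);
     :  */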
9167 :
9168 233 : struct spdk_io_channel *
     : spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
9169 : {
9170 233 : return spdk_get_io_channel(bs);
9171 : }
9172 :
9173 : void
9174 233 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
9175 : {
9176 233 : blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
9177 233 : spdk_put_io_channel(channel);
9178 233 : }
9179 :
9180 : void
9181 108 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
9182 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9183 : {
9184 108 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9185 : SPDK_BLOB_UNMAP);
9186 108 : }
9187 :
9188 : void
9189 48 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
9190 : uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
9191 : {
9192 48 : blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
9193 : SPDK_BLOB_WRITE_ZEROES);
9194 48 : }
9195 :
9196 : void
9197 20868 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
9198 : void *payload, uint64_t offset, uint64_t length,
9199 : spdk_blob_op_complete cb_fn, void *cb_arg)
9200 : {
9201 20868 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9202 : SPDK_BLOB_WRITE);
9203 20868 : }
9204 :
9205 : void
9206 17500 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
9207 : void *payload, uint64_t offset, uint64_t length,
9208 : spdk_blob_op_complete cb_fn, void *cb_arg)
9209 : {
9210 17500 : blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
9211 : SPDK_BLOB_READ);
9212 17500 : }
9213 :
9214 : void
9215 140 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
9216 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9217 : spdk_blob_op_complete cb_fn, void *cb_arg)
9218 : {
9219 140 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
9220 140 : }
9221 :
9222 : void
9223 940 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
9224 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9225 : spdk_blob_op_complete cb_fn, void *cb_arg)
9226 : {
9227 940 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
9228 940 : }
9229 :
9230 : void
9231 208 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9232 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9233 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9234 : {
9235 208 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
9236 : io_opts);
9237 208 : }
9238 :
9239 : void
9240 1300 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
9241 : struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
9242 : spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
9243 : {
9244 1300 : blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
9245 : io_opts);
9246 1300 : }
9247 :
9248 : struct spdk_bs_iter_ctx {
9249 : int64_t page_num;
9250 : struct spdk_blob_store *bs;
9251 :
9252 : spdk_blob_op_with_handle_complete cb_fn;
9253 : void *cb_arg;
9254 : };
9255 :
9256 : static void
9257 1164 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
9258 : {
9259 1164 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9260 1164 : struct spdk_blob_store *bs = ctx->bs;
9261 : spdk_blob_id id;
9262 :
9263 1164 : if (bserrno == 0) {
9264 444 : ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
9265 444 : free(ctx);
9266 444 : return;
9267 : }
9268 :
9269 720 : ctx->page_num++;
9270 720 : ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
9271 720 : if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
9272 268 : ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
9273 268 : free(ctx);
9274 268 : return;
9275 : }
9276 :
9277 452 : id = bs_page_to_blobid(ctx->page_num);
9278 :
9279 452 : spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
9280 : }
9281 :
9282 : void
9283 292 : spdk_bs_iter_first(struct spdk_blob_store *bs,
9284 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9285 : {
9286 : struct spdk_bs_iter_ctx *ctx;
9287 :
9288 292 : ctx = calloc(1, sizeof(*ctx));
9289 292 : if (!ctx) {
9290 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9291 0 : return;
9292 : }
9293 :
9294 292 : ctx->page_num = -1;
9295 292 : ctx->bs = bs;
9296 292 : ctx->cb_fn = cb_fn;
9297 292 : ctx->cb_arg = cb_arg;
9298 :
9299 292 : bs_iter_cpl(ctx, NULL, -1);
9300 : }
9301 :
9302 : static void
9303 420 : bs_iter_close_cpl(void *cb_arg, int bserrno)
9304 : {
9305 420 : struct spdk_bs_iter_ctx *ctx = cb_arg;
9306 :
9307 420 : bs_iter_cpl(ctx, NULL, -1);
9308 420 : }
9309 :
9310 : void
9311 420 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
9312 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
9313 : {
9314 : struct spdk_bs_iter_ctx *ctx;
9315 :
9316 420 : assert(blob != NULL);
9317 :
9318 420 : ctx = calloc(1, sizeof(*ctx));
9319 420 : if (!ctx) {
9320 0 : cb_fn(cb_arg, NULL, -ENOMEM);
9321 0 : return;
9322 : }
9323 :
9324 420 : ctx->page_num = bs_blobid_to_page(blob->id);
9325 420 : ctx->bs = bs;
9326 420 : ctx->cb_fn = cb_fn;
9327 420 : ctx->cb_arg = cb_arg;
9328 :
9329 : /* Close the existing blob */
9330 420 : spdk_blob_close(blob, bs_iter_close_cpl, ctx);
9331 : }
9332 :
9333 : static int
9334 959 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9335 : uint16_t value_len, bool internal)
9336 : {
9337 : struct spdk_xattr_tailq *xattrs;
9338 : struct spdk_xattr *xattr;
9339 : size_t desc_size;
9340 : void *tmp;
9341 :
9342 959 : blob_verify_md_op(blob);
9343 :
9344 959 : if (blob->md_ro) {
9345 4 : return -EPERM;
9346 : }
9347 :
9348 955 : desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
9349 955 : if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
9350 4 : SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
9351 : desc_size, SPDK_BS_MAX_DESC_SIZE);
9352 4 : return -ENOMEM;
9353 : }
9354 :
9355 951 : if (internal) {
9356 740 : xattrs = &blob->xattrs_internal;
9357 740 : blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
9358 : } else {
9359 211 : xattrs = &blob->xattrs;
9360 : }
9361 :
9362 1182 : TAILQ_FOREACH(xattr, xattrs, link) {
9363 340 : if (!strcmp(name, xattr->name)) {
9364 109 : tmp = malloc(value_len);
9365 109 : if (!tmp) {
9366 0 : return -ENOMEM;
9367 : }
9368 :
9369 109 : free(xattr->value);
9370 109 : xattr->value_len = value_len;
9371 109 : xattr->value = tmp;
9372 109 : memcpy(xattr->value, value, value_len);
9373 :
9374 109 : blob->state = SPDK_BLOB_STATE_DIRTY;
9375 :
9376 109 : return 0;
9377 : }
9378 : }
9379 :
9380 842 : xattr = calloc(1, sizeof(*xattr));
9381 842 : if (!xattr) {
9382 0 : return -ENOMEM;
9383 : }
9384 :
9385 842 : xattr->name = strdup(name);
9386 842 : if (!xattr->name) {
9387 0 : free(xattr);
9388 0 : return -ENOMEM;
9389 : }
9390 :
9391 842 : xattr->value_len = value_len;
9392 842 : xattr->value = malloc(value_len);
9393 842 : if (!xattr->value) {
9394 0 : free(xattr->name);
9395 0 : free(xattr);
9396 0 : return -ENOMEM;
9397 : }
9398 842 : memcpy(xattr->value, value, value_len);
9399 842 : TAILQ_INSERT_TAIL(xattrs, xattr, link);
9400 :
9401 842 : blob->state = SPDK_BLOB_STATE_DIRTY;
9402 :
9403 842 : return 0;
9404 : }
9405 :
9406 : int
9407 183 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
9408 : uint16_t value_len)
9409 : {
9410 183 : return blob_set_xattr(blob, name, value, value_len, false);
9411 : }
9412 :
9413 : static int
9414 416 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
9415 : {
9416 : struct spdk_xattr_tailq *xattrs;
9417 : struct spdk_xattr *xattr;
9418 :
9419 416 : blob_verify_md_op(blob);
9420 :
9421 416 : if (blob->md_ro) {
9422 4 : return -EPERM;
9423 : }
9424 412 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9425 :
9426 424 : TAILQ_FOREACH(xattr, xattrs, link) {
9427 372 : if (!strcmp(name, xattr->name)) {
9428 360 : TAILQ_REMOVE(xattrs, xattr, link);
9429 360 : free(xattr->value);
9430 360 : free(xattr->name);
9431 360 : free(xattr);
9432 :
9433 360 : if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
9434 244 : blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
9435 : }
9436 360 : blob->state = SPDK_BLOB_STATE_DIRTY;
9437 :
9438 360 : return 0;
9439 : }
9440 : }
9441 :
9442 52 : return -ENOENT;
9443 : }
9444 :
9445 : int
9446 36 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
9447 : {
9448 36 : return blob_remove_xattr(blob, name, false);
9449 : }
9450 :
9451 : static int
9452 2292 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9453 : const void **value, size_t *value_len, bool internal)
9454 : {
9455 : struct spdk_xattr *xattr;
9456 : struct spdk_xattr_tailq *xattrs;
9457 :
9458 2292 : xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
9459 :
9460 2922 : TAILQ_FOREACH(xattr, xattrs, link) {
9461 1396 : if (!strcmp(name, xattr->name)) {
9462 766 : *value = xattr->value;
9463 766 : *value_len = xattr->value_len;
9464 766 : return 0;
9465 : }
9466 : }
9467 1526 : return -ENOENT;
9468 : }
9469 :
9470 : int
9471 154 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
9472 : const void **value, size_t *value_len)
9473 : {
9474 154 : blob_verify_md_op(blob);
9475 :
9476 154 : return blob_get_xattr_value(blob, name, value, value_len, false);
9477 : }
9478 :
9479 : struct spdk_xattr_names {
9480 : uint32_t count;
9481 : const char *names[0];
9482 : };
9483 :
9484 : static int
9485 4 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
9486 : {
9487 : struct spdk_xattr *xattr;
9488 4 : int count = 0;
9489 :
9490 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9491 8 : count++;
9492 : }
9493 :
9494 4 : *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
9495 4 : if (*names == NULL) {
9496 0 : return -ENOMEM;
9497 : }
9498 :
9499 12 : TAILQ_FOREACH(xattr, xattrs, link) {
9500 8 : (*names)->names[(*names)->count++] = xattr->name;
9501 : }
9502 :
9503 4 : return 0;
9504 : }
9505 :
9506 : int
9507 4 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
9508 : {
9509 4 : blob_verify_md_op(blob);
9510 :
9511 4 : return blob_get_xattr_names(&blob->xattrs, names);
9512 : }
9513 :
9514 : uint32_t
9515 4 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
9516 : {
9517 4 : assert(names != NULL);
9518 :
9519 4 : return names->count;
9520 : }
9521 :
9522 : const char *
9523 8 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
9524 : {
9525 8 : if (index >= names->count) {
9526 0 : return NULL;
9527 : }
9528 :
9529 8 : return names->names[index];
9530 : }
9531 :
9532 : void
9533 4 : spdk_xattr_names_free(struct spdk_xattr_names *names)
9534 : {
9535 4 : free(names);
9536 4 : }
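     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * enumerating the user-visible xattrs of an open blob. The names array
     :  * borrows the blob's own strings, so free it before mutating the blob's
     :  * xattrs. "my_blob" is hypothetical.
     :  *
     :  *   struct spdk_xattr_names *names;
     :  *   uint32_t i;
     :  *
     :  *   if (spdk_blob_get_xattr_names(my_blob, &names) == 0) {
     :  *           for (i = 0; i < spdk_xattr_names_get_count(names); i++) {
     :  *                   printf("xattr: %s\n", spdk_xattr_names_get_name(names, i));
     :  *           }
     :  *           spdk_xattr_names_free(names);
     :  *   }
     :  */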
9537 :
9538 : struct spdk_bs_type
9539 2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
9540 : {
9541 2 : return bs->bstype;
9542 : }
9543 :
9544 : void
9545 0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
9546 : {
9547 0 : memcpy(&bs->bstype, &bstype, sizeof(bstype));
9548 0 : }
9549 :
9550 : bool
9551 48 : spdk_blob_is_read_only(struct spdk_blob *blob)
9552 : {
9553 48 : assert(blob != NULL);
9554 48 : return (blob->data_ro || blob->md_ro);
9555 : }
9556 :
9557 : bool
9558 52 : spdk_blob_is_snapshot(struct spdk_blob *blob)
9559 : {
9560 : struct spdk_blob_list *snapshot_entry;
9561 :
9562 52 : assert(blob != NULL);
9563 :
9564 52 : snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
9565 52 : if (snapshot_entry == NULL) {
9566 28 : return false;
9567 : }
9568 :
9569 24 : return true;
9570 : }
9571 :
9572 : bool
9573 68 : spdk_blob_is_clone(struct spdk_blob *blob)
9574 : {
9575 68 : assert(blob != NULL);
9576 :
9577 68 : if (blob->parent_id != SPDK_BLOBID_INVALID &&
9578 52 : blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
9579 40 : assert(spdk_blob_is_thin_provisioned(blob));
9580 40 : return true;
9581 : }
9582 :
9583 28 : return false;
9584 : }
9585 :
9586 : bool
9587 36536 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
9588 : {
9589 36536 : assert(blob != NULL);
9590 36536 : return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
9591 : }
9592 :
9593 : bool
9594 40888 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
9595 : {
9596 40888 : return blob_is_esnap_clone(blob);
9597 : }
9598 :
9599 : static void
9600 3434 : blob_update_clear_method(struct spdk_blob *blob)
9601 : {
9602 : enum blob_clear_method stored_cm;
9603 :
9604 3434 : assert(blob != NULL);
9605 :
9606 : /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
9607 : * in metadata previously. If something other than the default was
9608 : * specified, ignore the stored value and use what was passed in.
9609 : */
9610 3434 : stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
9611 :
9612 3434 : if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
9613 3434 : blob->clear_method = stored_cm;
9614 0 : } else if (blob->clear_method != stored_cm) {
9615 0 : SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
9616 : blob->clear_method, stored_cm);
9617 : }
9618 3434 : }
9619 :
9620 : spdk_blob_id
9621 258 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
9622 : {
9623 258 : struct spdk_blob_list *snapshot_entry = NULL;
9624 258 : struct spdk_blob_list *clone_entry = NULL;
9625 :
9626 494 : TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
9627 732 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9628 496 : if (clone_entry->id == blob_id) {
9629 168 : return snapshot_entry->id;
9630 : }
9631 : }
9632 : }
9633 :
9634 90 : return SPDK_BLOBID_INVALID;
9635 : }
9636 :
9637 : int
9638 196 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
9639 : size_t *count)
9640 : {
9641 : struct spdk_blob_list *snapshot_entry, *clone_entry;
9642 : size_t n;
9643 :
9644 196 : snapshot_entry = bs_get_snapshot_entry(bs, blobid);
9645 196 : if (snapshot_entry == NULL) {
9646 28 : *count = 0;
9647 28 : return 0;
9648 : }
9649 :
9650 168 : if (ids == NULL || *count < snapshot_entry->clone_count) {
9651 8 : *count = snapshot_entry->clone_count;
9652 8 : return -ENOMEM;
9653 : }
9654 160 : *count = snapshot_entry->clone_count;
9655 :
9656 160 : n = 0;
9657 340 : TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
9658 180 : ids[n++] = clone_entry->id;
9659 : }
9660 :
9661 160 : return 0;
9662 : }
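     : /*
     :  * Illustrative usage sketch (annotation, not part of the original file):
     :  * the usual two-call pattern for spdk_blob_get_clones(). A first call with
     :  * ids == NULL (or a too-small count) returns -ENOMEM and reports the
     :  * required count; a second call fills the array. A return of 0 with count
     :  * == 0 means the blob has no clones. "my_" names are hypothetical.
     :  *
     :  *   size_t count = 0;
     :  *   spdk_blob_id *ids = NULL;
     :  *
     :  *   if (spdk_blob_get_clones(my_bs, my_snapshot_id, NULL, &count) == -ENOMEM) {
     :  *           ids = calloc(count, sizeof(*ids));
     :  *           if (ids != NULL &&
     :  *               spdk_blob_get_clones(my_bs, my_snapshot_id, ids, &count) == 0) {
     :  *                   // ids[0..count-1] are valid clone ids
     :  *           }
     :  *           free(ids);
     :  *   }
     :  */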
9663 :
9664 : static void
9665 4 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
9666 : {
9667 : int rc;
9668 :
9669 4 : if (ctx->super->size == 0) {
9670 0 : ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9671 : }
9672 :
9673 4 : if (ctx->super->io_unit_size == 0) {
9674 0 : ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
9675 : }
9676 :
9677 : /* Parse the super block */
9678 4 : ctx->bs->clean = 1;
9679 4 : ctx->bs->cluster_sz = ctx->super->cluster_size;
9680 4 : ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
9681 4 : ctx->bs->pages_per_cluster = ctx->bs->cluster_sz / SPDK_BS_PAGE_SIZE;
9682 4 : if (spdk_u32_is_pow2(ctx->bs->pages_per_cluster)) {
9683 4 : ctx->bs->pages_per_cluster_shift = spdk_u32log2(ctx->bs->pages_per_cluster);
9684 : }
9685 4 : ctx->bs->io_unit_size = ctx->super->io_unit_size;
9686 4 : rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
9687 4 : if (rc < 0) {
9688 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9689 0 : return;
9690 : }
9691 4 : ctx->bs->md_start = ctx->super->md_start;
9692 4 : ctx->bs->md_len = ctx->super->md_len;
9693 4 : rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
9694 4 : if (rc < 0) {
9695 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9696 0 : return;
9697 : }
9698 :
9699 8 : ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
9700 4 : ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
9701 4 : ctx->bs->super_blob = ctx->super->super_blob;
9702 4 : memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
9703 :
9704 4 : if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
9705 0 : SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
9706 0 : bs_load_ctx_fail(ctx, -EIO);
9707 0 : return;
9708 : } else {
9709 4 : bs_load_read_used_pages(ctx);
9710 : }
9711 : }
9712 :
9713 : static void
9714 4 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9715 : {
9716 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9717 :
9718 4 : if (bserrno != 0) {
9719 0 : bs_load_ctx_fail(ctx, bserrno);
9720 0 : return;
9721 : }
9722 4 : bs_load_grow_continue(ctx);
9723 : }
9724 :
9725 : static void
9726 4 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9727 : {
9728 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9729 :
9730 4 : if (bserrno != 0) {
9731 0 : bs_load_ctx_fail(ctx, bserrno);
9732 0 : return;
9733 : }
9734 :
9735 4 : spdk_free(ctx->mask);
9736 :
9737 4 : bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
9738 4 : bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
9739 : bs_load_grow_super_write_cpl, ctx);
9740 : }
9741 :
9742 : static void
9743 4 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9744 : {
9745 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9746 : uint64_t lba, lba_count;
9747 : uint64_t dev_size;
9748 : uint64_t total_clusters;
9749 :
9750 4 : if (bserrno != 0) {
9751 0 : bs_load_ctx_fail(ctx, bserrno);
9752 0 : return;
9753 : }
9754 :
9755 : /* The type must be correct */
9756 4 : assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
9757 : /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
9758 4 : assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
9759 : struct spdk_blob_md_page) * 8));
9760 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9761 4 : total_clusters = dev_size / ctx->super->cluster_size;
9762 4 : ctx->mask->length = total_clusters;
9763 :
9764 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9765 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9766 4 : bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
9767 : bs_load_grow_used_clusters_write_cpl, ctx);
9768 : }
9769 :
9770 : static void
9771 4 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
9772 : {
9773 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9774 : uint64_t lba, lba_count, mask_size;
9775 :
9776 4 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9777 4 : total_clusters = dev_size / ctx->super->cluster_size;
9778 4 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9779 4 : spdk_divide_round_up(total_clusters, 8),
9780 : SPDK_BS_PAGE_SIZE);
9781 4 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9782 : /* No necessary to grow or no space to grow */
9783 4 : if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
9784 0 : SPDK_DEBUGLOG(blob, "No grow\n");
9785 0 : bs_load_grow_continue(ctx);
9786 0 : return;
9787 : }
9788 :
9789 4 : SPDK_DEBUGLOG(blob, "Resize blobstore\n");
9790 :
9791 4 : ctx->super->size = dev_size;
9792 4 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9793 4 : ctx->super->crc = blob_md_page_calc_crc(ctx->super);
9794 :
9795 4 : mask_size = used_cluster_mask_len * SPDK_BS_PAGE_SIZE;
9796 4 : ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_SOCKET_ID_ANY,
9797 : SPDK_MALLOC_DMA);
9798 4 : if (!ctx->mask) {
9799 0 : bs_load_ctx_fail(ctx, -ENOMEM);
9800 0 : return;
9801 : }
9802 4 : lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
9803 4 : lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
9804 4 : bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
9805 : bs_load_grow_used_clusters_read_cpl, ctx);
9806 : }
9807 :
9808 : static void
9809 4 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9810 : {
9811 4 : struct spdk_bs_load_ctx *ctx = cb_arg;
9812 : int rc;
9813 :
9814 4 : rc = bs_super_validate(ctx->super, ctx->bs);
9815 4 : if (rc != 0) {
9816 0 : bs_load_ctx_fail(ctx, rc);
9817 0 : return;
9818 : }
9819 :
9820 4 : bs_load_try_to_grow(ctx);
9821 : }
9822 :
9823 : struct spdk_bs_grow_ctx {
9824 : struct spdk_blob_store *bs;
9825 : struct spdk_bs_super_block *super;
9826 :
9827 : struct spdk_bit_pool *new_used_clusters;
9828 : struct spdk_bs_md_mask *new_used_clusters_mask;
9829 :
9830 : spdk_bs_sequence_t *seq;
9831 : };
9832 :
9833 : static void
9834 32 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
9835 : {
9836 32 : if (bserrno != 0) {
9837 8 : spdk_bit_pool_free(&ctx->new_used_clusters);
9838 : }
9839 :
9840 32 : bs_sequence_finish(ctx->seq, bserrno);
9841 32 : free(ctx->new_used_clusters_mask);
9842 32 : spdk_free(ctx->super);
9843 32 : free(ctx);
9844 32 : }
9845 :
9846 : static void
9847 8 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9848 : {
9849 8 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9850 8 : struct spdk_blob_store *bs = ctx->bs;
9851 : uint64_t total_clusters;
9852 :
9853 8 : if (bserrno != 0) {
9854 0 : bs_grow_live_done(ctx, bserrno);
9855 0 : return;
9856 : }
9857 :
9858 : /*
9859 : * Blobstore is not clean until unload; for now only the super block is up to date.
9860 : * This is similar to the state right after blobstore init, before bs_write_used_md()
9861 : * has executed.
9862 : * When cleanly unloaded, the used md pages will be written out.
9863 : * In case of an unclean shutdown, loading the blobstore will go through the recovery
9864 : * path, correctly filling out used_clusters at the new size and writing it out.
9865 : */
9866 8 : bs->clean = 0;
9867 :
9868 : /* Reverting the super->size past this point is complex, avoid any error paths
9869 : * that would require doing so. */
9870 8 : spdk_spin_lock(&bs->used_lock);
9871 :
9872 8 : total_clusters = ctx->super->size / ctx->super->cluster_size;
9873 :
9874 8 : assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
9875 8 : spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
9876 :
9877 8 : assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
9878 8 : spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
9879 :
9880 8 : spdk_bit_pool_free(&bs->used_clusters);
9881 8 : bs->used_clusters = ctx->new_used_clusters;
9882 :
9883 8 : bs->total_clusters = total_clusters;
9884 16 : bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
9885 8 : bs->md_start + bs->md_len, bs->pages_per_cluster);
9886 :
9887 8 : bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
9888 8 : assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
9889 8 : spdk_spin_unlock(&bs->used_lock);
9890 :
9891 8 : bs_grow_live_done(ctx, 0);
9892 : }
9893 :
9894 : static void
9895 32 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
9896 : {
9897 32 : struct spdk_bs_grow_ctx *ctx = cb_arg;
9898 : uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
9899 : int rc;
9900 :
9901 32 : if (bserrno != 0) {
9902 0 : bs_grow_live_done(ctx, bserrno);
9903 0 : return;
9904 : }
9905 :
9906 32 : rc = bs_super_validate(ctx->super, ctx->bs);
9907 32 : if (rc != 0) {
9908 4 : bs_grow_live_done(ctx, rc);
9909 4 : return;
9910 : }
9911 :
9912 28 : dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
9913 28 : total_clusters = dev_size / ctx->super->cluster_size;
9914 28 : used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
9915 28 : spdk_divide_round_up(total_clusters, 8),
9916 : SPDK_BS_PAGE_SIZE);
9917 28 : max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
9918 : /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
9919 28 : if (dev_size == ctx->super->size) {
9920 16 : SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
9921 16 : bs_grow_live_done(ctx, 0);
9922 16 : return;
9923 : }
9924 : /*
9925 : * Blobstore cannot be shrunk, so fail the grow if:
9926 : * - the new size of the device is smaller than the size in the super block
9927 : * - the new total number of clusters is smaller than the used_clusters bit pool
9928 : * - there is not enough space in metadata for used_cluster_mask to be written out
9929 : */
9930 12 : if (dev_size < ctx->super->size ||
9931 12 : total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
9932 : used_cluster_mask_len > max_used_cluster_mask) {
9933 4 : SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
9934 4 : bs_grow_live_done(ctx, -ENOSPC);
9935 4 : return;
9936 : }
9937 :
9938 8 : SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
9939 :
9940 8 : ctx->new_used_clusters_mask = calloc(1, total_clusters);
9941 8 : if (!ctx->new_used_clusters_mask) {
9942 0 : bs_grow_live_done(ctx, -ENOMEM);
9943 0 : return;
9944 : }
9945 8 : ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
9946 8 : if (!ctx->new_used_clusters) {
9947 0 : bs_grow_live_done(ctx, -ENOMEM);
9948 0 : return;
9949 : }
9950 :
9951 8 : ctx->super->clean = 0;
9952 8 : ctx->super->size = dev_size;
9953 8 : ctx->super->used_cluster_mask_len = used_cluster_mask_len;
9954 8 : bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
9955 : }
9956 :
9957 : void
9958 32 : spdk_bs_grow_live(struct spdk_blob_store *bs,
9959 : spdk_bs_op_complete cb_fn, void *cb_arg)
9960 : {
9961 32 : struct spdk_bs_cpl cpl;
9962 : struct spdk_bs_grow_ctx *ctx;
9963 :
9964 32 : assert(spdk_get_thread() == bs->md_thread);
9965 :
9966 32 : SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
9967 :
9968 32 : cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
9969 32 : cpl.u.bs_basic.cb_fn = cb_fn;
9970 32 : cpl.u.bs_basic.cb_arg = cb_arg;
9971 :
9972 32 : ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
9973 32 : if (!ctx) {
9974 0 : cb_fn(cb_arg, -ENOMEM);
9975 0 : return;
9976 : }
9977 32 : ctx->bs = bs;
9978 :
9979 32 : ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
9980 : SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
9981 32 : if (!ctx->super) {
9982 0 : free(ctx);
9983 0 : cb_fn(cb_arg, -ENOMEM);
9984 0 : return;
9985 : }
9986 :
9987 32 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
9988 32 : if (!ctx->seq) {
9989 0 : spdk_free(ctx->super);
9990 0 : free(ctx);
9991 0 : cb_fn(cb_arg, -ENOMEM);
9992 0 : return;
9993 : }
9994 :
9995 : /* Read the super block */
9996 32 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
9997 32 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
9998 : bs_grow_live_load_super_cpl, ctx);
9999 : }
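 :
 : /*
 :  * Illustrative usage sketch (not part of the original source): growing a
 :  * loaded blobstore in place after the underlying device has been extended.
 :  * "my_bs" is a hypothetical, already-loaded blobstore handle; the call must
 :  * be made from its metadata thread.
 :  */
 : #if 0
 : static void
 : grow_done(void *cb_arg, int bserrno)
 : {
 : if (bserrno != 0) {
 : SPDK_ERRLOG("blobstore grow failed: %d\n", bserrno);
 : return;
 : }
 : /* The new clusters are available for allocation from this point on. */
 : }
 :
 : spdk_bs_grow_live(my_bs, grow_done, NULL);
 : #endif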
10000 :
10001 : void
10002 4 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
10003 : spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
10004 : {
10005 4 : struct spdk_blob_store *bs;
10006 4 : struct spdk_bs_cpl cpl;
10007 4 : struct spdk_bs_load_ctx *ctx;
10008 4 : struct spdk_bs_opts opts = {};
10009 : int err;
10010 :
10011 4 : SPDK_DEBUGLOG(blob, "Loading and growing blobstore from dev %p\n", dev);
10012 :
10013 4 : if ((SPDK_BS_PAGE_SIZE % dev->blocklen) != 0) {
10014 0 : SPDK_DEBUGLOG(blob, "unsupported dev block length of %u\n", dev->blocklen);
10015 0 : dev->destroy(dev);
10016 0 : cb_fn(cb_arg, NULL, -EINVAL);
10017 0 : return;
10018 : }
10019 :
10020 4 : spdk_bs_opts_init(&opts, sizeof(opts));
10021 4 : if (o) {
10022 4 : if (bs_opts_copy(o, &opts)) {
10023 0 : dev->destroy(dev);
 : cb_fn(cb_arg, NULL, -EINVAL);
 : return;
10024 : }
10025 : }
10026 :
10027 4 : if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
10028 0 : dev->destroy(dev);
10029 0 : cb_fn(cb_arg, NULL, -EINVAL);
10030 0 : return;
10031 : }
10032 :
10033 4 : err = bs_alloc(dev, &opts, &bs, &ctx);
10034 4 : if (err) {
10035 0 : dev->destroy(dev);
10036 0 : cb_fn(cb_arg, NULL, err);
10037 0 : return;
10038 : }
10039 :
10040 4 : cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
10041 4 : cpl.u.bs_handle.cb_fn = cb_fn;
10042 4 : cpl.u.bs_handle.cb_arg = cb_arg;
10043 4 : cpl.u.bs_handle.bs = bs;
10044 :
10045 4 : ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
10046 4 : if (!ctx->seq) {
10047 0 : spdk_free(ctx->super);
10048 0 : free(ctx);
10049 0 : bs_free(bs);
10050 0 : cb_fn(cb_arg, NULL, -ENOMEM);
10051 0 : return;
10052 : }
10053 :
10054 : /* Read the super block */
10055 4 : bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
10056 4 : bs_byte_to_lba(bs, sizeof(*ctx->super)),
10057 : bs_grow_load_super_cpl, ctx);
10058 : }
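 :
 : /*
 :  * Illustrative usage sketch (not part of the original source): loading a
 :  * blobstore and growing it to fill the current device capacity in one step.
 :  * "my_bs_dev" is a hypothetical struct spdk_bs_dev supplied by the caller;
 :  * on the failure paths above, spdk_bs_grow destroys the device itself.
 :  */
 : #if 0
 : static void
 : grow_load_done(void *cb_arg, struct spdk_blob_store *bs, int bserrno)
 : {
 : if (bserrno != 0) {
 : SPDK_ERRLOG("blobstore load+grow failed: %d\n", bserrno);
 : return;
 : }
 : /* bs is loaded and sized to the device. */
 : }
 :
 : struct spdk_bs_opts opts;
 :
 : spdk_bs_opts_init(&opts, sizeof(opts));
 : spdk_bs_grow(my_bs_dev, &opts, grow_load_done, NULL);
 : #endif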
10059 :
10060 : int
10061 24 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
10062 : {
10063 24 : if (!blob_is_esnap_clone(blob)) {
10064 12 : return -EINVAL;
10065 : }
10066 :
10067 12 : return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
10068 : }
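 :
 : /*
 :  * Illustrative usage sketch (not part of the original source): reading back
 :  * the external snapshot identifier of an esnap clone. The returned pointer
 :  * references the blob's internal xattr storage and must not be freed by the
 :  * caller.
 :  */
 : #if 0
 : const void *id;
 : size_t id_len;
 :
 : if (spdk_blob_get_esnap_id(blob, &id, &id_len) == 0) {
 : SPDK_NOTICELOG("esnap id is %zu bytes\n", id_len);
 : }
 : #endif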
10069 :
10070 : struct spdk_io_channel *
10071 8840 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
10072 : {
10073 8840 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(ch);
10074 8840 : struct spdk_bs_dev *bs_dev = blob->back_bs_dev;
10075 8840 : struct blob_esnap_channel find = {};
10076 : struct blob_esnap_channel *esnap_channel, *existing;
10077 :
10078 8840 : find.blob_id = blob->id;
10079 8840 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10080 8840 : if (spdk_likely(esnap_channel != NULL)) {
10081 8796 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
10082 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10083 8796 : return esnap_channel->channel;
10084 : }
10085 :
10086 44 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
10087 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10088 :
10089 44 : esnap_channel = calloc(1, sizeof(*esnap_channel));
10090 44 : if (esnap_channel == NULL) {
10091 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
10092 : find.blob_id);
10093 0 : return NULL;
10094 : }
10095 44 : esnap_channel->channel = bs_dev->create_channel(bs_dev);
10096 44 : if (esnap_channel->channel == NULL) {
10097 0 : SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
10098 0 : free(esnap_channel);
10099 0 : return NULL;
10100 : }
10101 44 : esnap_channel->blob_id = find.blob_id;
10102 44 : existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10103 44 : if (spdk_unlikely(existing != NULL)) {
10104 : /*
10105 : * This should be unreachable: all modifications to this tree happen on this thread.
10106 : */
10107 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": lost race to allocate a channel\n", find.blob_id);
10108 0 : assert(false);
10109 :
10110 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10111 : free(esnap_channel);
10112 :
10113 : return existing->channel;
10114 : }
10115 :
10116 44 : return esnap_channel->channel;
10117 : }
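 :
 : /*
 :  * Design note: the esnap channel cache above is keyed by blob id and lives
 :  * in the per-thread spdk_bs_channel, so lookups and inserts need no locking;
 :  * each tree is only ever touched from the thread that owns the channel.
 :  */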
10118 :
10119 : static int
10120 8816 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
10121 : {
10122 8816 : return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
10123 : }
10124 :
10125 : struct blob_esnap_destroy_ctx {
10126 : spdk_blob_op_with_handle_complete cb_fn;
10127 : void *cb_arg;
10128 : struct spdk_blob *blob;
10129 : struct spdk_bs_dev *back_bs_dev;
10130 : bool abort_io;
10131 : };
10132 :
10133 : static void
10134 152 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
10135 : {
10136 152 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10137 152 : struct spdk_blob *blob = ctx->blob;
10138 152 : struct spdk_blob_store *bs = blob->bs;
10139 :
10140 152 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
10141 : blob->id);
10142 :
10143 152 : if (ctx->cb_fn != NULL) {
10144 136 : ctx->cb_fn(ctx->cb_arg, blob, status);
10145 : }
10146 152 : free(ctx);
10147 :
10148 152 : bs->esnap_channels_unloading--;
10149 152 : if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
10150 4 : spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
10151 : }
10152 152 : }
10153 :
10154 : static void
10155 160 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
10156 : {
10157 160 : struct blob_esnap_destroy_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
10158 160 : struct spdk_blob *blob = ctx->blob;
10159 160 : struct spdk_bs_dev *bs_dev = ctx->back_bs_dev;
10160 160 : struct spdk_io_channel *channel = spdk_io_channel_iter_get_channel(i);
10161 160 : struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(channel);
10162 : struct blob_esnap_channel *esnap_channel;
10163 160 : struct blob_esnap_channel find = {};
10164 :
10165 160 : assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
10166 :
10167 160 : find.blob_id = blob->id;
10168 160 : esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
10169 160 : if (esnap_channel != NULL) {
10170 12 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
10171 : blob->id, spdk_thread_get_name(spdk_get_thread()));
10172 12 : RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
10173 :
10174 12 : if (ctx->abort_io) {
10175 : spdk_bs_user_op_t *op, *tmp;
10176 :
10177 8 : TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
10178 0 : if (op->back_channel == esnap_channel->channel) {
10179 0 : TAILQ_REMOVE(&bs_channel->queued_io, op, link);
10180 0 : bs_user_op_abort(op, -EIO);
10181 : }
10182 : }
10183 : }
10184 :
10185 12 : bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
10186 12 : free(esnap_channel);
10187 : }
10188 :
10189 160 : spdk_for_each_channel_continue(i, 0);
10190 160 : }
10191 :
10192 : /*
10193 : * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
10194 : * used when closing an esnap clone blob and after decoupling from the parent.
10195 : */
10196 : static void
10197 500 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
10198 : spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
10199 : {
10200 : struct blob_esnap_destroy_ctx *ctx;
10201 :
10202 500 : if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
10203 348 : if (cb_fn != NULL) {
10204 348 : cb_fn(cb_arg, blob, 0);
10205 : }
10206 348 : return;
10207 : }
10208 :
10209 152 : ctx = calloc(1, sizeof(*ctx));
10210 152 : if (ctx == NULL) {
10211 0 : if (cb_fn != NULL) {
10212 0 : cb_fn(cb_arg, blob, -ENOMEM);
10213 : }
10214 0 : return;
10215 : }
10216 152 : ctx->cb_fn = cb_fn;
10217 152 : ctx->cb_arg = cb_arg;
10218 152 : ctx->blob = blob;
10219 152 : ctx->back_bs_dev = blob->back_bs_dev;
10220 152 : ctx->abort_io = abort_io;
10221 :
10222 152 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
10223 : blob->id);
10224 :
10225 152 : blob->bs->esnap_channels_unloading++;
10226 152 : spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
10227 : blob_esnap_destroy_channels_done);
10228 : }
10229 :
10230 : /*
10231 : * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
10232 : * bs_channel is destroyed.
10233 : */
10234 : static void
10235 1029 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
10236 : {
10237 : struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
10238 :
10239 1029 : assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
10240 :
10241 1029 : SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
10242 : spdk_thread_get_name(spdk_get_thread()));
10243 1061 : RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
10244 : esnap_channel_tmp) {
10245 32 : SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
10246 : ": destroying one channel in thread %s\n",
10247 : esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
10248 32 : RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
10249 32 : spdk_put_io_channel(esnap_channel->channel);
10250 32 : free(esnap_channel);
10251 : }
10252 1029 : SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
10253 : spdk_thread_get_name(spdk_get_thread()));
10254 1029 : }
10255 :
10256 : static void
10257 28 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
10258 : {
10259 28 : struct set_bs_dev_ctx *ctx = _ctx;
10260 :
10261 28 : if (bserrno != 0) {
10262 : /* Even though the unfreeze failed, the update may have succeeded. */
10263 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
10264 : bserrno);
10265 : }
10266 28 : ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
10267 28 : free(ctx);
10268 28 : }
10269 :
10270 : static void
10271 28 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
10272 : {
10273 28 : struct set_bs_dev_ctx *ctx = _ctx;
10274 : int rc;
10275 :
10276 28 : if (bserrno != 0) {
10277 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
10278 : blob->id, bserrno);
10279 0 : ctx->bserrno = bserrno;
10280 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10281 0 : return;
10282 : }
10283 :
10284 28 : if (blob->back_bs_dev != NULL) {
10285 28 : blob_back_bs_dev_unref(blob);
10286 : }
10287 :
10288 28 : if (ctx->parent_refs_cb_fn) {
10289 20 : rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
10290 20 : if (rc != 0) {
10291 0 : ctx->bserrno = rc;
10292 0 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10293 0 : return;
10294 : }
10295 : }
10296 :
10297 28 : SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
10298 28 : blob->back_bs_dev = ctx->back_bs_dev;
10299 28 : ctx->bserrno = 0;
10300 :
10301 28 : blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
10302 : }
10303 :
10304 : static void
10305 28 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
10306 : {
10307 28 : struct set_bs_dev_ctx *ctx = _ctx;
10308 28 : struct spdk_blob *blob = ctx->blob;
10309 :
10310 28 : if (bserrno != 0) {
10311 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
10312 : bserrno);
10313 0 : ctx->cb_fn(ctx->cb_arg, bserrno);
10314 0 : free(ctx);
10315 0 : return;
10316 : }
10317 :
10318 : /*
10319 : * This does not prevent future reads from the esnap device because any future IO will
10320 : * lazily create a new esnap IO channel.
10321 : */
10322 28 : blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
10323 : }
10324 :
10325 : void
10326 8 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
10327 : spdk_blob_op_complete cb_fn, void *cb_arg)
10328 : {
10329 8 : if (!blob_is_esnap_clone(blob)) {
10330 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10331 0 : cb_fn(cb_arg, -EINVAL);
10332 0 : return;
10333 : }
10334 :
10335 8 : blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
10336 : }
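 :
 : /*
 :  * Illustrative usage sketch (not part of the original source): hot-swapping
 :  * the external snapshot device under an esnap clone, e.g. after its backing
 :  * bdev has been re-created. "new_bs_dev" is a hypothetical replacement
 :  * struct spdk_bs_dev.
 :  */
 : #if 0
 : static void
 : esnap_swap_done(void *cb_arg, int bserrno)
 : {
 : if (bserrno != 0) {
 : SPDK_ERRLOG("esnap device swap failed: %d\n", bserrno);
 : }
 : }
 :
 : spdk_blob_set_esnap_bs_dev(blob, new_bs_dev, esnap_swap_done, NULL);
 : #endif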
10337 :
10338 : struct spdk_bs_dev *
10339 4 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
10340 : {
10341 4 : if (!blob_is_esnap_clone(blob)) {
10342 0 : SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
10343 0 : return NULL;
10344 : }
10345 :
10346 4 : return blob->back_bs_dev;
10347 : }
10348 :
10349 : bool
10350 28 : spdk_blob_is_degraded(const struct spdk_blob *blob)
10351 : {
10352 28 : if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
10353 4 : return true;
10354 : }
10355 24 : if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
10356 12 : return false;
10357 : }
10358 :
10359 12 : return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
10360 : }
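 :
 : /*
 :  * Illustrative usage sketch (not part of the original source): a consumer
 :  * such as lvol can check degradation before issuing I/O to a blob whose
 :  * external snapshot device may be missing.
 :  */
 : #if 0
 : if (spdk_blob_is_degraded(blob)) {
 : SPDK_NOTICELOG("blob 0x%" PRIx64 " is degraded; deferring I/O\n",
 : spdk_blob_get_id(blob));
 : }
 : #endif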
10361 :
10362 3 : SPDK_LOG_REGISTER_COMPONENT(blob)
10363 3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)