LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4554 5650 80.6 %
Date: 2024-12-15 10:35:47 Functions: 339 355 95.5 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : #include "spdk/trace.h"
      20             : 
      21             : #include "spdk_internal/assert.h"
      22             : #include "spdk_internal/trace_defs.h"
      23             : #include "spdk/log.h"
      24             : 
      25             : #include "blobstore.h"
      26             : 
      27             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      28             : 
      29             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      30             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      31             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      32             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      33             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      34             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      36             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      37             : 
      38             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      39             :                           uint16_t value_len, bool internal);
      40             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      41             :                                 const void **value, size_t *value_len, bool internal);
      42             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      43             : 
      44             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      45             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      46             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      47             : 
      48             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      49             : 
      50             : /*
      51             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      52             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      53             :  * channel is destroyed, all the channels in the tree are destroyed.
      54             :  */
      55             : 
      56             : struct blob_esnap_channel {
                               /* Red-black tree linkage; this is the field named in the
                                * RB_GENERATE_STATIC(blob_esnap_channel_tree, ...) instantiation. */
      57             :         RB_ENTRY(blob_esnap_channel)    node;
                               /* Blob ID associated with this node; presumably the tree key -
                                * confirm against blob_esnap_channel_compare(). */
      58             :         spdk_blob_id                    blob_id;
                               /* I/O channel held by this node (per the comment above: one channel
                                * per thread per esnap bdev). */
      59             :         struct spdk_io_channel          *channel;
      60             : };
      61             : 
      62             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      63             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      64             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      65             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      66             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      67       19256 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      68             : 
                       /*
                        * Report whether a blob is marked as an external-snapshot (esnap) clone.
                        *
                        * blob must not be NULL (asserted). Returns true when the
                        * SPDK_BLOB_EXTERNAL_SNAPSHOT bit is set in blob->invalid_flags.
                        */
      69             : static inline bool
      70       68456 : blob_is_esnap_clone(const struct spdk_blob *blob)
      71             : {
      72       68456 :         assert(blob != NULL);
                               /* "!!" collapses the masked flag word to exactly 0 or 1. */
      73       68456 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      74             : }
      75             : 
                       /*
                        * Three-way comparison of two blobs by their id.
                        *
                        * Both pointers must be non-NULL (asserted). Returns -1 when blob1->id is
                        * smaller, 1 when it is larger and 0 when the ids are equal. This is the
                        * comparator handed to RB_GENERATE_STATIC(spdk_blob_tree, ...).
                        */
      76             : static int
      77        2875 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      78             : {
      79        2875 :         assert(blob1 != NULL && blob2 != NULL);
                               /* The ">" comparison yields 1 or 0, covering the "greater" and "equal" cases. */
      80        2875 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      81             : }
      82             : 
                       /*
                        * Instantiate the static red-black tree helpers for spdk_blob_tree: nodes are
                        * struct spdk_blob, linked through their "link" field and ordered by blob_id_cmp().
                        */
      83       20559 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      84             : 
                       /*
                        * Sanity-check that a metadata operation on blob is being issued legally.
                        *
                        * All checks are assert()s, so this function does nothing when assertions are
                        * compiled out. It verifies that blob is non-NULL, that the caller runs on the
                        * blobstore's md_thread, and that the blob is not in SPDK_BLOB_STATE_LOADING.
                        */
      85             : static void
      86       46177 : blob_verify_md_op(struct spdk_blob *blob)
      87             : {
      88       46177 :         assert(blob != NULL);
      89       46177 :         assert(spdk_get_thread() == blob->bs->md_thread);
      90       46177 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      91       46177 : }
      92             : 
                       /*
                        * Find the entry in bs->snapshots whose id equals blobid.
                        *
                        * Returns the matching entry, or NULL when the list holds no such entry.
                        */
      93             : static struct spdk_blob_list *
      94        4783 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      95             : {
      96        4783 :         struct spdk_blob_list *snapshot_entry = NULL;
      97             : 
                               /* TAILQ_FOREACH leaves the cursor NULL when the list is exhausted, so
                                * falling out of the loop without a break yields the "not found" result. */
      98        6018 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      99        2207 :                 if (snapshot_entry->id == blobid) {
     100         972 :                         break;
     101             :                 }
     102        1235 :         }
     103             : 
     104        4783 :         return snapshot_entry;
     105             : }
     106             : 
                       /*
                        * Mark metadata page index "page" as used in bs->used_md_pages.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, page is below the
                        * bit array's capacity, and the page is not already marked used.
                        */
     107             : static void
     108        3807 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     109             : {
     110        3807 :         assert(spdk_spin_held(&bs->used_lock));
     111        3807 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     112        3807 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     113             : 
                               /* NOTE(review): the return value of spdk_bit_array_set() is not checked here. */
     114        3807 :         spdk_bit_array_set(bs->used_md_pages, page);
     115        3807 : }
     116             : 
                       /*
                        * Mark metadata page index "page" as free in bs->used_md_pages.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, page is below the
                        * bit array's capacity, and the page is currently marked used.
                        */
     117             : static void
     118        2901 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     119             : {
     120        2901 :         assert(spdk_spin_held(&bs->used_lock));
     121        2901 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     122        2901 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     123             : 
     124        2901 :         spdk_bit_array_clear(bs->used_md_pages, page);
     125        2901 : }
     126             : 
                       /*
                        * Allocate one cluster from the bs->used_clusters bit pool.
                        *
                        * bs->used_lock must be held (asserted). On success the cluster index is
                        * returned and bs->num_free_clusters is decremented. When the pool returns
                        * UINT32_MAX, that value is passed through unchanged and the free counter
                        * is left untouched.
                        */
     127             : static uint32_t
     128       10283 : bs_claim_cluster(struct spdk_blob_store *bs)
     129             : {
     130             :         uint32_t cluster_num;
     131             : 
     132       10283 :         assert(spdk_spin_held(&bs->used_lock));
     133             : 
     134       10283 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     135       10283 :         if (cluster_num == UINT32_MAX) {
                                       /* Nothing was allocated; report the sentinel to the caller. */
     136           0 :                 return UINT32_MAX;
     137             :         }
     138             : 
     139       10283 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     140       10283 :         bs->num_free_clusters--;
     141             : 
     142       10283 :         return cluster_num;
     143       10283 : }
     144             : 
                       /*
                        * Return cluster index cluster_num to the bs->used_clusters bit pool and
                        * increment bs->num_free_clusters.
                        *
                        * Preconditions (asserted only): bs->used_lock is held, cluster_num is below
                        * the pool capacity, the cluster is currently allocated, and the free counter
                        * is below bs->total_clusters.
                        */
     145             : static void
     146        2996 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     147             : {
     148        2996 :         assert(spdk_spin_held(&bs->used_lock));
     149        2996 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     150        2996 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     151        2996 :         assert(bs->num_free_clusters < bs->total_clusters);
     152             : 
     153        2996 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     154             : 
     155        2996 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     156        2996 :         bs->num_free_clusters++;
     157        2996 : }
     158             : 
                       /*
                        * Record "cluster" as the backing for entry cluster_num of the blob's active
                        * cluster array.
                        *
                        * Returns -EEXIST, changing nothing, when the entry is already non-zero.
                        * Otherwise stores bs_cluster_to_lba(blob->bs, cluster) in the entry,
                        * increments blob->active.num_allocated_clusters and returns 0.
                        *
                        * NOTE(review): cluster_num is used as an index without a range check in this
                        * function; callers presumably guarantee it is within active.clusters.
                        */
     159             : static int
     160       10283 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     161             : {
     162       10283 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     163             : 
     164       10283 :         blob_verify_md_op(blob);
     165             : 
                               /* An entry of 0 is treated as "not yet allocated". */
     166       10283 :         if (*cluster_lba != 0) {
     167           5 :                 return -EEXIST;
     168             :         }
     169             : 
     170       10278 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     171       10278 :         blob->active.num_allocated_clusters++;
     172             : 
     173       10278 :         return 0;
     174       10283 : }
     175             : 
                       /*
                        * Claim a cluster for entry cluster_num of blob and, when the blob uses an
                        * extent table and the covering extent page slot is still 0, also claim a
                        * metadata page for it.
                        *
                        * blob->bs->used_lock must be held (asserted).
                        *
                        * cluster             - out: the claimed cluster index.
                        * lowest_free_md_page - in/out: index from which the search for a clear
                        *                       metadata page starts (0 is bumped to 1); on return
                        *                       holds the page that was claimed, if one was needed.
                        * update_map          - when true, the blob's in-memory cluster entry (and,
                        *                       if it was 0, the extent page slot) is updated here.
                        *                       When false they are left untouched; presumably the
                        *                       caller records them later - confirm against callers.
                        *
                        * Returns 0 on success, or -ENOSPC when no cluster or no metadata page is
                        * available. A cluster claimed before a metadata page shortage is released again.
                        */
     176             : static int
     177       10283 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     178             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     179             : {
                               /* NOTE(review): this is a pointer initialised with 0; NULL would state the intent. */
     180       10283 :         uint32_t *extent_page = 0;
     181             : 
     182       10283 :         assert(spdk_spin_held(&blob->bs->used_lock));
     183             : 
     184       10283 :         *cluster = bs_claim_cluster(blob->bs);
     185       10283 :         if (*cluster == UINT32_MAX) {
     186             :                 /* No more free clusters. Cannot satisfy the request */
     187           0 :                 return -ENOSPC;
     188             :         }
     189             : 
     190       10283 :         if (blob->use_extent_table) {
     191        6227 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     192        6227 :                 if (*extent_page == 0) {
     193             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     194        1087 :                         if (*lowest_free_md_page == 0) {
     195        1084 :                                 *lowest_free_md_page = 1;
     196        1084 :                         }
     197             :                         /* No extent_page is allocated for the cluster */
     198        2174 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     199        1087 :                                                *lowest_free_md_page);
     200        1087 :                         if (*lowest_free_md_page == UINT32_MAX) {
     201             :                                 /* No more free md pages. Cannot satisfy the request */
     202           0 :                                 bs_release_cluster(blob->bs, *cluster);
     203           0 :                                 return -ENOSPC;
     204             :                         }
     205        1087 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     206        1087 :                 }
     207        6227 :         }
     208             : 
     209       10283 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     210             :                       blob->id);
     211             : 
     212       10283 :         if (update_map) {
                                       /* NOTE(review): the -EEXIST result of blob_insert_cluster() is ignored here. */
     213        9253 :                 blob_insert_cluster(blob, cluster_num, *cluster);
                                       /* extent_page is only dereferenced when use_extent_table is set,
                                        * which is the only case in which it was assigned above. */
     214        9253 :                 if (blob->use_extent_table && *extent_page == 0) {
     215         958 :                         *extent_page = *lowest_free_md_page;
     216         958 :                 }
     217        9253 :         }
     218             : 
     219       10283 :         return 0;
     220       10283 : }
     221             : 
                       /*
                        * Reset an xattr options structure to "no xattrs": count 0 and NULL
                        * names, ctx and get_value. xattrs must not be NULL (it is dereferenced
                        * without a check).
                        */
     222             : static void
     223        6977 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     224             : {
     225        6977 :         xattrs->count = 0;
     226        6977 :         xattrs->names = NULL;
     227        6977 :         xattrs->ctx = NULL;
     228        6977 :         xattrs->get_value = NULL;
     229        6977 : }
     230             : 
                       /*
                        * Fill *opts with default blob creation options.
                        *
                        * opts_size is the number of bytes of *opts the caller provides. That many
                        * bytes are zeroed, opts_size is recorded in opts->opts_size, and a default
                        * is written only to fields that lie entirely within the first opts_size bytes.
                        *
                        * Defaults: num_clusters 0, thin_provision false, clear_method
                        * BLOB_CLEAR_WITH_DEFAULT, xattrs reset via blob_xattrs_init(),
                        * use_extent_table true.
                        *
                        * A NULL opts or a zero opts_size is logged and the function returns without
                        * touching anything.
                        */
     231             : void
     232        4611 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     233             : {
     234        4611 :         if (!opts) {
     235           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     236           0 :                 return;
     237             :         }
     238             : 
     239        4611 :         if (!opts_size) {
     240           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     241           0 :                 return;
     242             :         }
     243             : 
     244        4611 :         memset(opts, 0, opts_size);
                               /* NOTE(review): opts_size itself is stored without a FIELD_OK-style check;
                                * this assumes the caller's buffer at least covers the opts_size member. */
     245        4611 :         opts->opts_size = opts_size;
     246             : 
                       /* True when "field" fits completely inside the caller-provided opts_size bytes. */
     247             : #define FIELD_OK(field) \
     248             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     249             : 
                       /* Assign "value" to "field" only when the field fits (see FIELD_OK). */
     250             : #define SET_FIELD(field, value) \
     251             :         if (FIELD_OK(field)) { \
     252             :                 opts->field = value; \
     253             :         } \
     254             : 
     255        4611 :         SET_FIELD(num_clusters, 0);
     256        4611 :         SET_FIELD(thin_provision, false);
     257        4611 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     258             : 
     259        4611 :         if (FIELD_OK(xattrs)) {
     260        4611 :                 blob_xattrs_init(&opts->xattrs);
     261        4611 :         }
     262             : 
     263        4611 :         SET_FIELD(use_extent_table, true);
     264             : 
     265             : #undef FIELD_OK
     266             : #undef SET_FIELD
     267        4611 : }
     268             : 
                       /*
                        * Fill *opts with default blob open options.
                        *
                        * opts_size is the number of bytes of *opts the caller provides. That many
                        * bytes are zeroed, opts_size is recorded in opts->opts_size, and
                        * clear_method is set to BLOB_CLEAR_WITH_DEFAULT when that field lies
                        * entirely within the first opts_size bytes.
                        *
                        * A NULL opts or a zero opts_size is logged and the function returns without
                        * touching anything.
                        */
     269             : void
     270        4346 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     271             : {
     272        4346 :         if (!opts) {
     273           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     274           0 :                 return;
     275             :         }
     276             : 
     277        4346 :         if (!opts_size) {
     278           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     279           0 :                 return;
     280             :         }
     281             : 
     282        4346 :         memset(opts, 0, opts_size);
     283        4346 :         opts->opts_size = opts_size;
     284             : 
                       /* True when "field" fits completely inside the caller-provided opts_size bytes. */
     285             : #define FIELD_OK(field) \
     286             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     287             : 
                       /* Assign "value" to "field" only when the field fits (see FIELD_OK). */
     288             : #define SET_FIELD(field, value) \
     289             :         if (FIELD_OK(field)) { \
     290             :                 opts->field = value; \
     291             :         } \
     292             : 
     293        4346 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     294             : 
                       /* Both helper macros are local to this function; undefine them so they do
                        * not stay visible to the rest of the translation unit. */
     295             : #undef FIELD_OK
     296             : #undef SET_FIELD
     297        4346 : }
     298             : 
                       /*
                        * Allocate and initialise an in-memory blob object for blob "id" in "bs".
                        *
                        * The new blob is zero-initialised, then given: the id and bs, a parent_id of
                        * SPDK_BLOBID_INVALID, state SPDK_BLOB_STATE_DIRTY, a one-element active page
                        * list holding bs_blobid_to_page(id), and empty xattr / persist queues.
                        *
                        * Returns the blob, or NULL when either allocation fails (nothing is leaked).
                        * Presumably released with blob_free() - confirm against callers.
                        */
     299             : static struct spdk_blob *
     300        6707 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     301             : {
     302             :         struct spdk_blob *blob;
     303             : 
     304        6707 :         blob = calloc(1, sizeof(*blob));
     305        6707 :         if (!blob) {
     306           0 :                 return NULL;
     307             :         }
     308             : 
     309        6707 :         blob->id = id;
     310        6707 :         blob->bs = bs;
     311             : 
     312        6707 :         blob->parent_id = SPDK_BLOBID_INVALID;
     313             : 
     314        6707 :         blob->state = SPDK_BLOB_STATE_DIRTY;
                               /* Already false after calloc(); set explicitly for readability. */
     315        6707 :         blob->extent_rle_found = false;
     316        6707 :         blob->extent_table_found = false;
     317        6707 :         blob->active.num_pages = 1;
     318        6707 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     319        6707 :         if (!blob->active.pages) {
     320           0 :                 free(blob);
     321           0 :                 return NULL;
     322             :         }
     323             : 
                               /* The first (and so far only) metadata page is derived from the blob id. */
     324        6707 :         blob->active.pages[0] = bs_blobid_to_page(id);
     325             : 
     326        6707 :         TAILQ_INIT(&blob->xattrs);
     327        6707 :         TAILQ_INIT(&blob->xattrs_internal);
     328        6707 :         TAILQ_INIT(&blob->pending_persists);
     329        6707 :         TAILQ_INIT(&blob->persists_to_complete);
     330             : 
     331        6707 :         return blob;
     332        6707 : }
     333             : 
                       /*
                        * Remove every xattr from the list and free it together with its name and
                        * value buffers. The list head itself is not freed and is left empty.
                        */
     334             : static void
     335       13414 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     336             : {
     337             :         struct spdk_xattr       *xattr, *xattr_tmp;
     338             : 
                               /* The _SAFE variant is required because the current node is freed in the loop. */
     339       15601 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     340        2187 :                 TAILQ_REMOVE(xattrs, xattr, link);
     341        2187 :                 free(xattr->name);
     342        2187 :                 free(xattr->value);
     343        2187 :                 free(xattr);
     344        2187 :         }
     345       13414 : }
     346             : 
                       /*
                        * Invoke the destroy callback of blob->back_bs_dev and clear the pointer.
                        *
                        * blob->back_bs_dev is dereferenced without a NULL check, so the caller must
                        * ensure it is set (blob_free() checks before calling).
                        */
     347             : static void
     348        1398 : blob_unref_back_bs_dev(struct spdk_blob *blob)
     349             : {
     350        1398 :         blob->back_bs_dev->destroy(blob->back_bs_dev);
     351        1398 :         blob->back_bs_dev = NULL;
     352        1398 : }
     353             : 
                       /*
                        * Free a blob object and everything it owns: the active and clean
                        * extent-page, cluster and page arrays, both xattr lists, the back_bs_dev
                        * (if set) and finally the blob itself.
                        *
                        * blob must be non-NULL and have no pending or in-flight persists (asserted).
                        */
     354             : static void
     355        6707 : blob_free(struct spdk_blob *blob)
     356             : {
     357        6707 :         assert(blob != NULL);
     358        6707 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     359        6707 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     360             : 
                               /* free(NULL) is a no-op, so arrays that were never allocated are fine here. */
     361        6707 :         free(blob->active.extent_pages);
     362        6707 :         free(blob->clean.extent_pages);
     363        6707 :         free(blob->active.clusters);
     364        6707 :         free(blob->clean.clusters);
     365        6707 :         free(blob->active.pages);
     366        6707 :         free(blob->clean.pages);
     367             : 
     368        6707 :         xattrs_free(&blob->xattrs);
     369        6707 :         xattrs_free(&blob->xattrs_internal);
     370             : 
     371        6707 :         if (blob->back_bs_dev) {
     372        1363 :                 blob_unref_back_bs_dev(blob);
     373        1363 :         }
     374             : 
     375        6707 :         free(blob);
     376        6707 : }
     377             : 
                       /*
                        * Completion callback that blob_back_bs_destroy() passes to
                        * blob_esnap_destroy_bs_dev_channels().
                        *
                        * ctx is the struct spdk_bs_dev that blob_back_bs_destroy() detached from the
                        * blob. Once the channels have been dealt with, the device's destroy callback
                        * is invoked.
                        *
                        * A non-zero bserrno is logged and trips an assert. With assertions compiled
                        * out, execution continues and the device is still destroyed.
                        */
     378             : static void
     379         406 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     380             : {
     381         406 :         struct spdk_bs_dev      *bs_dev = ctx;
     382             : 
     383         406 :         if (bserrno != 0) {
     384             :                 /*
     385             :                  * This is probably due to a memory allocation failure when creating the
     386             :                  * blob_esnap_destroy_ctx before iterating threads.
     387             :                  */
     388           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     389             :                             blob->id, bserrno);
     390           0 :                 assert(false);
     391             :         }
     392             : 
     393         406 :         if (bs_dev == NULL) {
     394             :                 /*
     395             :                  * This check exists to make scanbuild happy.
     396             :                  *
     397             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     398             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     399             :                  * opening the esnap device. In each of these cases, no channels could have been
     400             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     401             :                  * deref.
     402             :                  */
     403           0 :                 assert(false);
     404             :                 return;
     405             :         }
     406             : 
     407         406 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     408         406 :         bs_dev->destroy(bs_dev);
     409         406 : }
     410             : 
                       /*
                        * Start tearing down blob->back_bs_dev.
                        *
                        * The device pointer is handed to blob_esnap_destroy_bs_dev_channels() as the
                        * callback argument and blob->back_bs_dev is cleared right away; the device's
                        * destroy callback is invoked later from blob_back_bs_destroy_esnap_done().
                        */
     411             : static void
     412         406 : blob_back_bs_destroy(struct spdk_blob *blob)
     413             : {
     414         406 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     415             :                       blob->id);
     416             : 
     417         812 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     418         406 :                                            blob->back_bs_dev);
                               /* Detach the device from the blob; the callback now holds the only reference
                                * taken from this blob. */
     419         406 :         blob->back_bs_dev = NULL;
     420         406 : }
     421             : 
                       /*
                        * Description of a blob's parent, passed to set_parent_refs_cb callbacks.
                        * Only one member of the union is meaningful at a time; which one presumably
                        * depends on whether the parent is a snapshot or an external snapshot -
                        * confirm against the callbacks that consume it.
                        */
     422             : struct blob_parent {
     423             :         union {
                                       /* Parent is a snapshot blob: its id and blob object. */
     424             :                 struct {
     425             :                         spdk_blob_id id;
     426             :                         struct spdk_blob *blob;
     427             :                 } snapshot;
     428             : 
                                       /* Parent is an external snapshot: an opaque id buffer with its
                                        * length (units presumably bytes - confirm) and the backing device. */
     429             :                 struct {
     430             :                         void *id;
     431             :                         uint32_t id_len;
     432             :                         struct spdk_bs_dev *back_bs_dev;
     433             :                 } esnap;
     434             :         } u;
     435             : };
     436             : 
                       /*
                        * Callback type stored in set_bs_dev_ctx.parent_refs_cb_fn: given a blob and a
                        * parent description it returns an int (presumably 0 or a negative errno -
                        * confirm against implementations).
                        */
     437             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     438             : 
                       /*
                        * Context allocated by blob_set_back_bs_dev() and passed through
                        * blob_freeze_io() to blob_set_back_bs_dev_frozen().
                        */
     439             : struct set_bs_dev_ctx {
                               /* Blob whose back_bs_dev is being replaced. */
     440             :         struct spdk_blob        *blob;
                               /* The new backing device supplied by the caller. */
     441             :         struct spdk_bs_dev      *back_bs_dev;
     442             : 
     443             :         /*
     444             :          * This callback is used during a set parent operation to change the references
     445             :          * to the parent of the blob.
     446             :          */
     447             :         set_parent_refs_cb      parent_refs_cb_fn;
     448             :         struct blob_parent      *parent_refs_cb_arg;
     449             : 
                               /* Caller's completion callback and its argument. */
     450             :         spdk_blob_op_complete   cb_fn;
     451             :         void                    *cb_arg;
                               /* Zero after allocation; presumably carries the result to the final
                                * completion - it is not written in the code visible here. */
     452             :         int                     bserrno;
     453             : };
     454             : 
                       /*
                        * Begin replacing the backing device of blob with back_bs_dev.
                        *
                        * The arguments are captured in a heap-allocated set_bs_dev_ctx and the blob's
                        * I/O is frozen via blob_freeze_io(); the work continues in
                        * blob_set_back_bs_dev_frozen() with the context as its argument. That function
                        * presumably owns and frees the context - it is not visible here.
                        *
                        * When the context cannot be allocated, cb_fn is called directly with -ENOMEM
                        * and nothing else happens.
                        */
     455             : static void
     456          35 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     457             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     458             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     459             : {
     460             :         struct set_bs_dev_ctx   *ctx;
     461             : 
     462          35 :         ctx = calloc(1, sizeof(*ctx));
     463          35 :         if (ctx == NULL) {
     464           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     465             :                             blob->id);
     466           0 :                 cb_fn(cb_arg, -ENOMEM);
     467           0 :                 return;
     468             :         }
     469             : 
     470          35 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     471          35 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     472          35 :         ctx->cb_fn = cb_fn;
     473          35 :         ctx->cb_arg = cb_arg;
     474          35 :         ctx->back_bs_dev = back_bs_dev;
     475          35 :         ctx->blob = blob;
     476             : 
     477          35 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     478          35 : }
     479             : 
                       /*
                        * Context shared by blob_freeze_io() and blob_unfreeze_io() while iterating
                        * over the blobstore's channels; freed in blob_io_cpl().
                        */
     480             : struct freeze_io_ctx {
                               /* Holds the caller's completion callback and argument (u.blob_basic). */
     481             :         struct spdk_bs_cpl cpl;
                               /* Blob being frozen or unfrozen. */
     482             :         struct spdk_blob *blob;
     483             : };
     484             : 
                       /*
                        * Per-channel step used by blob_freeze_io(): performs no work on the channel
                        * and immediately continues the iteration with status 0.
                        */
     485             : static void
     486         663 : blob_io_sync(struct spdk_io_channel_iter *i)
     487             : {
     488         663 :         spdk_for_each_channel_continue(i, 0);
     489         663 : }
     490             : 
                       /*
                        * Per-channel step used by blob_unfreeze_io(): walks the channel's queued_io
                        * list and, for every queued operation that targets ctx->blob, removes it from
                        * the queue and executes it. Operations for other blobs stay queued. The
                        * iteration is then continued with status 0.
                        */
     491             : static void
     492         648 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     493             : {
     494         648 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     495         648 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     496         648 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     497             :         struct spdk_bs_request_set      *set;
     498             :         struct spdk_bs_user_op_args     *args;
     499             :         spdk_bs_user_op_t *op, *tmp;
     500             : 
                               /* The _SAFE variant is required because matching entries are unlinked in the loop. */
     501         653 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
                                       /* A user op is viewed as a request set to reach its arguments
                                        * (presumably the same underlying object - confirm in request.h). */
     502           5 :                 set = (struct spdk_bs_request_set *)op;
     503           5 :                 args = &set->u.user_op;
     504             : 
     505           5 :                 if (args->blob == ctx->blob) {
     506           5 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     507           5 :                         bs_user_op_execute(op);
     508           5 :                 }
     509           5 :         }
     510             : 
     511         648 :         spdk_for_each_channel_continue(i, 0);
     512         648 : }
     513             : 
                       /*
                        * Final completion for the channel iterations started by blob_freeze_io() and
                        * blob_unfreeze_io(): calls the stored callback with result 0 and frees the
                        * freeze_io_ctx.
                        *
                        * NOTE(review): the iteration's "status" argument is not forwarded; the
                        * callback always receives 0.
                        */
     514             : static void
     515        1271 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     516             : {
     517        1271 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     518             : 
     519        1271 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     520             : 
     521        1271 :         free(ctx);
     522        1271 : }
     523             : 
                       /*
                        * Freeze I/O on a blob.
                        *
                        * Increments blob->frozen_refcnt and then visits every channel of the
                        * blobstore with blob_io_sync(); once all channels have been visited,
                        * blob_io_cpl() calls cb_fn(cb_arg, 0) and frees the context.
                        *
                        * Must satisfy blob_verify_md_op(). If the context cannot be allocated, cb_fn
                        * is called with -ENOMEM and the refcount is left unchanged.
                        */
     524             : static void
     525         643 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     526             : {
     527             :         struct freeze_io_ctx *ctx;
     528             : 
     529         643 :         blob_verify_md_op(blob);
     530             : 
     531         643 :         ctx = calloc(1, sizeof(*ctx));
     532         643 :         if (!ctx) {
     533           0 :                 cb_fn(cb_arg, -ENOMEM);
     534           0 :                 return;
     535             :         }
     536             : 
                               /* NOTE(review): type is set to ..._BS_BASIC while the blob_basic member is
                                * filled in; type is not read in the code visible here - confirm intent. */
     537         643 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     538         643 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     539         643 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     540         643 :         ctx->blob = blob;
     541             : 
     542             :         /* Freeze I/O on blob */
     543         643 :         blob->frozen_refcnt++;
     544             : 
     545         643 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     546         643 : }
     547             : 
                       /*
                        * Undo one blob_freeze_io() on a blob.
                        *
                        * Decrements blob->frozen_refcnt (asserted to be positive) and then visits
                        * every channel of the blobstore with blob_execute_queued_io(), which runs
                        * the operations queued for this blob; afterwards blob_io_cpl() calls
                        * cb_fn(cb_arg, 0) and frees the context.
                        *
                        * Must satisfy blob_verify_md_op(). If the context cannot be allocated, cb_fn
                        * is called with -ENOMEM and the refcount is left unchanged.
                        */
     548             : static void
     549         628 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     550             : {
     551             :         struct freeze_io_ctx *ctx;
     552             : 
     553         628 :         blob_verify_md_op(blob);
     554             : 
     555         628 :         ctx = calloc(1, sizeof(*ctx));
     556         628 :         if (!ctx) {
     557           0 :                 cb_fn(cb_arg, -ENOMEM);
     558           0 :                 return;
     559             :         }
     560             : 
     561         628 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     562         628 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     563         628 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     564         628 :         ctx->blob = blob;
     565             : 
     566         628 :         assert(blob->frozen_refcnt > 0);
     567             : 
     568         628 :         blob->frozen_refcnt--;
     569             : 
     570         628 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     571         628 : }
     572             : 
     573             : static int
     574       10498 : blob_mark_clean(struct spdk_blob *blob)
     575             : {
     576       10498 :         uint32_t *extent_pages = NULL;
     577       10498 :         uint64_t *clusters = NULL;
     578       10498 :         uint32_t *pages = NULL;
     579             : 
     580       10498 :         assert(blob != NULL);
     581             : 
     582       10498 :         if (blob->active.num_extent_pages) {
     583        4258 :                 assert(blob->active.extent_pages);
     584        4258 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     585        4258 :                 if (!extent_pages) {
     586           0 :                         return -ENOMEM;
     587             :                 }
     588        8516 :                 memcpy(extent_pages, blob->active.extent_pages,
     589        4258 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     590        4258 :         }
     591             : 
     592       10498 :         if (blob->active.num_clusters) {
     593        7349 :                 assert(blob->active.clusters);
     594        7349 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     595        7349 :                 if (!clusters) {
     596           0 :                         free(extent_pages);
     597           0 :                         return -ENOMEM;
     598             :                 }
     599        7349 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     600        7349 :         }
     601             : 
     602       10498 :         if (blob->active.num_pages) {
     603        8641 :                 assert(blob->active.pages);
     604        8641 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     605        8641 :                 if (!pages) {
     606           0 :                         free(extent_pages);
     607           0 :                         free(clusters);
     608           0 :                         return -ENOMEM;
     609             :                 }
     610        8641 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     611        8641 :         }
     612             : 
     613       10498 :         free(blob->clean.extent_pages);
     614       10498 :         free(blob->clean.clusters);
     615       10498 :         free(blob->clean.pages);
     616             : 
     617       10498 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     618       10498 :         blob->clean.extent_pages = blob->active.extent_pages;
     619       10498 :         blob->clean.num_clusters = blob->active.num_clusters;
     620       10498 :         blob->clean.clusters = blob->active.clusters;
     621       10498 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     622       10498 :         blob->clean.num_pages = blob->active.num_pages;
     623       10498 :         blob->clean.pages = blob->active.pages;
     624             : 
     625       10498 :         blob->active.extent_pages = extent_pages;
     626       10498 :         blob->active.clusters = clusters;
     627       10498 :         blob->active.pages = pages;
     628             : 
     629             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     630             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     631             :          */
     632       10498 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     633        4259 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     634        4259 :         }
     635             : 
     636       10498 :         return 0;
     637       10498 : }
     638             : 
     639             : static int
     640        1592 : blob_deserialize_xattr(struct spdk_blob *blob,
     641             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     642             : {
     643             :         struct spdk_xattr                       *xattr;
     644             : 
     645        3184 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     646        1592 :             sizeof(desc_xattr->value_length) +
     647        3184 :             desc_xattr->name_length + desc_xattr->value_length) {
     648           0 :                 return -EINVAL;
     649             :         }
     650             : 
     651        1592 :         xattr = calloc(1, sizeof(*xattr));
     652        1592 :         if (xattr == NULL) {
     653           0 :                 return -ENOMEM;
     654             :         }
     655             : 
     656        1592 :         xattr->name = malloc(desc_xattr->name_length + 1);
     657        1592 :         if (xattr->name == NULL) {
     658           0 :                 free(xattr);
     659           0 :                 return -ENOMEM;
     660             :         }
     661             : 
     662        1592 :         xattr->value = malloc(desc_xattr->value_length);
     663        1592 :         if (xattr->value == NULL) {
     664           0 :                 free(xattr->name);
     665           0 :                 free(xattr);
     666           0 :                 return -ENOMEM;
     667             :         }
     668             : 
     669        1592 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     670        1592 :         xattr->name[desc_xattr->name_length] = '\0';
     671        1592 :         xattr->value_len = desc_xattr->value_length;
     672        3184 :         memcpy(xattr->value,
     673        1592 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     674        1592 :                desc_xattr->value_length);
     675             : 
     676        1592 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     677             : 
     678        1592 :         return 0;
     679        1592 : }
     680             : 
     681             : 
     682             : static int
     683        5980 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     684             : {
     685             :         struct spdk_blob_md_descriptor *desc;
     686        5980 :         size_t  cur_desc = 0;
     687             :         void *tmp;
     688             : 
     689        5980 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     690       17389 :         while (cur_desc < sizeof(page->descriptors)) {
     691       17389 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     692        5920 :                         if (desc->length == 0) {
     693             :                                 /* If padding and length are 0, this terminates the page */
     694        5920 :                                 break;
     695             :                         }
     696       11469 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     697             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     698             : 
     699        4301 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     700             : 
     701        4301 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     702           0 :                                 return -EINVAL;
     703             :                         }
     704             : 
     705        4301 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     706             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     707          10 :                                 return -EINVAL;
     708             :                         }
     709             : 
     710        4291 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     711             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     712          15 :                                 blob->data_ro = true;
     713          15 :                                 blob->md_ro = true;
     714          15 :                         }
     715             : 
     716        4291 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     717             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     718          15 :                                 blob->md_ro = true;
     719          15 :                         }
     720             : 
     721        4291 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     722         712 :                                 blob->data_ro = true;
     723         712 :                                 blob->md_ro = true;
     724         712 :                         }
     725             : 
     726        4291 :                         blob->invalid_flags = desc_flags->invalid_flags;
     727        4291 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     728        4291 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     729             : 
     730       11459 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     731             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     732             :                         unsigned int                            i, j;
     733        1396 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     734             : 
     735        1396 :                         if (blob->extent_table_found) {
     736             :                                 /* Extent Table already present in the md,
     737             :                                  * both descriptors should never be at the same time. */
     738           0 :                                 return -EINVAL;
     739             :                         }
     740        1396 :                         blob->extent_rle_found = true;
     741             : 
     742        1396 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     743             : 
     744        1396 :                         if (desc_extent_rle->length == 0 ||
     745        1396 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     746           0 :                                 return -EINVAL;
     747             :                         }
     748             : 
     749        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     750       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     751       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     752       13384 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     753        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     754           0 :                                                         return -EINVAL;
     755             :                                                 }
     756        6692 :                                         }
     757       19668 :                                         cluster_count++;
     758       19668 :                                 }
     759        1572 :                         }
     760             : 
     761        1396 :                         if (cluster_count == 0) {
     762           0 :                                 return -EINVAL;
     763             :                         }
     764        1396 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     765        1396 :                         if (tmp == NULL) {
     766           0 :                                 return -ENOMEM;
     767             :                         }
     768        1396 :                         blob->active.clusters = tmp;
     769        1396 :                         blob->active.cluster_array_size = cluster_count;
     770             : 
     771        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     772       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     773       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     774       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     775        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     776        6692 :                                                 blob->active.num_allocated_clusters++;
     777       19668 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     778       12976 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     779       12976 :                                         } else {
     780           0 :                                                 return -EINVAL;
     781             :                                         }
     782       19668 :                                 }
     783        1572 :                         }
     784        7168 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     785             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     786        2624 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     787             :                         uint32_t i, j;
     788             :                         size_t extent_pages_length;
     789             : 
     790        2624 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
     791        2624 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     792             : 
     793        2624 :                         if (blob->extent_rle_found) {
     794             :                                 /* This means that Extent RLE is present in MD,
     795             :                                  * both should never be at the same time. */
     796           0 :                                 return -EINVAL;
     797        2624 :                         } else if (blob->extent_table_found &&
     798           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     799             :                                 /* Number of clusters in this ET does not match number
     800             :                                  * from previously read EXTENT_TABLE. */
     801           0 :                                 return -EINVAL;
     802             :                         }
     803             : 
     804        2624 :                         if (desc_extent_table->length == 0 ||
     805        2624 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     806           0 :                                 return -EINVAL;
     807             :                         }
     808             : 
     809        2624 :                         blob->extent_table_found = true;
     810             : 
     811        4825 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     812        2201 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     813        2201 :                         }
     814             : 
     815        2624 :                         if (num_extent_pages > 0) {
     816        2177 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     817        2177 :                                 if (tmp == NULL) {
     818           0 :                                         return -ENOMEM;
     819             :                                 }
     820        2177 :                                 blob->active.extent_pages = tmp;
     821        2177 :                         }
     822        2624 :                         blob->active.extent_pages_array_size = num_extent_pages;
     823             : 
     824        2624 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     825             : 
     826             :                         /* Extent table entries contain md page numbers for extent pages.
     827             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     828             :                          */
     829        4825 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     830        2201 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
     831        1565 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     832        1565 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     833        1565 :                                                 desc_extent_table->extent_page[i].page_idx;
     834        2201 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     835        1272 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     836         636 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     837         636 :                                         }
     838         636 :                                 } else {
     839           0 :                                         return -EINVAL;
     840             :                                 }
     841        2201 :                         }
     842        5772 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     843             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     844             :                         unsigned int                                    i;
     845        1556 :                         unsigned int                                    cluster_count = 0;
     846             :                         size_t                                          cluster_idx_length;
     847             : 
     848        1556 :                         if (blob->extent_rle_found) {
     849             :                                 /* This means that Extent RLE is present in MD,
     850             :                                  * both should never be at the same time. */
     851           0 :                                 return -EINVAL;
     852             :                         }
     853             : 
     854        1556 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     855        1556 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     856             : 
     857        1556 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     858        1556 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     859           0 :                                 return -EINVAL;
     860             :                         }
     861             : 
     862       24472 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     863       22916 :                                 if (desc_extent->cluster_idx[i] != 0) {
     864       10415 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     865           0 :                                                 return -EINVAL;
     866             :                                         }
     867       10415 :                                 }
     868       22916 :                                 cluster_count++;
     869       22916 :                         }
     870             : 
     871        1556 :                         if (cluster_count == 0) {
     872           0 :                                 return -EINVAL;
     873             :                         }
     874             : 
     875             :                         /* When reading extent pages sequentially starting cluster idx should match
     876             :                          * current size of a blob.
     877             :                          * If changed to batch reading, this check shall be removed. */
     878        1556 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     879           0 :                                 return -EINVAL;
     880             :                         }
     881             : 
     882        3112 :                         tmp = realloc(blob->active.clusters,
     883        1556 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     884        1556 :                         if (tmp == NULL) {
     885           0 :                                 return -ENOMEM;
     886             :                         }
     887        1556 :                         blob->active.clusters = tmp;
     888        1556 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     889             : 
     890       24472 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     891       22916 :                                 if (desc_extent->cluster_idx[i] != 0) {
     892       20830 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     893       10415 :                                                         desc_extent->cluster_idx[i]);
     894       10415 :                                         blob->active.num_allocated_clusters++;
     895       22916 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     896       12501 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     897       12501 :                                 } else {
     898           0 :                                         return -EINVAL;
     899             :                                 }
     900       22916 :                         }
     901        1556 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
     902        1556 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     903        1556 :                         blob->remaining_clusters_in_et -= cluster_count;
     904        3148 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     905             :                         int rc;
     906             : 
     907         952 :                         rc = blob_deserialize_xattr(blob,
     908         476 :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     909         476 :                         if (rc != 0) {
     910           0 :                                 return rc;
     911             :                         }
     912        1592 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     913             :                         int rc;
     914             : 
     915        2232 :                         rc = blob_deserialize_xattr(blob,
     916        1116 :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     917        1116 :                         if (rc != 0) {
     918           0 :                                 return rc;
     919             :                         }
     920        1116 :                 } else {
     921             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     922             :                          *  next descriptor.  If this descriptor is associated with some feature
     923             :                          *  defined in a newer version of blobstore, that version of blobstore
     924             :                          *  should create and set an associated feature flag to specify if this
     925             :                          *  blob can be loaded or not.
     926             :                          */
     927             :                 }
     928             : 
     929             :                 /* Advance to the next descriptor */
     930       11459 :                 cur_desc += sizeof(*desc) + desc->length;
     931       11459 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     932          50 :                         break;
     933             :                 }
     934       11409 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     935             :         }
     936             : 
     937        5970 :         return 0;
     938        5980 : }
     939             : 
     940             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     941             : 
     942             : static int
     943        1556 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     944             : {
     945        1556 :         assert(blob != NULL);
     946        1556 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     947             : 
     948        1556 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     949           0 :                 return -ENOENT;
     950             :         }
     951             : 
     952        1556 :         return blob_parse_page(extent_page, blob);
     953        1556 : }
     954             : 
     955             : static int
     956        4306 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     957             :            struct spdk_blob *blob)
     958             : {
     959             :         const struct spdk_blob_md_page *page;
     960             :         uint32_t i;
     961             :         int rc;
     962             :         void *tmp;
     963             : 
     964        4306 :         assert(page_count > 0);
     965        4306 :         assert(pages[0].sequence_num == 0);
     966        4306 :         assert(blob != NULL);
     967        4306 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     968        4306 :         assert(blob->active.clusters == NULL);
     969             : 
     970             :         /* The blobid provided doesn't match what's in the MD, this can
     971             :          * happen for example if a bogus blobid is passed in through open.
     972             :          */
     973        4306 :         if (blob->id != pages[0].id) {
     974           5 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     975             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     976           5 :                 return -ENOENT;
     977             :         }
     978             : 
     979        4301 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     980        4301 :         if (!tmp) {
     981           0 :                 return -ENOMEM;
     982             :         }
     983        4301 :         blob->active.pages = tmp;
     984             : 
     985        4301 :         blob->active.pages[0] = pages[0].id;
     986             : 
     987        4424 :         for (i = 1; i < page_count; i++) {
     988         123 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     989         123 :                 blob->active.pages[i] = pages[i - 1].next;
     990         123 :         }
     991        4301 :         blob->active.num_pages = page_count;
     992             : 
     993        8715 :         for (i = 0; i < page_count; i++) {
     994        4424 :                 page = &pages[i];
     995             : 
     996        4424 :                 assert(page->id == blob->id);
     997        4424 :                 assert(page->sequence_num == i);
     998             : 
     999        4424 :                 rc = blob_parse_page(page, blob);
    1000        4424 :                 if (rc != 0) {
    1001          10 :                         return rc;
    1002             :                 }
    1003        4414 :         }
    1004             : 
    1005        4291 :         return 0;
    1006        4306 : }
    1007             : 
    1008             : static int
    1009        5532 : blob_serialize_add_page(const struct spdk_blob *blob,
    1010             :                         struct spdk_blob_md_page **pages,
    1011             :                         uint32_t *page_count,
    1012             :                         struct spdk_blob_md_page **last_page)
    1013             : {
    1014             :         struct spdk_blob_md_page *page, *tmp_pages;
    1015             : 
    1016        5532 :         assert(pages != NULL);
    1017        5532 :         assert(page_count != NULL);
    1018             : 
    1019        5532 :         *last_page = NULL;
    1020        5532 :         if (*page_count == 0) {
    1021        5423 :                 assert(*pages == NULL);
    1022        5423 :                 *pages = spdk_malloc(blob->bs->md_page_size, 0,
    1023             :                                      NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1024        5423 :                 if (*pages == NULL) {
    1025           0 :                         return -ENOMEM;
    1026             :                 }
    1027        5423 :                 *page_count = 1;
    1028        5423 :         } else {
    1029         109 :                 assert(*pages != NULL);
    1030         109 :                 tmp_pages = spdk_realloc(*pages, blob->bs->md_page_size * (*page_count + 1), 0);
    1031         109 :                 if (tmp_pages == NULL) {
    1032           0 :                         return -ENOMEM;
    1033             :                 }
    1034         109 :                 (*page_count)++;
    1035         109 :                 *pages = tmp_pages;
    1036             :         }
    1037             : 
    1038        5532 :         page = &(*pages)[*page_count - 1];
    1039        5532 :         memset(page, 0, sizeof(*page));
    1040        5532 :         page->id = blob->id;
    1041        5532 :         page->sequence_num = *page_count - 1;
    1042        5532 :         page->next = SPDK_INVALID_MD_PAGE;
    1043        5532 :         *last_page = page;
    1044             : 
    1045        5532 :         return 0;
    1046        5532 : }
    1047             : 
    1048             : /* Transform the in-memory representation 'xattr' into an on-disk xattr descriptor.
    1049             :  * Update required_sz on both success and failure. Returns 0 on success, or -1 with
    1050             :  * buf untouched when buf_sz < *required_sz. 'internal' selects the descriptor type.
    1051             :  */
    1052             : static int
    1053        2160 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1054             :                      uint8_t *buf, size_t buf_sz,
    1055             :                      size_t *required_sz, bool internal)
    1056             : {
    1057             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1058             :         /* Space needed: the descriptor struct plus the name bytes (no NUL terminator) plus the value bytes. */
    1059        4320 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1060        4320 :                        strlen(xattr->name) +
    1061        2160 :                        xattr->value_len;
    1062             : 
    1063        2160 :         if (buf_sz < *required_sz) {
    1064          60 :                 return -1;
    1065             :         }
    1066             : 
    1067        2100 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1068             :         /* desc->length counts the name_length and value_length fields plus the name and value bytes. */
    1069        2100 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
    1070        2100 :         desc->length = sizeof(desc->name_length) +
    1071        2100 :                        sizeof(desc->value_length) +
    1072        4200 :                        strlen(xattr->name) +
    1073        2100 :                        xattr->value_len;
    1074        2100 :         desc->name_length = strlen(xattr->name);
    1075        2100 :         desc->value_length = xattr->value_len;
    1076             :         /* The value bytes are stored immediately after the name bytes, with no separator. */
    1077        2100 :         memcpy(desc->name, xattr->name, desc->name_length);
    1078        4200 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1079        2100 :                xattr->value,
    1080        2100 :                desc->value_length);
    1081             : 
    1082        2100 :         return 0;
    1083        2160 : }
    1084             : /* Emit one EXTENT_TABLE descriptor at *buf for extent pages from start_ep; *next_ep gets the first index not serialized. */
    1085             : static void
    1086        2516 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1087             :                                   uint64_t start_ep, uint64_t *next_ep,
    1088             :                                   uint8_t **buf, size_t *remaining_sz)
    1089             : {
    1090             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1091             :         size_t cur_sz;
    1092             :         uint64_t i, et_idx;
    1093             :         uint32_t extent_page, ep_len;
    1094             : 
    1095             :         /* The buffer must have room for at least num_clusters entry */
    1096        2516 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1097        2516 :         if (*remaining_sz < cur_sz) {
    1098          30 :                 *next_ep = start_ep;
    1099          30 :                 return;
    1100             :         }
    1101             :         /* (Above: with too little room, *next_ep is set to start_ep and neither *buf nor *remaining_sz changes.) */
    1102        2486 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1103        2486 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1104             : 
    1105        2486 :         desc->num_clusters = blob->active.num_clusters;
    1106             :         /* ep_len counts the current run of consecutive zero (unallocated) extent pages; et_idx is the next output slot. */
    1107        2486 :         ep_len = 1;
    1108        2486 :         et_idx = 0;
    1109        6343 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1110        3857 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1111             :                         /* If we ran out of buffer space, return */
    1112           0 :                         break;
    1113             :                 }
    1114             : 
    1115        3857 :                 extent_page = blob->active.extent_pages[i];
    1116             :                 /* Verify that next extent_page is unallocated */
    1117        5474 :                 if (extent_page == 0 &&
    1118        2283 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1119        1617 :                         ep_len++;
    1120        1617 :                         continue;
    1121             :                 }
    1122        2240 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1123        2240 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1124        2240 :                 et_idx++;
    1125             :                 /* Restart the run length and account for the entry that was just written. */
    1126        2240 :                 ep_len = 1;
    1127        2240 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1128        2240 :         }
    1129        2486 :         *next_ep = i;
    1130             :         /* length covers num_clusters plus the written entries; the cursor advances by descriptor header + length. */
    1131        2486 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1132        2486 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1133        2486 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1134        2516 : }
    1135             : /* Serialize the blob's whole extent table, chaining a new md page via blob_serialize_add_page() when one fills up. */
    1136             : static int
    1137        2489 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1138             :                             struct spdk_blob_md_page **pages,
    1139             :                             struct spdk_blob_md_page *cur_page,
    1140             :                             uint32_t *page_count, uint8_t **buf,
    1141             :                             size_t *remaining_sz)
    1142             : {
    1143             :         uint64_t                                last_extent_page;
    1144             :         int                                     rc;
    1145             :         /* Returns 0, or the negative rc from blob_serialize_add_page(); *pages is not freed here on error. */
    1146        2489 :         last_extent_page = 0;
    1147             :         /* At least single extent table entry has to be always persisted.
    1148             :          * Such case occurs with num_extent_pages == 0. */
    1149        2516 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1150        5032 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1151        2516 :                                                   remaining_sz);
    1152             :                 /* NOTE(review): if num_extent_pages == 0 and the entry did not fit, last_extent_page stays 0 and the check below still breaks - confirm. */
    1153        2516 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1154        2489 :                         break;
    1155             :                 }
    1156             :                 /* Not finished: the current page is full, so append a page and resume from last_extent_page. */
    1157          27 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1158          27 :                 if (rc < 0) {
    1159           0 :                         return rc;
    1160             :                 }
    1161             :                 /* Point the write cursor at the new page's descriptor area. */
    1162          27 :                 *buf = (uint8_t *)cur_page->descriptors;
    1163          27 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1164             :         }
    1165             : 
    1166        2489 :         return 0;
    1167        2489 : }
    1168             : /* Emit one EXTENT_RLE descriptor at *buf, run-length encoding clusters from start_cluster; *next_cluster is where the caller resumes. */
    1169             : static void
    1170        1751 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1171             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1172             :                           uint8_t **buf, size_t *buf_sz)
    1173             : {
    1174             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1175             :         size_t cur_sz;
    1176             :         uint64_t i, extent_idx;
    1177             :         uint64_t lba, lba_per_cluster, lba_count;
    1178             : 
    1179             :         /* The buffer must have room for at least one extent */
    1180        1751 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1181        1751 :         if (*buf_sz < cur_sz) {
    1182          18 :                 *next_cluster = start_cluster;
    1183          18 :                 return;
    1184             :         }
    1185             :         /* (Above: with no room for even one extent, nothing is written and *next_cluster == start_cluster.) */
    1186        1733 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1187        1733 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1188             : 
    1189        1733 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1190             :         /* Assert for scan-build false positive */
    1191        1733 :         assert(lba_per_cluster > 0);
    1192             :         /* lba / lba_count describe the run being built: its starting LBA (0 is treated as unallocated) and its length in LBAs. */
    1193        1733 :         lba = blob->active.clusters[start_cluster];
    1194        1733 :         lba_count = lba_per_cluster;
    1195        1733 :         extent_idx = 0;
    1196      810464 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1197      808735 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1198             :                         /* Run-length encode sequential non-zero LBA */
    1199        7276 :                         lba_count += lba_per_cluster;
    1200        7276 :                         continue;
    1201      801459 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1202             :                         /* Run-length encode unallocated clusters */
    1203      800266 :                         lba_count += lba_per_cluster;
    1204      800266 :                         continue;
    1205             :                 }
    1206        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1207        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1208        1193 :                 extent_idx++;
    1209             :                 /* The run ending before cluster i has been stored; reserve room for the next extent before starting it. */
    1210        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1211             : 
    1212        1193 :                 if (*buf_sz < cur_sz) {
    1213             :                         /* If we ran out of buffer space, return */
    1214           4 :                         *next_cluster = i;
    1215           4 :                         break;
    1216             :                 }
    1217             :                 /* Start a new run at cluster i. */
    1218        1189 :                 lba = blob->active.clusters[i];
    1219        1189 :                 lba_count = lba_per_cluster;
    1220        1189 :         }
    1221             :         /* Store the final pending run, unless the loop stopped because the buffer is full (then *buf_sz < cur_sz). */
    1222        1733 :         if (*buf_sz >= cur_sz) {
    1223        1729 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1224        1729 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1225        1729 :                 extent_idx++;
    1226             : 
    1227        1729 :                 *next_cluster = blob->active.num_clusters;
    1228        1729 :         }
    1229             :         /* length covers only the extents written; the cursor advances by descriptor header + length. */
    1230        1733 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1231        1733 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1232        1733 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1233        1751 : }
    1234             : /* Serialize all of the blob's clusters as EXTENT_RLE descriptors, adding md pages as needed; writes nothing if num_clusters == 0. */
    1235             : static int
    1236        1943 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1237             :                            struct spdk_blob_md_page **pages,
    1238             :                            struct spdk_blob_md_page *cur_page,
    1239             :                            uint32_t *page_count, uint8_t **buf,
    1240             :                            size_t *remaining_sz)
    1241             : {
    1242             :         uint64_t                                last_cluster;
    1243             :         int                                     rc;
    1244             :         /* Returns 0, or the negative rc from blob_serialize_add_page(); *pages is not freed here on error. */
    1245        1943 :         last_cluster = 0;
    1246        1965 :         while (last_cluster < blob->active.num_clusters) {
    1247        1751 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1248             :                 /* The helper reports num_clusters once it has encoded through the last cluster. */
    1249        1751 :                 if (last_cluster == blob->active.num_clusters) {
    1250        1729 :                         break;
    1251             :                 }
    1252             :                 /* Otherwise the page is full: chain a new page and continue from last_cluster. */
    1253          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1254          22 :                 if (rc < 0) {
    1255           0 :                         return rc;
    1256             :                 }
    1257             :                 /* Point the write cursor at the new page's descriptor area. */
    1258          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1259          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1260             :         }
    1261             : 
    1262        1943 :         return 0;
    1263        1943 : }
    1264             : /* Fill 'page' with a single EXTENT_PAGE descriptor for the extent page that contains 'cluster'. */
    1265             : static void
    1266        1648 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1267             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1268             : {
    1269             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1270             :         uint64_t i, extent_idx;
    1271             :         uint64_t lba, lba_per_cluster;
    1272        1648 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1273             :         /* start_cluster_idx is 'cluster' rounded down to a multiple of SPDK_EXTENTS_PER_EP. */
    1274        1648 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1275        1648 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1276             : 
    1277        1648 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1278             :         /* One entry per cluster (LBA / LBAs-per-cluster), stopping at num_clusters or after SPDK_EXTENTS_PER_EP entries. */
    1279        1648 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1280        1648 :         extent_idx = 0;
    1281       63582 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1282       62033 :                 lba = blob->active.clusters[i];
    1283       62033 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1284       62033 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1285          99 :                         break;
    1286             :                 }
    1287       61934 :         }
    1288        1648 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1289        1648 :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1290        1648 : }
    1291             : /* Write the FLAGS descriptor at buf and shrink *buf_sz by its size; buf is passed by value, so the caller advances its own pointer. */
    1292             : static void
    1293        4432 : blob_serialize_flags(const struct spdk_blob *blob,
    1294             :                      uint8_t *buf, size_t *buf_sz)
    1295             : {
    1296             :         struct spdk_blob_md_descriptor_flags *desc;
    1297             : 
    1298             :         /*
    1299             :          * Flags get serialized first, so we should always have room for the flags
    1300             :          *  descriptor.
    1301             :          */
    1302        4432 :         assert(*buf_sz >= sizeof(*desc));
    1303             :         /* desc->length below excludes the common descriptor header (struct spdk_blob_md_descriptor). */
    1304        4432 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1305        4432 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1306        4432 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
    1307        4432 :         desc->invalid_flags = blob->invalid_flags;
    1308        4432 :         desc->data_ro_flags = blob->data_ro_flags;
    1309        4432 :         desc->md_ro_flags = blob->md_ro_flags;
    1310             : 
    1311        4432 :         *buf_sz -= sizeof(*desc);
    1312        4432 : }
    1313             : /* Serialize every xattr in 'xattrs', starting a new md page whenever one does not fit in the space left on the current page. */
    1314             : static int
    1315        8864 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1316             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1317             :                       struct spdk_blob_md_page **pages,
    1318             :                       struct spdk_blob_md_page *cur_page,
    1319             :                       uint32_t *page_count, uint8_t **buf,
    1320             :                       size_t *remaining_sz)
    1321             : {
    1322             :         const struct spdk_xattr *xattr;
    1323             :         int     rc;
    1324             :         /* Returns 0 on success. On failure the page array is freed here and *pages / *page_count are reset to NULL / 0. */
    1325       10964 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1326        2100 :                 size_t required_sz = 0;
    1327             : 
    1328        4200 :                 rc = blob_serialize_xattr(xattr,
    1329        2100 :                                           *buf, *remaining_sz,
    1330        2100 :                                           &required_sz, internal);
    1331        2100 :                 if (rc < 0) {
    1332             :                         /* Need to add a new page to the chain */
    1333          60 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1334             :                                                      &cur_page);
    1335          60 :                         if (rc < 0) {
    1336           0 :                                 spdk_free(*pages);
    1337           0 :                                 *pages = NULL;
    1338           0 :                                 *page_count = 0;
    1339           0 :                                 return rc;
    1340             :                         }
    1341             :                         /* Point the write cursor at the new page's descriptor area. */
    1342          60 :                         *buf = (uint8_t *)cur_page->descriptors;
    1343          60 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1344             : 
    1345             :                         /* Try again */
    1346          60 :                         required_sz = 0;
    1347         120 :                         rc = blob_serialize_xattr(xattr,
    1348          60 :                                                   *buf, *remaining_sz,
    1349          60 :                                                   &required_sz, internal);
    1350                           /* A second failure means the xattr does not fit even in an empty page, so give up. */
    1351          60 :                         if (rc < 0) {
    1352           0 :                                 spdk_free(*pages);
    1353           0 :                                 *pages = NULL;
    1354           0 :                                 *page_count = 0;
    1355           0 :                                 return rc;
    1356             :                         }
    1357          60 :                 }
    1358             :                 /* Consume the space the descriptor occupied. */
    1359        2100 :                 *remaining_sz -= required_sz;
    1360        2100 :                 *buf += required_sz;
    1361        2100 :         }
    1362             : 
    1363        8864 :         return 0;
    1364        8864 : }
    1365             : /* Serialize a dirty blob's metadata into a new page array: flags, xattrs, internal xattrs, then extent table or RLE extents. */
    1366             : static int
    1367        4432 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1368             :                uint32_t *page_count)
    1369             : {
    1370             :         struct spdk_blob_md_page                *cur_page;
    1371             :         int                                     rc;
    1372             :         uint8_t                                 *buf;
    1373             :         size_t                                  remaining_sz;
    1374             :         /* Outputs: *pages and *page_count. Returns 0, or a negative value propagated from the helpers. */
    1375        4432 :         assert(pages != NULL);
    1376        4432 :         assert(page_count != NULL);
    1377        4432 :         assert(blob != NULL);
    1378        4432 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1379             : 
    1380        4432 :         *pages = NULL;
    1381        4432 :         *page_count = 0;
    1382             : 
    1383             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1384        4432 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1385        4432 :         if (rc < 0) {
    1386           0 :                 return rc;
    1387             :         }
    1388             :         /* buf / remaining_sz form the write cursor within the current page's descriptor area. */
    1389        4432 :         buf = (uint8_t *)cur_page->descriptors;
    1390        4432 :         remaining_sz = sizeof(cur_page->descriptors);
    1391             : 
    1392             :         /* Serialize flags */
    1393        4432 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1394        4432 :         buf += sizeof(struct spdk_blob_md_descriptor_flags);
    1395             : 
    1396             :         /* Serialize xattrs */
    1397        8864 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1398        4432 :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1399        4432 :         if (rc < 0) {
    1400           0 :                 return rc;
    1401             :         }
    1402             :         /* cur_page is passed by value; when a helper adds pages, buf / remaining_sz carry the position forward. */
    1403             :         /* Serialize internal xattrs */
    1404        8864 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1405        4432 :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1406        4432 :         if (rc < 0) {
    1407           0 :                 return rc;
    1408             :         }
    1409             : 
    1410        4432 :         if (blob->use_extent_table) {
    1411             :                 /* Serialize extent table */
    1412        2489 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1413        2489 :         } else {
    1414             :                 /* Serialize extents */
    1415        1943 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1416             :         }
    1417             :         /* NOTE(review): unlike blob_serialize_xattrs(), the extent serializers do not free *pages on error - confirm the caller cleans up. */
    1418        4432 :         return rc;
    1419        4432 : }
    1420             : /* Context carried through the blob load callbacks below; released in blob_load_final(). */
    1421             : struct spdk_blob_load_ctx {
    1422             :         struct spdk_blob                *blob; /* blob being loaded */
    1423             : 
    1424             :         struct spdk_blob_md_page        *pages; /* md page buffer; spdk_free()d in blob_load_final() */
    1425             :         uint32_t                        num_pages; /* number of pages held in 'pages' */
    1426             :         uint32_t                        next_extent_page; /* reset to 0 when the extent-page buffer is first allocated */
    1427             :         spdk_bs_sequence_t              *seq; /* sequence handed back to cb_fn */
    1428             : 
    1429             :         spdk_bs_sequence_cpl            cb_fn; /* completion invoked by blob_load_final() */
    1430             :         void                            *cb_arg; /* opaque argument passed to cb_fn */
    1431             : };
    1432             : /* Checksum an md page with spdk_crc32c_update() over its first SPDK_BS_PAGE_SIZE - 4 bytes; seed and final XOR are BLOB_CRC32C_INITIAL. */
    1433             : static uint32_t
    1434       25754 : blob_md_page_calc_crc(void *page)
    1435             : {
    1436             :         uint32_t                crc;
    1437             :         /* NOTE(review): the 4 excluded trailing bytes are presumably the page's own crc field - confirm against struct spdk_blob_md_page. */
    1438       25754 :         crc = BLOB_CRC32C_INITIAL;
    1439       25754 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1440       25754 :         crc ^= BLOB_CRC32C_INITIAL;
    1441             : 
    1442       25754 :         return crc;
    1443             : 
    1444             : }
    1445             : /* Finish a blob load: mark the blob clean on success, invoke the completion callback, then release the load context. */
    1446             : static void
    1447        4341 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1448             : {
    1449        4341 :         struct spdk_blob                *blob = ctx->blob;
    1450             : 
    1451        4341 :         if (bserrno == 0) {
    1452        4259 :                 blob_mark_clean(blob);
    1453        4259 :         }
    1454             :         /* The completion is reported for both success and failure, before ctx is freed. */
    1455        4341 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1456             : 
    1457             :         /* Free the memory */
    1458        4341 :         spdk_free(ctx->pages);
    1459        4341 :         free(ctx);
    1460        4341 : }
    1461             : /* Completion for opening the parent snapshot: wrap it with bs_create_blob_bs_dev() as the blob's back_bs_dev, then finish the load. */
    1462             : static void
    1463         575 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1464             : {
    1465         575 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1466         575 :         struct spdk_blob                *blob = ctx->blob;
    1467             :         /* A NULL return from bs_create_blob_bs_dev() is reported as -ENOMEM. */
    1468         575 :         if (bserrno == 0) {
    1469         567 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1470         567 :                 if (blob->back_bs_dev == NULL) {
    1471           0 :                         bserrno = -ENOMEM;
    1472           0 :                 }
    1473         567 :         }
    1474         575 :         if (bserrno != 0) {
    1475           8 :                 SPDK_ERRLOG("Snapshot fail\n");
    1476           8 :         }
    1477             :         /* Both the open failure and the -ENOMEM case are logged above and passed on to blob_load_final(). */
    1478         575 :         blob_load_final(ctx, bserrno);
    1479         575 : }
    1480             : 
    1481             : static void blob_update_clear_method(struct spdk_blob *blob);
    1482             : /* Create the external-snapshot back device for an esnap clone through the blobstore's esnap_bs_dev_create callback. */
    1483             : static int
    1484         150 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1485             : {
    1486         150 :         struct spdk_blob_store *bs = blob->bs;
    1487         150 :         struct spdk_bs_dev *bs_dev = NULL;
    1488         150 :         const void *esnap_id = NULL;
    1489         150 :         size_t id_len = 0;
    1490             :         int rc;
    1491             :         /* Returns 0 on success, -ENOTSUP without a callback, -EINVAL for a missing ID or incompatible block size, else the callback's rc. */
    1492         150 :         if (bs->esnap_bs_dev_create == NULL) {
    1493          10 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1494             :                                "without support for esnap clones\n", blob->id);
    1495          10 :                 return -ENOTSUP;
    1496             :         }
    1497         140 :         assert(blob->back_bs_dev == NULL);
    1498             :         /* The esnap ID is read from the BLOB_EXTERNAL_SNAPSHOT_ID internal xattr. */
    1499         140 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1500         140 :         if (rc != 0) {
    1501           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1502           0 :                 return -EINVAL;
    1503             :         }
    1504         140 :         assert(id_len > 0 && id_len < UINT32_MAX);
    1505             : 
    1506         140 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1507             : 
    1508         140 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1509             :                                      &bs_dev);
    1510         140 :         if (rc != 0) {
    1511           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1512             :                               "with error %d\n", blob->id, rc);
    1513           0 :                 return rc;
    1514             :         }
    1515             : 
    1516             :         /*
    1517             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1518             :          * This especially might happen during spdk_bs_load() iteration.
    1519             :          */
    1520         140 :         if (bs_dev != NULL) {
    1521         140 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1522         140 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1523           5 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1524             :                                        "is not compatible with blobstore block size %u\n",
    1525             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1526           5 :                         bs_dev->destroy(bs_dev);
    1527           5 :                         return -EINVAL;
    1528             :                 }
    1529         135 :         }
    1530             :         /* parent_id is set to SPDK_BLOBID_EXTERNAL_SNAPSHOT even when bs_dev is NULL. */
    1531         135 :         blob->back_bs_dev = bs_dev;
    1532         135 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1533             : 
    1534         135 :         return 0;
    1535         150 : }
    1536             : /* Choose the blob's back_bs_dev: esnap device, parent snapshot (opened via spdk_bs_open_blob), zeroes dev for thin blobs, or none. */
    1537             : static void
    1538        4282 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1539             : {
    1540        4282 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1541        4282 :         struct spdk_blob                *blob = ctx->blob;
    1542             :         const void                      *value;
    1543             :         size_t                          len;
    1544             :         int                             rc;
    1545             :         /* Every path ends in blob_load_final(), either directly or from blob_load_snapshot_cpl(). */
    1546        4282 :         if (blob_is_esnap_clone(blob)) {
    1547         150 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1548         150 :                 blob_load_final(ctx, rc);
    1549         150 :                 return;
    1550             :         }
    1551             :         /* For a thin-provisioned blob, a BLOB_SNAPSHOT internal xattr holds the parent's blob id. */
    1552        4132 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1553        1301 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1554        1301 :                 if (rc == 0) {
    1555         575 :                         if (len != sizeof(spdk_blob_id)) {
    1556           0 :                                 blob_load_final(ctx, -EINVAL);
    1557           0 :                                 return;
    1558             :                         }
    1559             :                         /* open snapshot blob and continue in the callback function */
    1560         575 :                         blob->parent_id = *(spdk_blob_id *)value;
    1561        1150 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1562         575 :                                           blob_load_snapshot_cpl, ctx);
    1563         575 :                         return;
    1564             :                 } else {
    1565             :                         /* add zeroes_dev for thin provisioned blob */
    1566         726 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1567             :                 }
    1568         726 :         } else {
    1569             :                 /* standard blob */
    1570        2831 :                 blob->back_bs_dev = NULL;
    1571             :         }
    1572        3557 :         blob_load_final(ctx, 0);
    1573        4282 : }
    1574             : 
                       /*
                        * Sequence completion callback that loads a blob's extent pages one at a time.
                        *
                        * It is entered in two ways:
                        *  - directly from blob_load_cpl() with ctx->pages == NULL and bserrno == 0; a
                        *    one-page DMA buffer is then allocated and scanning starts at index 0;
                        *  - as the completion of the extent page read issued below; the page now in
                        *    ctx->pages[0] is then checked (CRC, no chained page) and parsed.
                        *
                        * Afterwards blob->active.extent_pages is walked from ctx->next_extent_page.
                        * A non-zero entry starts a read that re-enters this function. A zero entry
                        * only grows the cluster array with zeroed slots. When every entry has been
                        * handled the load continues in blob_load_backing_dev(). Every failure ends
                        * the load through blob_load_final().
                        *
                        * seq     - sequence used to issue the extent page reads
                        * cb_arg  - the struct spdk_blob_load_ctx of this load
                        * bserrno - status of the read that just completed (0 on the first entry)
                        */
    1575             : static void
    1576        4189 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1577             : {
    1578        4189 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1579        4189 :         struct spdk_blob                *blob = ctx->blob;
    1580             :         struct spdk_blob_md_page        *page;
    1581             :         uint64_t                        i;
    1582             :         uint32_t                        crc;
    1583             :         uint64_t                        lba;
    1584             :         void                            *tmp;
    1585             :         uint64_t                        sz;
    1586             : 
    1587        4189 :         if (bserrno) {
    1588           9 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1589           9 :                 blob_load_final(ctx, bserrno);
    1590           9 :                 return;
    1591             :         }
    1592             : 
    1593        4180 :         if (ctx->pages == NULL) {
    1594             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1595        2624 :                 ctx->pages = spdk_zmalloc(blob->bs->md_page_size, 0,
    1596             :                                           NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1597        2624 :                 if (!ctx->pages) {
    1598           0 :                         blob_load_final(ctx, -ENOMEM);
    1599           0 :                         return;
    1600             :                 }
    1601        2624 :                 ctx->num_pages = 1;
    1602        2624 :                 ctx->next_extent_page = 0;
    1603        2624 :         } else {
                                       /* Later iterations: validate and parse the extent page that was just read. */
    1604        1556 :                 page = &ctx->pages[0];
    1605        1556 :                 crc = blob_md_page_calc_crc(page);
    1606        1556 :                 if (crc != page->crc) {
    1607           0 :                         blob_load_final(ctx, -EINVAL);
    1608           0 :                         return;
    1609             :                 }
    1610             : 
                                       /* An extent page that links to a further page is rejected as invalid. */
    1611        1556 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1612           0 :                         blob_load_final(ctx, -EINVAL);
    1613           0 :                         return;
    1614             :                 }
    1615             : 
    1616        1556 :                 bserrno = blob_parse_extent_page(page, blob);
    1617        1556 :                 if (bserrno) {
    1618           0 :                         blob_load_final(ctx, bserrno);
    1619           0 :                         return;
    1620             :                 }
    1621             :         }
    1622             : 
    1623        4816 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1624        2201 :                 if (blob->active.extent_pages[i] != 0) {
    1625             :                         /* Extent page was allocated, read and parse it. */
    1626        1565 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
                                               /* Remember where to resume; this function is re-entered when the read completes. */
    1627        1565 :                         ctx->next_extent_page = i + 1;
    1628             : 
    1629        3130 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1630        1565 :                                              bs_byte_to_lba(blob->bs, blob->bs->md_page_size),
    1631        1565 :                                              blob_load_cpl_extents_cpl, ctx);
    1632        1565 :                         return;
    1633             :                 } else {
    1634             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1635             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1636             : 
    1637         636 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1638         636 :                         blob->active.num_clusters += sz;
    1639         636 :                         blob->remaining_clusters_in_et -= sz;
    1640             : 
                                               /* Either more extent page entries follow, or remaining_clusters_in_et is now used up. */
    1641         636 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1642         636 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1643             : 
                                               /* NOTE(review): num_clusters and remaining_clusters_in_et are already updated if this
                                                * realloc() fails; presumably harmless because the load is aborted - confirm. */
    1644         636 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1645         636 :                         if (tmp == NULL) {
    1646           0 :                                 blob_load_final(ctx, -ENOMEM);
    1647           0 :                                 return;
    1648             :                         }
                                               /* Zero only the newly added tail [cluster_array_size, num_clusters).
                                                * The offset is computed with arithmetic on a void pointer (a GNU C extension). */
    1649        1272 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1650         636 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1651         636 :                         blob->active.clusters = tmp;
    1652         636 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1653             :                 }
    1654         636 :         }
    1655             : 
                               /* Every extent page entry has been processed. */
    1656        2615 :         blob_load_backing_dev(seq, ctx);
    1657        4189 : }
    1658             : 
                       /*
                        * Sequence completion callback for every blob metadata page read that is
                        * started by blob_load() or by this function itself.
                        *
                        * The page just read is ctx->pages[ctx->num_pages - 1]. Its CRC is verified.
                        * If it links to another page (page->next), ctx->pages is grown by one page
                        * and that page is read, re-entering this function. When the chain ends, all
                        * pages are parsed with blob_parse(), blob->use_extent_table is derived from
                        * the descriptors that were found, the clear method is updated, the page
                        * buffer is freed and the load continues with the extent pages
                        * (blob_load_cpl_extents_cpl) or with the backing device
                        * (blob_load_backing_dev). Every failure ends the load via blob_load_final().
                        *
                        * seq     - sequence used to issue further reads
                        * cb_arg  - the struct spdk_blob_load_ctx of this load
                        * bserrno - status of the read that just completed
                        */
    1659             : static void
    1660        4464 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1661             : {
    1662        4464 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1663        4464 :         struct spdk_blob                *blob = ctx->blob;
    1664             :         struct spdk_blob_md_page        *page;
    1665             :         int                             rc;
    1666             :         uint32_t                        crc;
    1667             :         uint32_t                        current_page;
    1668             : 
                               /* Work out the number of the page that was just read; it is used only in the
                                * error messages below. The first page comes from the blob id, any later one
                                * from the 'next' field of the page read before it. */
    1669        4464 :         if (ctx->num_pages == 1) {
    1670        4341 :                 current_page = bs_blobid_to_page(blob->id);
    1671        4341 :         } else {
    1672         123 :                 assert(ctx->num_pages != 0);
    1673         123 :                 page = &ctx->pages[ctx->num_pages - 2];
    1674         123 :                 current_page = page->next;
    1675             :         }
    1676             : 
    1677        4464 :         if (bserrno) {
    1678          25 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1679             :                             current_page, blob->id, bserrno);
    1680          25 :                 blob_load_final(ctx, bserrno);
    1681          25 :                 return;
    1682             :         }
    1683             : 
    1684        4439 :         page = &ctx->pages[ctx->num_pages - 1];
    1685        4439 :         crc = blob_md_page_calc_crc(page);
    1686        4439 :         if (crc != page->crc) {
    1687          10 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1688             :                             current_page, blob->id);
    1689          10 :                 blob_load_final(ctx, -EINVAL);
    1690          10 :                 return;
    1691             :         }
    1692             : 
    1693        4429 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1694             :                 struct spdk_blob_md_page *tmp_pages;
                                       /* Captured before the realloc below, after which 'page' may no longer be valid. */
    1695         123 :                 uint32_t next_page = page->next;
    1696         123 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1697             : 
    1698             :                 /* Read the next page */
                                       /* NOTE(review): the buffer growth and the read length use sizeof(*page), while
                                        * blob_load() sizes the first page with bs->md_page_size; presumably both are
                                        * the same value - confirm. */
    1699         123 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1700         123 :                 if (tmp_pages == NULL) {
    1701           0 :                         blob_load_final(ctx, -ENOMEM);
    1702           0 :                         return;
    1703             :                 }
    1704         123 :                 ctx->num_pages++;
    1705         123 :                 ctx->pages = tmp_pages;
    1706             : 
    1707         246 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1708         123 :                                      next_lba,
    1709         123 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1710         123 :                                      blob_load_cpl, ctx);
    1711         123 :                 return;
    1712             :         }
    1713             : 
    1714             :         /* Parse the pages */
    1715        4306 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1716        4306 :         if (rc) {
    1717          15 :                 blob_load_final(ctx, rc);
    1718          15 :                 return;
    1719             :         }
    1720             : 
    1721        4291 :         if (blob->extent_table_found == true) {
    1722             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1723        2624 :                 assert(blob->extent_rle_found == false);
    1724        2624 :                 blob->use_extent_table = true;
    1725        2624 :         } else {
    1726             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1727             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1728             :                  * and no extent_rle descriptors were persisted for it.
    1729             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1730        1667 :                 blob->use_extent_table = false;
    1731             :         }
    1732             : 
    1733             :         /* Check the clear_method stored in metadata vs what may have been passed
    1734             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1735             :          */
    1736        4291 :         blob_update_clear_method(blob);
    1737             : 
                               /* The NULL value is what blob_load_cpl_extents_cpl() tests to detect its first invocation. */
    1738        4291 :         spdk_free(ctx->pages);
    1739        4291 :         ctx->pages = NULL;
    1740             : 
    1741        4291 :         if (blob->extent_table_found) {
    1742        2624 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1743        2624 :         } else {
    1744        1667 :                 blob_load_backing_dev(seq, ctx);
    1745             :         }
    1746        4464 : }
    1747             : 
    1748             : /* Load a blob from disk given a blobid */
                       /*
                        * Allocates a load context and a buffer for one metadata page, marks the blob
                        * as SPDK_BLOB_STATE_LOADING and reads the metadata page that
                        * bs_blobid_to_page(blob->id) designates. The rest of the load runs in
                        * blob_load_cpl().
                        *
                        * seq    - sequence on which the read is issued; also stored in the context
                        * blob   - blob to load; blob->id and blob->bs are read here
                        * cb_fn  - stored in the context; called directly from this function, with
                        *          -ENOMEM, only when an allocation fails
                        * cb_arg - argument stored alongside cb_fn
                        */
    1749             : static void
    1750        4341 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1751             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1752             : {
    1753             :         struct spdk_blob_load_ctx *ctx;
    1754             :         struct spdk_blob_store *bs;
    1755             :         uint32_t page_num;
    1756             :         uint64_t lba;
    1757             : 
    1758        4341 :         blob_verify_md_op(blob);
    1759             : 
    1760        4341 :         bs = blob->bs;
    1761             : 
    1762        4341 :         ctx = calloc(1, sizeof(*ctx));
    1763        4341 :         if (!ctx) {
    1764           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1765           0 :                 return;
    1766             :         }
    1767             : 
    1768        4341 :         ctx->blob = blob;
                               /* ctx->pages is still NULL from calloc(), so this presumably acts as a fresh
                                * allocation - confirm against the spdk_realloc() contract. */
    1769        4341 :         ctx->pages = spdk_realloc(ctx->pages, bs->md_page_size, 0);
    1770        4341 :         if (!ctx->pages) {
    1771           0 :                 free(ctx);
    1772           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1773           0 :                 return;
    1774             :         }
    1775        4341 :         ctx->num_pages = 1;
    1776        4341 :         ctx->cb_fn = cb_fn;
    1777        4341 :         ctx->cb_arg = cb_arg;
    1778        4341 :         ctx->seq = seq;
    1779             : 
    1780        4341 :         page_num = bs_blobid_to_page(blob->id);
    1781        4341 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1782             : 
    1783        4341 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1784             : 
    1785        8682 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1786        4341 :                              bs_byte_to_lba(bs, bs->md_page_size),
    1787        4341 :                              blob_load_cpl, ctx);
    1788        4341 : }
    1789             : 
                       /*
                        * Context of one blob persist request. It is handed from step to step of the
                        * persist chain as the callback argument and is released in
                        * blob_persist_complete_cb().
                        */
    1790             : struct spdk_blob_persist_ctx {
                               /* Blob that is being persisted. */
    1791             :         struct spdk_blob                *blob;
    1792             : 
                               /* Released with spdk_free() in blob_persist_complete_cb(); presumably the
                                * serialized metadata pages of the blob - confirm where it is filled in. */
    1793             :         struct spdk_blob_md_page        *pages;
                               /* Not referenced by the persist steps visible around this definition;
                                * presumably used while writing extent pages - confirm. */
    1794             :         uint32_t                        next_extent_page;
    1795             :         struct spdk_blob_md_page        *extent_page;
    1796             : 
                               /* Passed back to cb_fn, together with cb_arg, when the persist is completed. */
    1797             :         spdk_bs_sequence_t              *seq;
    1798             :         spdk_bs_sequence_cpl            cb_fn;
    1799             :         void                            *cb_arg;
                               /* Linkage for blob->persists_to_complete and blob->pending_persists. */
    1800             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1801             : };
    1802             : 
                       /*
                        * Queue on 'batch' the operation that clears the range (lba, lba_count),
                        * selected by blob->clear_method:
                        *   BLOB_CLEAR_WITH_DEFAULT, BLOB_CLEAR_WITH_UNMAP -> bs_batch_unmap_dev()
                        *   BLOB_CLEAR_WITH_WRITE_ZEROES                   -> bs_batch_write_zeroes_dev()
                        *   BLOB_CLEAR_WITH_NONE or any other value        -> nothing is queued
                        */
    1803             : static void
    1804        1584 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1805             :                    uint64_t lba_count)
    1806             : {
    1807        1584 :         switch (blob->clear_method) {
    1808             :         case BLOB_CLEAR_WITH_DEFAULT:
    1809             :         case BLOB_CLEAR_WITH_UNMAP:
    1810        1584 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1811        1584 :                 break;
    1812             :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1813           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1814           0 :                 break;
    1815           0 :         case BLOB_CLEAR_WITH_NONE:
    1816             :         default:
    1817           0 :                 break;
    1818             :         }
    1819        1584 : }
    1820             : 
                       /*
                        * Check that a super block is acceptable for the blobstore 'bs'.
                        *
                        * Checks, in this order:
                        *  1. super->version lies in [SPDK_BS_INITIAL_VERSION, SPDK_BS_VERSION];
                        *  2. super->signature equals SPDK_BS_SUPER_BLOCK_SIG;
                        *  3. super->crc equals the CRC computed over the super block;
                        *  4. super->bstype equals bs->bstype, unless bs->bstype is all zero bytes,
                        *     which is accepted as a wildcard;
                        *  5. super->size does not exceed bs->dev->blockcnt * bs->dev->blocklen.
                        *
                        * Returns 0 when every check passes, -ENXIO on a bstype mismatch and -EILSEQ
                        * for any other failed check.
                        */
    1821             : static int
    1822        1462 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1823             : {
    1824             :         uint32_t        crc;
                               /* Static storage, so zero-initialized: the all-zero "wildcard" bstype pattern. */
    1825             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1826             : 
    1827        1462 :         if (super->version > SPDK_BS_VERSION ||
    1828        1457 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1829          10 :                 return -EILSEQ;
    1830             :         }
    1831             : 
    1832        2904 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1833        1452 :                    sizeof(super->signature)) != 0) {
    1834           0 :                 return -EILSEQ;
    1835             :         }
    1836             : 
    1837        1452 :         crc = blob_md_page_calc_crc(super);
    1838        1452 :         if (crc != super->crc) {
    1839           5 :                 return -EILSEQ;
    1840             :         }
    1841             : 
    1842        1447 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1843        1430 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1844        1447 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1845           7 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1846           7 :         } else {
    1847          10 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1848          10 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1849          10 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1850          10 :                 return -ENXIO;
    1851             :         }
    1852             : 
                               /* The blobstore size recorded in the super block must fit on the device. */
    1853        1437 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1854          10 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1855             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1856          10 :                 return -EILSEQ;
    1857             :         }
    1858             : 
    1859        1427 :         return 0;
    1860        1462 : }
    1861             : 
                       /* Forward declaration: blob_persist_complete() calls bs_mark_dirty() to start
                        * the next persist round; the definition is not part of this section. */
    1862             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1863             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1864             : 
                       /*
                        * Message handler that finishes one persist request: it invokes the user
                        * callback stored in the context and then releases the context together with
                        * its page buffer. blob_persist_complete() sends it through
                        * spdk_thread_send_msg().
                        *
                        * arg - the struct spdk_blob_persist_ctx to complete and free
                        *
                        * NOTE(review): the callback is always given status 0, although
                        * blob_persist_complete() queues this handler for failed persists as well
                        * (it is reached with bserrno != 0 from the error paths of the persist
                        * steps) - confirm that dropping the error is intended.
                        */
    1865             : static void
    1866        6304 : blob_persist_complete_cb(void *arg)
    1867             : {
    1868        6304 :         struct spdk_blob_persist_ctx *ctx = arg;
    1869             : 
    1870             :         /* Call user callback */
    1871        6304 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1872             : 
    1873             :         /* Free the memory */
    1874        6304 :         spdk_free(ctx->pages);
    1875        6304 :         free(ctx);
    1876        6304 : }
    1877             : 
                       /* Forward declaration: blob_persist_complete() passes blob_persist_start to
                        * bs_mark_dirty() as the continuation of the next persist round. */
    1878             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1879             : 
                       /*
                        * Finish the persist whose context is at the head of
                        * blob->persists_to_complete.
                        *
                        * When bserrno is 0 the blob is marked clean. Every context queued on
                        * persists_to_complete is then unlinked and handed to
                        * blob_persist_complete_cb() by a message to the current thread. If
                        * blob->pending_persists is not empty, the two lists are swapped, the blob is
                        * put back into SPDK_BLOB_STATE_DIRTY and a new round is started through
                        * bs_mark_dirty(), continuing in blob_persist_start() with the first of the
                        * swapped-in contexts.
                        *
                        * seq     - sequence of the persist that finished
                        * ctx     - context of that persist; must be the head of persists_to_complete
                        * bserrno - result of the persist; here it only decides whether the blob is
                        *           marked clean
                        */
    1880             : static void
    1881        6304 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1882             : {
    1883             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1884        6304 :         struct spdk_blob                *blob = ctx->blob;
    1885             : 
    1886        6304 :         if (bserrno == 0) {
    1887        6239 :                 blob_mark_clean(blob);
    1888        6239 :         }
    1889             : 
    1890        6304 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1891             : 
    1892             :         /* Complete all persists that were pending when the current persist started */
    1893       12608 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1894        6304 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1895        6304 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1896        6304 :         }
    1897             : 
    1898        6304 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1899        6276 :                 return;
    1900             :         }
    1901             : 
    1902             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1903          28 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1904          28 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1905             : 
    1906          28 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1907          28 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1908        6304 : }
    1909             : 
                       /*
                        * Completion of the batch built by blob_persist_clear_extents().
                        *
                        * On error the persist is completed with that error. Otherwise the metadata
                        * pages of the truncated extent pages - the non-zero entries at indices
                        * [num_extent_pages, extent_pages_array_size) - are released under
                        * bs->used_lock, the extent_pages array is freed (no extent pages left) or
                        * shrunk to num_extent_pages, and the persist is completed.
                        *
                        * seq     - persist sequence
                        * cb_arg  - the struct spdk_blob_persist_ctx of this persist
                        * bserrno - status of the batch
                        */
    1910             : static void
    1911        6239 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1912             : {
    1913        6239 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1914        6239 :         struct spdk_blob                *blob = ctx->blob;
    1915        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1916             :         size_t                          i;
    1917             : 
    1918        6239 :         if (bserrno != 0) {
    1919           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1920           0 :                 return;
    1921             :         }
    1922             : 
    1923        6239 :         spdk_spin_lock(&bs->used_lock);
    1924             : 
    1925             :         /* Release all extent_pages that were truncated */
    1926        8837 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1927             :                 /* Nothing to release if it was not allocated */
    1928        2598 :                 if (blob->active.extent_pages[i] != 0) {
    1929         936 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1930         936 :                 }
    1931        2598 :         }
    1932             : 
    1933        6239 :         spdk_spin_unlock(&bs->used_lock);
    1934             : 
    1935        6239 :         if (blob->active.num_extent_pages == 0) {
    1936        4134 :                 free(blob->active.extent_pages);
    1937        4134 :                 blob->active.extent_pages = NULL;
    1938        4134 :                 blob->active.extent_pages_array_size = 0;
    1939        6239 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1940             : #ifndef __clang_analyzer__
    1941             :                 void *tmp;
    1942             : 
    1943             :                 /* scan-build really can't figure reallocs, workaround it */
                                       /* NOTE(review): assert() is the only check on realloc(); where assertions are
                                        * compiled out a NULL result would replace the array pointer while
                                        * extent_pages_array_size stays non-zero - confirm this cannot happen or
                                        * keep the old array on failure. */
    1944           3 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1945           3 :                 assert(tmp != NULL);
    1946           3 :                 blob->active.extent_pages = tmp;
    1947             : #endif
    1948           3 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1949           3 :         }
    1950             : 
                               /* bserrno is 0 on this path. */
    1951        6239 :         blob_persist_complete(seq, ctx, bserrno);
    1952        6239 : }
    1953             : 
                       /*
                        * Build and close a batch that writes zeroes over every truncated extent
                        * page, i.e. every non-zero entry of blob->active.extent_pages at indices
                        * [num_extent_pages, extent_pages_array_size). The batch completes in
                        * blob_persist_clear_extents_cpl(), which releases those pages.
                        *
                        * seq - persist sequence from which the batch is created
                        * ctx - context of this persist
                        */
    1954             : static void
    1955        6239 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1956             : {
    1957        6239 :         struct spdk_blob                *blob = ctx->blob;
    1958        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1959             :         size_t                          i;
    1960             :         uint64_t                        lba;
    1961             :         uint64_t                        lba_count;
    1962             :         spdk_bs_batch_t                 *batch;
    1963             : 
    1964        6239 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
                               /* Length of one write: bs->md_page_size converted by bs_byte_to_lba(). */
    1965        6239 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    1966             : 
    1967             :         /* Clear all extent_pages that were truncated */
    1968        8837 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1969             :                 /* Nothing to clear if it was not allocated */
    1970        2598 :                 if (blob->active.extent_pages[i] != 0) {
    1971         936 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1972         936 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1973         936 :                 }
    1974        2598 :         }
    1975             : 
    1976        6239 :         bs_batch_close(batch);
    1977        6239 : }
    1978             : 
                       /*
                        * Completion of the batch built by blob_persist_clear_clusters().
                        *
                        * On error the persist is completed with that error. Otherwise the truncated
                        * clusters - the non-zero entries of blob->active.clusters at indices
                        * [num_clusters, cluster_array_size) - are released under bs->used_lock, the
                        * clusters array is freed (no clusters left) or shrunk to num_clusters, and
                        * the persist moves on to blob_persist_clear_extents().
                        *
                        * seq     - persist sequence
                        * cb_arg  - the struct spdk_blob_persist_ctx of this persist
                        * bserrno - status of the batch
                        */
    1979             : static void
    1980        6239 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1981             : {
    1982        6239 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1983        6239 :         struct spdk_blob                *blob = ctx->blob;
    1984        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1985             :         size_t                          i;
    1986             : 
    1987        6239 :         if (bserrno != 0) {
    1988           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1989           0 :                 return;
    1990             :         }
    1991             : 
    1992        6239 :         spdk_spin_lock(&bs->used_lock);
    1993             :         /* Release all clusters that were truncated */
    1994     1342490 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
                                       /* Each entry of active.clusters is an LBA; it is converted to a cluster number here. */
    1995     1336251 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1996             : 
    1997             :                 /* Nothing to release if it was not allocated */
    1998     1336251 :                 if (blob->active.clusters[i] != 0) {
    1999        2926 :                         bs_release_cluster(bs, cluster_num);
    2000        2926 :                 }
    2001     1336251 :         }
    2002        6239 :         spdk_spin_unlock(&bs->used_lock);
    2003             : 
    2004        6239 :         if (blob->active.num_clusters == 0) {
    2005        2423 :                 free(blob->active.clusters);
    2006        2423 :                 blob->active.clusters = NULL;
    2007        2423 :                 blob->active.cluster_array_size = 0;
    2008        6239 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2009             : #ifndef __clang_analyzer__
    2010             :                 void *tmp;
    2011             : 
    2012             :                 /* scan-build really can't figure reallocs, workaround it */
                                       /* NOTE(review): assert() is the only check on realloc(); where assertions are
                                        * compiled out a NULL result would replace the array pointer while
                                        * cluster_array_size stays non-zero - confirm this cannot happen or keep
                                        * the old array on failure. */
    2013          22 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2014          22 :                 assert(tmp != NULL);
    2015          22 :                 blob->active.clusters = tmp;
    2016             : 
    2017             : #endif
    2018          22 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2019          22 :         }
    2020             : 
    2021             :         /* Move on to clearing extent pages */
    2022        6239 :         blob_persist_clear_extents(seq, ctx);
    2023        6239 : }
    2024             : 
                       /*
                        * Build and close a batch that clears the device ranges of the truncated
                        * clusters, i.e. the entries of blob->active.clusters at indices
                        * [num_clusters, cluster_array_size). Entries equal to 0 are skipped. An entry
                        * that starts exactly where the current run ends extends that run, so
                        * contiguous clusters are cleared with one bs_batch_clear_dev() call. The
                        * batch completes in blob_persist_clear_clusters_cpl().
                        *
                        * seq - persist sequence from which the batch is created
                        * ctx - context of this persist
                        */
    2025             : static void
    2026        6239 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2027             : {
    2028        6239 :         struct spdk_blob                *blob = ctx->blob;
    2029        6239 :         struct spdk_blob_store          *bs = blob->bs;
    2030             :         spdk_bs_batch_t                 *batch;
    2031             :         size_t                          i;
    2032             :         uint64_t                        lba;
    2033             :         uint64_t                        lba_count;
    2034             : 
    2035             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2036             :          * at the end, but no changes ever occur in the middle of the list.
    2037             :          */
    2038             : 
    2039        6239 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2040             : 
    2041             :         /* Clear all clusters that were truncated */
                               /* (lba, lba_count) describe the run collected so far; lba_count == 0 means no run is open. */
    2042        6239 :         lba = 0;
    2043        6239 :         lba_count = 0;
    2044     1342490 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2045     1336251 :                 uint64_t next_lba = blob->active.clusters[i];
                                       /* Same value on every iteration: the LBA span of one cluster. */
    2046     1336251 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2047             : 
    2048     1336251 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2049             :                         /* This cluster is contiguous with the previous one. */
    2050        1347 :                         lba_count += next_lba_count;
    2051        1347 :                         continue;
    2052     1334904 :                 } else if (next_lba == 0) {
                                               /* Entry holds no LBA: nothing to clear. The open run, if any, is left untouched. */
    2053     1333325 :                         continue;
    2054             :                 }
    2055             : 
    2056             :                 /* This cluster is not contiguous with the previous one. */
    2057             : 
    2058             :                 /* If a run of LBAs previously existing, clear them now */
    2059        1579 :                 if (lba_count > 0) {
    2060          45 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2061          45 :                 }
    2062             : 
    2063             :                 /* Start building the next batch */
    2064        1579 :                 lba = next_lba;
                                       /* next_lba is always > 0 here because the zero case continued above, so the
                                        * else branch below cannot be taken. */
    2065        1579 :                 if (next_lba > 0) {
    2066        1579 :                         lba_count = next_lba_count;
    2067        1579 :                 } else {
    2068           0 :                         lba_count = 0;
    2069             :                 }
    2070        1579 :         }
    2071             : 
    2072             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2073        6239 :         if (lba_count > 0) {
    2074        1534 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2075        1534 :         }
    2076             : 
    2077        6239 :         bs_batch_close(batch);
    2078        6239 : }
    2079             : 
                       /*
                        * Completion of the batch built by blob_persist_zero_pages().
                        *
                        * On error the persist is completed with that error. Otherwise, under
                        * bs->used_lock, the metadata pages listed in blob->clean.pages[1..] are
                        * released and, when the blob has no active pages (active.num_pages == 0),
                        * so is the first page, which is located from the blob id. The persist then
                        * moves on to blob_persist_clear_clusters().
                        *
                        * seq     - persist sequence
                        * cb_arg  - the struct spdk_blob_persist_ctx of this persist
                        * bserrno - status of the batch
                        */
    2080             : static void
    2081        6244 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2082             : {
    2083        6244 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2084        6244 :         struct spdk_blob                *blob = ctx->blob;
    2085        6244 :         struct spdk_blob_store          *bs = blob->bs;
    2086             :         size_t                          i;
    2087             : 
    2088        6244 :         if (bserrno != 0) {
    2089           5 :                 blob_persist_complete(seq, ctx, bserrno);
    2090           5 :                 return;
    2091             :         }
    2092             : 
    2093        6239 :         spdk_spin_lock(&bs->used_lock);
    2094             : 
                               /* The numbered comment below is shared with blob_persist_zero_pages(), which
                                * queued the zeroing writes; this function releases those same pages. */
    2095             :         /* This loop starts at 1 because the first page is special and handled
    2096             :          * below. The pages (except the first) are never written in place,
    2097             :          * so any pages in the clean list must be zeroed.
    2098             :          */
    2099        6324 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2100          85 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2101          85 :         }
    2102             : 
                               /* The first page is released only when the blob has no active pages left. */
    2103        6239 :         if (blob->active.num_pages == 0) {
    2104             :                 uint32_t page_num;
    2105             : 
    2106        1857 :                 page_num = bs_blobid_to_page(blob->id);
    2107        1857 :                 bs_release_md_page(bs, page_num);
    2108        1857 :         }
    2109             : 
    2110        6239 :         spdk_spin_unlock(&bs->used_lock);
    2111             : 
    2112             :         /* Move on to clearing clusters */
    2113        6239 :         blob_persist_clear_clusters(seq, ctx);
    2114        6244 : }
    2115             : 
    /*
     * Persist step: queue write-zeroes for metadata pages that are no longer needed.
     *
     * seq     - sequence the persist is running on.
     * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
     * bserrno - status of the previous step; non-zero ends the persist via
     *           blob_persist_complete() with that status.
     *
     * All zeroing requests are placed in one batch created from the sequence;
     * blob_persist_zero_pages_cpl is registered as that batch's completion.
     */
    2116             : static void
    2117        6294 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2118             : {
    2119        6294 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2120        6294 :         struct spdk_blob                *blob = ctx->blob;
    2121        6294 :         struct spdk_blob_store          *bs = blob->bs;
    2122             :         uint64_t                        lba;
    2123             :         uint64_t                        lba_count;
    2124             :         spdk_bs_batch_t                 *batch;
    2125             :         size_t                          i;
    2126             : 
    2127        6294 :         if (bserrno != 0) {
    2128          50 :                 blob_persist_complete(seq, ctx, bserrno);
    2129          50 :                 return;
    2130             :         }
    2131             : 
    2132        6244 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2133             : 
                               /* Every zeroing request below covers exactly one metadata page. */
    2134        6244 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    2135             : 
    2136             :         /* This loop starts at 1 because the first page is special and handled
    2137             :          * below. The pages (except the first) are never written in place,
    2138             :          * so any pages in the clean list must be zeroed.
    2139             :          */
    2140        6329 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2141          85 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2142             : 
    2143          85 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2144          85 :         }
    2145             : 
    2146             :         /* The first page will only be zeroed if this is a delete. */
    2147        6244 :         if (blob->active.num_pages == 0) {
    2148             :                 uint32_t page_num;
    2149             : 
    2150             :                 /* The first page in the metadata goes where the blobid indicates */
    2151        1862 :                 page_num = bs_blobid_to_page(blob->id);
    2152        1862 :                 lba = bs_md_page_to_lba(bs, page_num);
    2153             : 
    2154        1862 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2155        1862 :         }
    2156             : 
    2157        6244 :         bs_batch_close(batch);
    2158        6294 : }
    2159             : 
    /*
     * Persist step: write the first (root) metadata page of the blob.
     *
     * seq     - sequence the persist is running on.
     * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
     * bserrno - status of the previous step; non-zero ends the persist via
     *           blob_persist_complete() with that status.
     *
     * The root page is ctx->pages[0] and is written to the metadata page derived
     * from the blob id. blob_persist_zero_pages is the next step, either as the
     * write's completion callback or called directly when there are no active pages.
     */
    2160             : static void
    2161        4432 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2162             : {
    2163        4432 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2164        4432 :         struct spdk_blob                *blob = ctx->blob;
    2165        4432 :         struct spdk_blob_store          *bs = blob->bs;
    2166             :         uint64_t                        lba;
    2167             :         uint32_t                        lba_count;
    2168             :         struct spdk_blob_md_page        *page;
    2169             : 
    2170        4432 :         if (bserrno != 0) {
    2171           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2172           0 :                 return;
    2173             :         }
    2174             : 
    2175        4432 :         if (blob->active.num_pages == 0) {
    2176             :                 /* Move on to the next step */
    2177           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2178           0 :                 return;
    2179             :         }
    2180             : 
    2181        4432 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    2182             : 
    2183        4432 :         page = &ctx->pages[0];
    2184             :         /* The first page in the metadata goes where the blobid indicates */
    2185        4432 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2186             : 
    2187        8864 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2188        4432 :                               blob_persist_zero_pages, ctx);
    2189        4432 : }
    2190             : 
    /*
     * Persist step: write every serialized metadata page except the root.
     *
     * seq - sequence the persist is running on.
     * ctx - persist context; ctx->pages[i] is written to the metadata page
     *       recorded in blob->active.pages[i] for i >= 1.
     *
     * The writes are issued as one batch; blob_persist_write_page_root is
     * registered as that batch's completion.
     *
     * NOTE(review): lba_count here is derived from sizeof(*page), while
     * blob_persist_write_page_root and blob_persist_zero_pages derive it from
     * bs->md_page_size. Confirm the two sizes are always equal.
     */
    2191             : static void
    2192        4432 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2193             : {
    2194        4432 :         struct spdk_blob                *blob = ctx->blob;
    2195        4432 :         struct spdk_blob_store          *bs = blob->bs;
    2196             :         uint64_t                        lba;
    2197             :         uint32_t                        lba_count;
    2198             :         struct spdk_blob_md_page        *page;
    2199             :         spdk_bs_batch_t                 *batch;
    2200             :         size_t                          i;
    2201             : 
    2202             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2203             :          * at the end, but no changes ever occur in the middle of the list.
    2204             :          */
    2205             : 
    2206        4432 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2207             : 
    2208        4432 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2209             : 
    2210             :         /* This starts at 1. The root page is not written until
    2211             :          * all of the others are finished
    2212             :          */
    2213        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2214         109 :                 page = &ctx->pages[i];
    2215         109 :                 assert(page->sequence_num == i);
    2216             : 
    2217         109 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2218             : 
    2219         109 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2220         109 :         }
    2221             : 
    2222        4432 :         bs_batch_close(batch);
    2223        4432 : }
    2224             : 
    /*
     * Change the in-memory size of a blob to sz clusters.
     *
     * blob - blob to resize.
     * sz   - new size in clusters.
     *
     * Returns 0 on success (including when the size is already sz), -ENOSPC when
     * a non-thin-provisioned blob cannot get enough free clusters or metadata
     * pages, and -ENOMEM when growing the cluster or extent-page arrays fails.
     *
     * On success the blob is marked SPDK_BLOB_STATE_DIRTY. The arrays only grow
     * here; they are not shrunk in this function.
     */
    2225             : static int
    2226        4465 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2227             : {
    2228             :         uint64_t        i;
    2229             :         uint64_t        *tmp;
    2230             :         uint64_t        cluster;
    2231             :         uint32_t        lfmd; /*  lowest free md page */
    2232             :         uint64_t        num_clusters;
    2233             :         uint32_t        *ep_tmp;
    2234        4465 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2235             :         struct spdk_blob_store *bs;
    2236             :         int             rc;
    2237             : 
    2238        4465 :         bs = blob->bs;
    2239             : 
    2240        4465 :         blob_verify_md_op(blob);
    2241             : 
    2242        4465 :         if (blob->active.num_clusters == sz) {
    2243         566 :                 return 0;
    2244             :         }
    2245             : 
    2246        3899 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2247             :                 /* If this blob was resized to be larger, then smaller, then
    2248             :                  * larger without syncing, then the cluster array already
    2249             :                  * contains spare assigned clusters we can use.
    2250             :                  */
    2251           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2252             :                                         sz);
    2253           0 :         } else {
    2254        3899 :                 num_clusters = blob->active.num_clusters;
    2255             :         }
    2256             : 
    2257        3899 :         if (blob->use_extent_table) {
    2258             :                 /* Round up since every cluster beyond current Extent Table size,
    2259             :                  * requires new extent page. */
    2260        2359 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2261        2359 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2262        2359 :         }
    2263             : 
    2264        3899 :         assert(!spdk_spin_held(&bs->used_lock));
    2265             : 
    2266             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2267             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2268             :          * to claim them later in this function.
    2269             :          */
    2270        3899 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2271        1624 :                 spdk_spin_lock(&bs->used_lock);
    2272        1624 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2273          10 :                         rc = -ENOSPC;
    2274          10 :                         goto out;
    2275             :                 }
    2276        1614 :                 lfmd = 0;
                                       /* Nothing is claimed in this pass, so the search position must be
                                        * advanced past each page found. Otherwise every iteration finds
                                        * the same page and the loop only proves that one md page is free.
                                        * This matches the page_num++ in the counting pass of
                                        * blob_persist_generate_new_md.
                                        */
    2277        2572 :                 for (i = current_num_ep; i < new_num_ep ; i++, lfmd++) {
    2278         958 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2279         958 :                         if (lfmd == UINT32_MAX) {
    2280             :                                 /* No more free md pages. Cannot satisfy the request */
    2281           0 :                                 rc = -ENOSPC;
    2282           0 :                                 goto out;
    2283             :                         }
    2284         958 :                 }
    2285        1614 :         }
    2286             : 
    2287        3889 :         if (sz > num_clusters) {
    2288             :                 /* Expand the cluster array if necessary.
    2289             :                  * We only shrink the array when persisting.
    2290             :                  */
    2291        2130 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2292        2130 :                 if (sz > 0 && tmp == NULL) {
    2293           0 :                         rc = -ENOMEM;
    2294           0 :                         goto out;
    2295             :                 }
                                       /* Zero only the newly added tail; existing entries are kept. */
    2296        4260 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2297        2130 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2298        2130 :                 blob->active.clusters = tmp;
    2299        2130 :                 blob->active.cluster_array_size = sz;
    2300             : 
    2301             :                 /* Expand the extents table, only if enough clusters were added */
    2302        2130 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2303        1255 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2304        1255 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2305           0 :                                 rc = -ENOMEM;
    2306           0 :                                 goto out;
    2307             :                         }
    2308        2510 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2309        1255 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2310        1255 :                         blob->active.extent_pages = ep_tmp;
    2311        1255 :                         blob->active.extent_pages_array_size = new_num_ep;
    2312        1255 :                 }
    2313        2130 :         }
    2314             : 
    2315        3889 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2316             : 
    2317        3889 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2318        3028 :                 cluster = 0;
    2319        3028 :                 lfmd = 0;
    2320       12281 :                 for (i = num_clusters; i < sz; i++) {
    2321        9253 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2322             :                         /* Do not increment lfmd here.  lfmd will get updated
    2323             :                          * to the md_page allocated (if any) when a new extent
    2324             :                          * page is needed.  Just pass that value again,
    2325             :                          * bs_allocate_cluster will just start at that index
    2326             :                          * to find the next free md_page when needed.
    2327             :                          */
    2328        9253 :                 }
    2329        3028 :         }
    2330             : 
    2331             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2332     1340190 :         for (i = sz; i < num_clusters; i++) {
    2333     1336301 :                 if (blob->active.clusters[i] != 0) {
    2334        2926 :                         blob->active.num_allocated_clusters--;
    2335        2926 :                 }
    2336     1336301 :         }
    2337             : 
    2338        3889 :         blob->active.num_clusters = sz;
    2339        3889 :         blob->active.num_extent_pages = new_num_ep;
    2340             : 
    2341        3889 :         rc = 0;
    2342             : out:
                               /* The lock is only taken on the non-thin grow path, so release it
                                * only if this function is actually holding it.
                                */
    2343        3899 :         if (spdk_spin_held(&bs->used_lock)) {
    2344        1624 :                 spdk_spin_unlock(&bs->used_lock);
    2345        1624 :         }
    2346             : 
    2347        3899 :         return rc;
    2348        4465 : }
    2349             : 
    /*
     * Persist step: serialize the blob's metadata and assign it to metadata pages.
     *
     * ctx - persist context; ctx->pages receives the serialized pages and
     *       blob->active.num_pages receives their count.
     *
     * Page 0 always lives at the page derived from the blob id. Every further
     * page is placed on the first clear bit found in bs->used_md_pages and claimed.
     * Each page's `next` is set to its successor's page number before its crc is
     * computed. On success the blob state is set to SPDK_BLOB_STATE_CLEAN and
     * blob_persist_write_page_chain() is started.
     *
     * Failures end the persist via blob_persist_complete(): the serialize error
     * code as returned, or -ENOMEM when the page-index array cannot be resized
     * or when the counting pass runs out of free metadata pages.
     */
    2350             : static void
    2351        4432 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2352             : {
    2353        4432 :         spdk_bs_sequence_t *seq = ctx->seq;
    2354        4432 :         struct spdk_blob *blob = ctx->blob;
    2355        4432 :         struct spdk_blob_store *bs = blob->bs;
    2356             :         uint64_t i;
    2357             :         uint32_t page_num;
    2358             :         void *tmp;
    2359             :         int rc;
    2360             : 
    2361             :         /* Generate the new metadata */
    2362        4432 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2363        4432 :         if (rc < 0) {
    2364           0 :                 blob_persist_complete(seq, ctx, rc);
    2365           0 :                 return;
    2366             :         }
    2367             : 
    2368        4432 :         assert(blob->active.num_pages >= 1);
    2369             : 
    2370             :         /* Resize the cache of page indices */
    2371        4432 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2372        4432 :         if (!tmp) {
    2373           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2374           0 :                 return;
    2375             :         }
    2376        4432 :         blob->active.pages = tmp;
    2377             : 
    2378             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2379             :          * enough pages and a second to actually claim them. The used_lock is held across
    2380             :          * both passes to ensure things don't change in the middle.
    2381             :          */
    2382        4432 :         spdk_spin_lock(&bs->used_lock);
    2383        4432 :         page_num = 0;
    2384             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2385        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2386         109 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2387         109 :                 if (page_num == UINT32_MAX) {
    2388           0 :                         spdk_spin_unlock(&bs->used_lock);
    2389           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2390           0 :                         return;
    2391             :                 }
                                       /* Nothing is claimed in this pass, so step past the page just
                                        * found to make the next search return a different one.
                                        */
    2392         109 :                 page_num++;
    2393         109 :         }
    2394             : 
    2395        4432 :         page_num = 0;
    2396        4432 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2397        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2398         109 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2399         109 :                 ctx->pages[i - 1].next = page_num;
    2400             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2401         109 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2402         109 :                 blob->active.pages[i] = page_num;
    2403         109 :                 bs_claim_md_page(bs, page_num);
    2404         109 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2405             :                               blob->id);
    2406         109 :                 page_num++;
    2407         109 :         }
    2408        4432 :         spdk_spin_unlock(&bs->used_lock);
                               /* i equals active.num_pages here, so this is the crc of the last page;
                                * the loop above only covers pages that have a successor.
                                */
    2409        4432 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2410             :         /* Start writing the metadata from last page to first */
    2411        4432 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2412        4432 :         blob_persist_write_page_chain(seq, ctx);
    2413        4432 : }
    2414             : 
    /*
     * Persist step: write the blob's extent pages, one per invocation.
     *
     * seq     - sequence the persist is running on.
     * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
     * bserrno - status of the previous extent page write (0 on the first call).
     *
     * Each call frees the buffer left over from the previous write, then scans
     * blob->active.extent_pages from ctx->next_extent_page. The first non-zero
     * entry is serialized into a fresh buffer and written, with this function
     * registered as the write's completion so the scan resumes at the next index.
     * Once the scan reaches the end of the array, blob_persist_generate_new_md()
     * is called.
     */
    2415             : static void
    2416        3108 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2417             : {
    2418        3108 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2419        3108 :         struct spdk_blob                *blob = ctx->blob;
    2420             :         size_t                          i;
    2421             :         uint32_t                        extent_page_id;
    2422        3108 :         uint32_t                        page_count = 0;
    2423             :         int                             rc;
    2424             : 
                               /* The buffer is released before the error check, so it is freed
                                * whether or not the previous write succeeded.
                                */
    2425        3108 :         if (ctx->extent_page != NULL) {
    2426         991 :                 spdk_free(ctx->extent_page);
    2427         991 :                 ctx->extent_page = NULL;
    2428         991 :         }
    2429             : 
    2430        3108 :         if (bserrno != 0) {
    2431           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2432           0 :                 return;
    2433             :         }
    2434             : 
    2435             :         /* Only write out Extent Pages when blob was resized. */
    2436        6492 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2437        4375 :                 extent_page_id = blob->active.extent_pages[i];
    2438        4375 :                 if (extent_page_id == 0) {
    2439             :                         /* No Extent Page to persist */
    2440        3384 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2441        3384 :                         continue;
    2442             :                 }
    2443         991 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
                                       /* Record where to resume before issuing the asynchronous write. */
    2444         991 :                 ctx->next_extent_page = i + 1;
    2445         991 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2446         991 :                 if (rc < 0) {
    2447           0 :                         blob_persist_complete(seq, ctx, rc);
    2448           0 :                         return;
    2449             :                 }
    2450             : 
    2451         991 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2452         991 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2453             : 
    2454         991 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2455             : 
    2456        1982 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2457         991 :                                       bs_byte_to_lba(blob->bs, blob->bs->md_page_size),
    2458         991 :                                       blob_persist_write_extent_pages, ctx);
    2459         991 :                 return;
    2460             :         }
    2461             : 
    2462        2117 :         blob_persist_generate_new_md(ctx);
    2463        3108 : }
    2464             : 
    /*
     * First persist step, used as the completion callback of bs_mark_dirty().
     *
     * seq     - sequence the persist is running on.
     * cb_arg  - the struct spdk_blob_persist_ctx for this persist.
     * bserrno - status from bs_mark_dirty(); non-zero ends the persist via
     *           blob_persist_complete() with that status.
     *
     * Chooses the path through the persist:
     *  - active.num_pages == 0: the blob is being deleted, go straight to
     *    blob_persist_zero_pages().
     *  - the blob grew or shrank: pick the first extent page index to rewrite
     *    and go to blob_persist_write_extent_pages().
     *  - otherwise: go to blob_persist_generate_new_md().
     */
    2465             : static void
    2466        6304 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2467             : {
    2468        6304 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2469        6304 :         struct spdk_blob *blob = ctx->blob;
    2470             : 
    2471        6304 :         if (bserrno != 0) {
    2472          10 :                 blob_persist_complete(seq, ctx, bserrno);
    2473          10 :                 return;
    2474             :         }
    2475             : 
    2476        6294 :         if (blob->active.num_pages == 0) {
    2477             :                 /* This is the signal that the blob should be deleted.
    2478             :                  * Immediately jump to the clean up routine. */
    2479        1862 :                 assert(blob->clean.num_pages > 0);
    2480        1862 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2481        1862 :                 blob_persist_zero_pages(seq, ctx, 0);
    2482        1862 :                 return;
    2483             : 
    2484             :         }
    2485             : 
                               /* In both resize branches spdk_max(1, n) - 1 evaluates to n - 1, or to
                                * 0 when n is 0, so the subtraction cannot go below zero.
                                */
    2486        4432 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2487             :                 /* Blob was resized up */
    2488        2095 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2489        2095 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2490        4432 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2491             :                 /* Blob was resized down */
    2492          22 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2493          22 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2494          22 :         } else {
    2495             :                 /* No change in size occurred */
    2496        2315 :                 blob_persist_generate_new_md(ctx);
    2497        2315 :                 return;
    2498             :         }
    2499             : 
    2500        2117 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2501        6304 : }
    2502             : 
    /*
     * Context for marking a blobstore dirty (see bs_mark_dirty()).
     *
     * bs     - blobstore being marked dirty.
     * super  - DMA buffer the super block is read into and written back from.
     * cb_fn  - caller's completion callback.
     * cb_arg - argument passed to cb_fn.
     */
    2503             : struct spdk_bs_mark_dirty {
    2504             :         struct spdk_blob_store          *bs;
    2505             :         struct spdk_bs_super_block      *super;
    2506             :         spdk_bs_sequence_cpl            cb_fn;
    2507             :         void                            *cb_arg;
    2508             : };
    2509             : 
    /*
     * Final step of bs_mark_dirty(); also used by bs_mark_dirty_write() to report errors.
     *
     * seq     - sequence handed on to the caller's callback.
     * cb_arg  - the struct spdk_bs_mark_dirty context.
     * bserrno - result of the operation; ctx->bs->clean is cleared only when it is 0.
     *
     * Calls the caller's callback with bserrno, then frees the super block
     * buffer and the context.
     */
    2510             : static void
    2511         197 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2512             : {
    2513         197 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2514             : 
    2515         197 :         if (bserrno == 0) {
    2516         187 :                 ctx->bs->clean = 0;
    2517         187 :         }
    2518             : 
    2519         197 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2520             : 
    2521         197 :         spdk_free(ctx->super);
    2522         197 :         free(ctx);
    2523         197 : }
    2524             : 
    /* Forward declaration: bs_write_super() is called by bs_mark_dirty_write() below. */
    2525             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2526             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2527             : 
    2528             : 
    /*
     * Completion of the super block read issued by bs_mark_dirty().
     *
     * seq     - sequence the operation is running on.
     * cb_arg  - the struct spdk_bs_mark_dirty context.
     * bserrno - status of the read.
     *
     * A read error or a bs_super_validate() failure is reported through
     * bs_mark_dirty_write_cpl(). Otherwise the super block's clean flag is
     * cleared and the block is written back via bs_write_super().
     */
    2529             : static void
    2530         197 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2531             : {
    2532         197 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2533             :         int rc;
    2534             : 
    2535         197 :         if (bserrno != 0) {
    2536           5 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2537           5 :                 return;
    2538             :         }
    2539             : 
    2540         192 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2541         192 :         if (rc != 0) {
    2542           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2543           0 :                 return;
    2544             :         }
    2545             : 
    2546         192 :         ctx->super->clean = 0;
                               /* A recorded size of 0 is replaced with the device size in bytes. */
    2547         192 :         if (ctx->super->size == 0) {
    2548           5 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2549           5 :         }
    2550             : 
    2551         192 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2552         197 : }
    2553             : 
    /*
     * Make sure the blobstore is marked dirty, then invoke cb_fn.
     *
     * seq    - sequence used for the super block I/O and handed to cb_fn.
     * bs     - blobstore to mark dirty.
     * cb_fn  - completion callback, called with 0 on success or a negative errno.
     * cb_arg - argument passed to cb_fn.
     *
     * When bs->clean is already 0, cb_fn is called immediately with 0. Otherwise
     * the super block is read from page 0 and processing continues in
     * bs_mark_dirty_write(). Allocation failures are reported to cb_fn as -ENOMEM.
     */
    2554             : static void
    2555        6961 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2556             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2557             : {
    2558             :         struct spdk_bs_mark_dirty *ctx;
    2559             : 
    2560             :         /* Blobstore is already marked dirty */
    2561        6961 :         if (bs->clean == 0) {
    2562        6764 :                 cb_fn(seq, cb_arg, 0);
    2563        6764 :                 return;
    2564             :         }
    2565             : 
    2566         197 :         ctx = calloc(1, sizeof(*ctx));
    2567         197 :         if (!ctx) {
    2568           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2569           0 :                 return;
    2570             :         }
    2571         197 :         ctx->bs = bs;
    2572         197 :         ctx->cb_fn = cb_fn;
    2573         197 :         ctx->cb_arg = cb_arg;
    2574             : 
                               /* The super block buffer is a zeroed DMA allocation, requested with
                                * 0x1000 alignment.
                                */
    2575         197 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2576             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2577         197 :         if (!ctx->super) {
    2578           0 :                 free(ctx);
    2579           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2580           0 :                 return;
    2581             :         }
    2582             : 
    2583         394 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2584         197 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2585         197 :                              bs_mark_dirty_write, ctx);
    2586        6961 : }
    2587             : 
    /*
     * Entry point of the blob persist state machine.
     *
     * seq    - sequence the persist runs on.
     * blob   - blob whose metadata is persisted.
     * cb_fn  - completion callback.
     * cb_arg - argument passed to cb_fn.
     *
     * cb_fn is called immediately with 0 when the blob is already
     * SPDK_BLOB_STATE_CLEAN and no persist is in flight, and with -ENOMEM when
     * the context cannot be allocated. When another persist is in flight, the
     * new context is only queued on blob->pending_persists and nothing is
     * started here. Otherwise the context goes on blob->persists_to_complete
     * and the persist starts with bs_mark_dirty(), continuing in blob_persist_start().
     */
    2588             : /* Write a blob to disk */
    2589             : static void
    2590       11346 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2591             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2592             : {
    2593             :         struct spdk_blob_persist_ctx *ctx;
    2594             : 
    2595       11346 :         blob_verify_md_op(blob);
    2596             : 
    2597       11346 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2598        5042 :                 cb_fn(seq, cb_arg, 0);
    2599        5042 :                 return;
    2600             :         }
    2601             : 
    2602        6304 :         ctx = calloc(1, sizeof(*ctx));
    2603        6304 :         if (!ctx) {
    2604           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2605           0 :                 return;
    2606             :         }
    2607        6304 :         ctx->blob = blob;
    2608        6304 :         ctx->seq = seq;
    2609        6304 :         ctx->cb_fn = cb_fn;
    2610        6304 :         ctx->cb_arg = cb_arg;
    2611             : 
    2612             :         /* Multiple blob persists can affect one another, via blob->state or
    2613             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2614        6304 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2615          28 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2616          28 :                 return;
    2617             :         }
    2618        6276 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2619             : 
    2620        6276 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2621       11346 : }
    2622             : 
    /*
     * Context carried through a cluster allocate-and-copy operation.
     *
     * blob            - blob the cluster is being inserted into.
     * buf             - buffer released with spdk_free() when the operation completes.
     * new_cluster     - cluster claimed for the blob; released again by
     *                   blob_insert_cluster_revert().
     * new_extent_page - metadata page claimed for a new extent page, or 0 when
     *                   none was claimed.
     * seq             - sequence driving the operation.
     *
     * NOTE(review): io_unit and new_cluster_page are not used in the code
     * visible around this definition; confirm their meaning at their users.
     */
    2623             : struct spdk_blob_copy_cluster_ctx {
    2624             :         struct spdk_blob *blob;
    2625             :         uint8_t *buf;
    2626             :         uint64_t io_unit;
    2627             :         uint64_t new_cluster;
    2628             :         uint32_t new_extent_page;
    2629             :         spdk_bs_sequence_t *seq;
    2630             :         struct spdk_blob_md_page *new_cluster_page;
    2631             : };
    2632             : 
    /*
     * Context carried through a cluster free operation.
     *
     * seq is the sequence finished by blob_free_cluster_cpl() when the operation
     * completes.
     *
     * NOTE(review): the remaining fields are not used in the code visible around
     * this definition; confirm their meaning at their users.
     */
    2633             : struct spdk_blob_free_cluster_ctx {
    2634             :         struct spdk_blob *blob;
    2635             :         uint64_t page;
    2636             :         struct spdk_blob_md_page *md_page;
    2637             :         uint64_t cluster_num;
    2638             :         uint32_t extent_page;
    2639             :         spdk_bs_sequence_t *seq;
    2640             : };
    2641             : 
    /*
     * Completion of a cluster allocate-and-copy operation.
     *
     * cb_arg  - the struct spdk_blob_copy_cluster_ctx for the operation.
     * bserrno - result of the operation.
     *
     * Every user operation queued on the channel's need_cluster_alloc list is
     * executed when bserrno is 0 and aborted with bserrno otherwise. The copy
     * buffer and the context are then freed.
     */
    2642             : static void
    2643        1025 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2644             : {
    2645        1025 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2646        1025 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2647             :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2648             :         spdk_bs_user_op_t *op;
    2649             : 
                               /* Move the queued operations onto a local list first, which leaves
                                * the channel's list empty while they are processed.
                                */
    2650        1025 :         TAILQ_INIT(&requests);
    2651        1025 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2652             : 
    2653        2050 :         while (!TAILQ_EMPTY(&requests)) {
    2654        1025 :                 op = TAILQ_FIRST(&requests);
    2655        1025 :                 TAILQ_REMOVE(&requests, op, link);
    2656        1025 :                 if (bserrno == 0) {
    2657        1025 :                         bs_user_op_execute(op);
    2658        1025 :                 } else {
    2659           0 :                         bs_user_op_abort(op, bserrno);
    2660             :                 }
    2661             :         }
    2662             : 
    2663        1025 :         spdk_free(ctx->buf);
    2664        1025 :         free(ctx);
    2665        1025 : }
    2666             : 
    2667             : static void
    2668          75 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2669             : {
                               /*
                                * Callback passed to blob_free_cluster_on_md_thread() by
                                * spdk_free_cluster_unmap_complete(). Finishes the sequence stored in
                                * the context with bserrno and then frees the context.
                                *
                                * cb_arg: struct spdk_blob_free_cluster_ctx *.
                                */
    2670          75 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2671          75 :         spdk_bs_sequence_t *seq = ctx->seq;
    2672             : 
    2673          75 :         bs_sequence_finish(seq, bserrno);
    2674             : 
    2675          75 :         free(ctx);
    2676          75 : }
    2677             : 
    2678             : static void
    2679           5 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2680             : {
                               /*
                                * Undo the allocation recorded in ctx: while holding the blobstore's
                                * used_lock, release ctx->new_cluster and, when it is non-zero,
                                * ctx->new_extent_page.
                                */
    2681           5 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2682           5 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
                               /* 0 is treated as "no extent page was allocated". */
    2683           5 :         if (ctx->new_extent_page != 0) {
    2684           3 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2685           3 :         }
    2686           5 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2687           5 : }
    2688             : 
    2689             : static void
    2690           5 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2691             : {
                               /*
                                * Completion of the clear issued by blob_insert_cluster_clear() (also
                                * called directly with -ENOMEM when the batch could not be opened).
                                * Logs a warning on failure, releases the cluster/extent page via
                                * blob_insert_cluster_revert() and finishes the sequence with bserrno.
                                *
                                * cb_arg: struct spdk_blob_copy_cluster_ctx *.
                                */
    2692           5 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2693             : 
    2694           5 :         if (bserrno) {
    2695           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2696           0 :         }
    2697             : 
                               /* NOTE(review): the cluster is released even when the clear failed, so
                                * it may be handed out again with stale data - confirm this is
                                * intended. */
    2698           5 :         blob_insert_cluster_revert(ctx);
    2699           5 :         bs_sequence_finish(ctx->seq, bserrno);
    2700           5 : }
    2701             : 
    2702             : static void
    2703           5 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2704             : {
                               /*
                                * Clear the data of ctx->new_cluster on the device before the cluster
                                * is released. The work is submitted as a batch whose completion is
                                * blob_insert_cluster_clear_cpl(), which performs the actual release.
                                */
    2705             :         struct spdk_bs_cpl cpl;
    2706             :         spdk_bs_batch_t *batch;
    2707           5 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2708             : 
    2709             :         /*
    2710             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2711             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2712             :          * cluster.
    2713             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2714             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2715             :          */
    2716             : 
    2717           5 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2718           5 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2719           5 :         cpl.u.blob_basic.cb_arg = ctx;
    2720             : 
    2721           5 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2722           5 :         if (!batch) {
                                       /* No batch available: run the completion directly so the
                                        * cluster is still released and the sequence finished. */
    2723           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM);
    2724           0 :                 return;
    2725             :         }
    2726             : 
                               /* Range: starts at the new cluster's LBA and spans the LBA count of
                                * one cluster (bs_cluster_to_lba(bs, 1)). */
    2727          10 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2728           5 :                            bs_cluster_to_lba(ctx->blob->bs, 1));
    2729           5 :         bs_batch_close(batch);
    2730           5 : }
    2731             : 
    2732             : static void
    2733        1025 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2734             : {
                               /*
                                * Callback passed to blob_insert_cluster_on_md_thread().
                                *   - bserrno == 0:       finish the sequence successfully.
                                *   - bserrno == -EEXIST: hand over to blob_insert_cluster_clear(); the
                                *                         sequence is finished later by
                                *                         blob_insert_cluster_clear_cpl().
                                *   - any other error:    release the allocation and finish the
                                *                         sequence with that error.
                                *
                                * cb_arg: struct spdk_blob_copy_cluster_ctx *.
                                */
    2735        1025 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2736             : 
    2737        1025 :         if (bserrno) {
    2738           5 :                 if (bserrno == -EEXIST) {
    2739             :                         /* The metadata insert failed because another thread
    2740             :                          * allocated the cluster first. Clear and free our cluster
    2741             :                          * but continue without error. */
    2742           5 :                         blob_insert_cluster_clear(ctx);
    2743           5 :                         return;
    2744             :                 }
    2745             : 
    2746           0 :                 blob_insert_cluster_revert(ctx);
    2747           0 :         }
    2748             : 
    2749        1020 :         bs_sequence_finish(ctx->seq, bserrno);
    2750        1025 : }
    2751             : 
    2752             : static void
    2753         515 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2754             : {
                               /*
                                * Completion of the data transfer into the new cluster; used both for
                                * the write issued by blob_write_copy() and for the device copy issued
                                * by blob_copy(). On success the new cluster is inserted into the
                                * blob's metadata via blob_insert_cluster_on_md_thread().
                                *
                                * cb_arg: struct spdk_blob_copy_cluster_ctx *.
                                */
    2755         515 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2756             :         uint32_t cluster_number;
    2757             : 
    2758         515 :         if (bserrno) {
    2759             :                 /* The write failed, so jump to the final completion handler */
                                       /* NOTE(review): nothing on this path calls
                                        * blob_insert_cluster_revert(), so ctx->new_cluster and
                                        * ctx->new_extent_page appear to stay claimed - confirm. */
    2760           0 :                 bs_sequence_finish(seq, bserrno);
    2761           0 :                 return;
    2762             :         }
    2763             : 
                               /* ctx->io_unit is the first io_unit of the cluster being allocated. */
    2764         515 :         cluster_number = bs_io_unit_to_cluster(ctx->blob->bs, ctx->io_unit);
    2765             : 
    2766        1030 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2767         515 :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2768         515 : }
    2769             : 
    2770             : static void
    2771         385 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2772             : {
                               /*
                                * Completion of the backing-device read issued by
                                * bs_allocate_and_copy_cluster(). On success, writes the staged data
                                * in ctx->buf to the newly allocated cluster; blob_write_copy_cpl()
                                * runs when that write completes.
                                *
                                * cb_arg: struct spdk_blob_copy_cluster_ctx *.
                                */
    2773         385 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2774             : 
    2775         385 :         if (bserrno != 0) {
    2776             :                 /* The read failed, so jump to the final completion handler */
                                       /* NOTE(review): as in blob_write_copy_cpl(), the allocated
                                        * cluster is not released on this path - confirm. */
    2777           0 :                 bs_sequence_finish(seq, bserrno);
    2778           0 :                 return;
    2779             :         }
    2780             : 
    2781             :         /* Write whole cluster */
    2782         770 :         bs_sequence_write_dev(seq, ctx->buf,
    2783         385 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2784         385 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2785         385 :                               blob_write_copy_cpl, ctx);
    2786         385 : }
    2787             : 
    2788             : static bool
    2789        1005 : blob_can_copy(struct spdk_blob *blob, uint64_t cluster_start_io_unit, uint64_t *base_lba)
    2790             : {
                               /*
                                * Decide whether the cluster starting at cluster_start_io_unit can be
                                * populated with a device-side copy. Returns true only when the blob
                                * is not an esnap clone, the blobstore device provides a copy
                                * callback, and the backing device's translate_lba() succeeds for the
                                * cluster's starting LBA.
                                *
                                * base_lba: passed through to translate_lba(); presumably receives the
                                *           source LBA on success - verify against the bs_dev
                                *           implementation.
                                */
    2791        1005 :         uint64_t lba = bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit);
    2792             : 
                               /* translate_lba() is only called when the first two checks pass. */
    2793        1359 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) &&
    2794         354 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2795             : }
    2796             : 
    2797             : static void
    2798         130 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2799             : {
                               /*
                                * Populate the new cluster with a device-side copy: copies one
                                * cluster's worth of LBAs from src_lba to the LBA of ctx->new_cluster
                                * on ctx->seq. blob_write_copy_cpl() runs when the copy completes.
                                *
                                * NOTE(review): the op parameter is not used in this function.
                                */
    2800         130 :         struct spdk_blob *blob = ctx->blob;
                               /* Cluster size in bytes converted to an LBA count using the backing
                                * device. */
    2801         130 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz);
    2802             : 
    2803         260 :         bs_sequence_copy_dev(ctx->seq,
    2804         130 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2805         130 :                              src_lba,
    2806         130 :                              lba_count,
    2807         130 :                              blob_write_copy_cpl, ctx);
    2808         130 : }
    2809             : 
    2810             : static void
    2811        1025 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2812             :                              struct spdk_io_channel *_ch,
    2813             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2814             : {
                               /*
                                * Allocate the cluster containing io_unit for blob and, when the blob
                                * has a parent and the backing range is not all zeroes, fill it from
                                * the backing device (device-side copy if possible, otherwise read +
                                * write). The new cluster is then inserted into the blob's metadata.
                                *
                                * op is queued on the channel's need_cluster_alloc list and is
                                * executed or aborted by blob_allocate_and_copy_cluster_cpl() once the
                                * sequence finishes. On any early failure in this function op is
                                * aborted with the error instead.
                                *
                                * If an allocation is already pending on this channel, op is only
                                * queued behind it.
                                */
    2815             :         struct spdk_bs_cpl cpl;
    2816             :         struct spdk_bs_channel *ch;
    2817             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2818             :         uint64_t cluster_start_io_unit;
    2819             :         uint32_t cluster_number;
    2820             :         bool is_zeroes;
    2821             :         bool can_copy;
    2822             :         bool is_valid_range;
                               /* Only passed to blob_can_copy() when is_valid_range is true, and only
                                * read when can_copy is true. */
    2823             :         uint64_t copy_src_lba;
    2824             :         int rc;
    2825             : 
    2826        1025 :         ch = spdk_io_channel_get_ctx(_ch);
    2827             : 
    2828        1025 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2829             :                 /* There are already operations pending. Queue this user op
    2830             :                  * and return because it will be re-executed when the outstanding
    2831             :                  * cluster allocation completes. */
    2832           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2833           0 :                 return;
    2834             :         }
    2835             : 
    2836             :         /* Round the io_unit offset down to the first io_unit in the cluster */
    2837        1025 :         cluster_start_io_unit = bs_io_unit_to_cluster_start(blob, io_unit);
    2838             : 
    2839             :         /* Calculate which index in the metadata cluster array the corresponding
    2840             :          * cluster is supposed to be at. */
    2841        1025 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2842             : 
    2843        1025 :         ctx = calloc(1, sizeof(*ctx));
    2844        1025 :         if (!ctx) {
    2845           0 :                 bs_user_op_abort(op, -ENOMEM);
    2846           0 :                 return;
    2847             :         }
    2848             : 
    2849        1025 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2850             : 
    2851        1025 :         ctx->blob = blob;
    2852        1025 :         ctx->io_unit = cluster_start_io_unit;
                               /* The metadata page is borrowed from the channel and zeroed here. */
    2853        1025 :         ctx->new_cluster_page = ch->new_cluster_page;
    2854        1025 :         memset(ctx->new_cluster_page, 0, blob->bs->md_page_size);
    2855             : 
    2856             :         /* Check if the cluster that we intend to do CoW for is valid for
    2857             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2858             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2859             :          * the blob has been resized after snapshot was taken. */
    2860        2050 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2861        1025 :                          bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2862        1025 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2863             : 
                               /* When the range is not valid, both can_copy and is_zeroes are false,
                                * so a blob with a parent takes the read + write path below. */
    2864        1025 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_io_unit, &copy_src_lba);
    2865             : 
    2866        1025 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2867        1005 :                         bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2868        1005 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
                               /* A staging buffer is only needed for the read + write path; this is
                                * the same condition that selects that path further down. */
    2869        1025 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) {
    2870         385 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2871             :                                        NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2872         385 :                 if (!ctx->buf) {
    2873           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2874             :                                     blob->bs->cluster_sz);
    2875           0 :                         free(ctx);
    2876           0 :                         bs_user_op_abort(op, -ENOMEM);
    2877           0 :                         return;
    2878             :                 }
    2879         385 :         }
    2880             : 
                               /* The cluster (and possibly an extent page) is claimed under
                                * used_lock. */
    2881        1025 :         spdk_spin_lock(&blob->bs->used_lock);
    2882        1025 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2883             :                                  false);
    2884        1025 :         spdk_spin_unlock(&blob->bs->used_lock);
    2885        1025 :         if (rc != 0) {
    2886           0 :                 spdk_free(ctx->buf);
    2887           0 :                 free(ctx);
    2888           0 :                 bs_user_op_abort(op, rc);
    2889           0 :                 return;
    2890             :         }
    2891             : 
    2892        1025 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2893        1025 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2894        1025 :         cpl.u.blob_basic.cb_arg = ctx;
    2895             : 
    2896        1025 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2897        1025 :         if (!ctx->seq) {
                                       /* NOTE(review): only ctx->new_cluster is released here.
                                        * blob_insert_cluster_revert() also releases
                                        * ctx->new_extent_page when it is non-zero - confirm whether
                                        * an extent page can already be claimed at this point. */
    2898           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2899           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster);
    2900           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2901           0 :                 spdk_free(ctx->buf);
    2902           0 :                 free(ctx);
    2903           0 :                 bs_user_op_abort(op, -ENOMEM);
    2904           0 :                 return;
    2905             :         }
    2906             : 
    2907             :         /* Queue the user op to block other incoming operations */
    2908        1025 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2909             : 
    2910        1025 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) {
    2911         515 :                 if (can_copy) {
    2912         130 :                         blob_copy(ctx, op, copy_src_lba);
    2913         130 :                 } else {
    2914             :                         /* Read cluster from backing device */
    2915         770 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2916         385 :                                                 bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2917         385 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2918         385 :                                                 blob_write_copy, ctx);
    2919             :                 }
    2920             : 
    2921         515 :         } else {
                                       /* No parent, or the backing range is all zeroes: nothing to
                                        * copy, so insert the new cluster into the metadata directly. */
    2922        1020 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2923         510 :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2924             :         }
    2925        1025 : }
    2926             : 
    2927             : static inline bool
    2928       56755 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2929             :                                  uint64_t *lba, uint64_t *lba_count)
    2930             : {
                               /*
                                * Translate an (io_unit, length) request into an LBA and LBA count.
                                *
                                * Returns true when io_unit is allocated in the blob: *lba is the
                                * blob's LBA for io_unit and *lba_count is left equal to length.
                                * Returns false when it is not allocated: *lba and *lba_count are
                                * both converted with bs_io_unit_to_back_dev_lba(), i.e. they refer
                                * to the backing device (which must exist).
                                */
    2931       56755 :         *lba_count = length;
    2932             : 
    2933       56755 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2934        5196 :                 assert(blob->back_bs_dev != NULL);
    2935        5196 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2936        5196 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count);
    2937        5196 :                 return false;
    2938             :         } else {
    2939       51559 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2940       51559 :                 return true;
    2941             :         }
    2942       56755 : }
    2943             : 
    2944             : struct op_split_ctx {
                               /*
                                * State of one blob I/O that is split at cluster boundaries.
                                * Allocated by blob_request_submit_op_split() and driven/freed by
                                * blob_request_submit_op_split_next().
                                */
                               /* Blob and channel every piece is submitted on. */
    2945             :         struct spdk_blob *blob;
    2946             :         struct spdk_io_channel *channel;
                               /* Offset of the next piece and amount still to submit, in io_units. */
    2947             :         uint64_t io_unit_offset;
    2948             :         uint64_t io_units_remaining;
                               /* Payload position of the next piece; only advanced for read/write. */
    2949             :         void *curr_payload;
    2950             :         enum spdk_blob_op_type op_type;
                               /* Sequence carrying the caller's completion callback. */
    2951             :         spdk_bs_sequence_t *seq;
                               /* True while a piece is being submitted from split_next(). */
    2952             :         bool in_submit_ctx;
                               /* Set when a piece completed during its own submission and more
                                * pieces remain. */
    2953             :         bool completed_in_submit_ctx;
                               /* Set when the whole operation finished during a submission; the
                                * submitting invocation then frees the context. */
    2954             :         bool done;
    2955             : };
    2956             : 
    2957             : static void
    2958         966 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2959             : {
                               /*
                                * Submit the next piece of a split operation. This function is also
                                * the completion callback of every piece, so it can be re-entered
                                * from inside the spdk_blob_io_*() call it makes. The in_submit_ctx,
                                * completed_in_submit_ctx and done flags let such a nested call just
                                * record what happened and return, leaving the outer invocation to
                                * loop or to free the context.
                                *
                                * cb_arg:  struct op_split_ctx *.
                                * bserrno: result of the previous piece (0 on the initial call).
                                */
    2960         966 :         struct op_split_ctx     *ctx = cb_arg;
    2961         966 :         struct spdk_blob        *blob = ctx->blob;
    2962         966 :         struct spdk_io_channel  *ch = ctx->channel;
    2963         966 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2964             :         uint8_t                 *buf;
    2965             :         uint64_t                offset;
    2966             :         uint64_t                length;
    2967             :         uint64_t                op_length;
    2968             : 
                               /* Finished (nothing left) or failed: complete the caller's sequence. */
    2969         966 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2970         222 :                 bs_sequence_finish(ctx->seq, bserrno);
    2971         222 :                 if (ctx->in_submit_ctx) {
    2972             :                         /* Defer freeing of the ctx object, since it will be
    2973             :                          * accessed when this unwinds back to the submission
    2974             :                          * context.
    2975             :                          */
    2976          50 :                         ctx->done = true;
    2977          50 :                 } else {
    2978         172 :                         free(ctx);
    2979             :                 }
    2980         222 :                 return;
    2981             :         }
    2982             : 
    2983         744 :         if (ctx->in_submit_ctx) {
    2984             :                 /* If this split operation completed in the context
    2985             :                  * of its submission, mark the flag and return immediately
    2986             :                  * to avoid recursion.
    2987             :                  */
    2988          85 :                 ctx->completed_in_submit_ctx = true;
    2989          85 :                 return;
    2990             :         }
    2991             : 
    2992         659 :         while (true) {
    2993         744 :                 ctx->completed_in_submit_ctx = false;
    2994             : 
    2995         744 :                 offset = ctx->io_unit_offset;
    2996         744 :                 length = ctx->io_units_remaining;
    2997         744 :                 buf = ctx->curr_payload;
                                       /* Never let a single piece cross a cluster boundary. */
    2998         744 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2999             :                                      offset));
    3000             : 
    3001             :                 /* Update length and payload for next operation */
    3002         744 :                 ctx->io_units_remaining -= op_length;
    3003         744 :                 ctx->io_unit_offset += op_length;
    3004         744 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
                                               /* NOTE(review): curr_payload is a void *, so this
                                                * relies on the GNU void-pointer arithmetic
                                                * extension. */
    3005         659 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size;
    3006         659 :                 }
    3007             : 
    3008         744 :                 assert(!ctx->in_submit_ctx);
    3009         744 :                 ctx->in_submit_ctx = true;
    3010             : 
    3011         744 :                 switch (op_type) {
    3012             :                 case SPDK_BLOB_READ:
    3013        1044 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3014         522 :                                           blob_request_submit_op_split_next, ctx);
    3015         522 :                         break;
    3016             :                 case SPDK_BLOB_WRITE:
    3017         274 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3018         137 :                                            blob_request_submit_op_split_next, ctx);
    3019         137 :                         break;
    3020             :                 case SPDK_BLOB_UNMAP:
    3021          90 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3022          45 :                                            blob_request_submit_op_split_next, ctx);
    3023          45 :                         break;
    3024             :                 case SPDK_BLOB_WRITE_ZEROES:
    3025          80 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3026          40 :                                                   blob_request_submit_op_split_next, ctx);
    3027          40 :                         break;
    3028             :                 case SPDK_BLOB_READV:
    3029             :                 case SPDK_BLOB_WRITEV:
                                               /* Vectored ops are rejected here: fail the sequence
                                                * and drop the context. NOTE(review): the message
                                                * presumably means "readv/writev". */
    3030           0 :                         SPDK_ERRLOG("readv/write not valid\n");
    3031           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3032           0 :                         free(ctx);
    3033           0 :                         return;
    3034             :                 }
    3035             : 
    3036             : #ifndef __clang_analyzer__
    3037             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3038             :                  * forms a path that recursively calls this function, but then says
    3039             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3040             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3041             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3042             :                  * works around the scan-build bug.
    3043             :                  */
    3044         744 :                 assert(ctx->in_submit_ctx);
    3045         744 :                 ctx->in_submit_ctx = false;
    3046             : 
    3047             :                 /* If the operation completed immediately, loop back and submit the
    3048             :                  * next operation.  Otherwise we can return and the next split
    3049             :                  * operation will get submitted when this current operation is
    3050             :                  * later completed asynchronously.
    3051             :                  */
    3052         744 :                 if (ctx->completed_in_submit_ctx) {
    3053          85 :                         continue;
    3054         659 :                 } else if (ctx->done) {
                                               /* The nested call finished the sequence and left the
                                                * free to us. */
    3055          50 :                         free(ctx);
    3056          50 :                 }
    3057             : #endif
    3058         659 :                 break;
    3059             :         }
    3060         966 : }
    3061             : 
    3062             : static void
    3063         222 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3064             :                              void *payload, uint64_t offset, uint64_t length,
    3065             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3066             : {
                               /*
                                * Start a blob I/O that is processed piece by piece by
                                * blob_request_submit_op_split_next(). A sequence carrying the
                                * caller's cb_fn/cb_arg is created so the caller is completed once,
                                * when the last piece finishes or a piece fails.
                                *
                                * offset/length are stored as io_unit values in the context. If the
                                * context or the sequence cannot be allocated, cb_fn is called
                                * directly with -ENOMEM.
                                */
    3067             :         struct op_split_ctx *ctx;
    3068             :         spdk_bs_sequence_t *seq;
    3069             :         struct spdk_bs_cpl cpl;
    3070             : 
    3071         222 :         assert(blob != NULL);
    3072             : 
    3073         222 :         ctx = calloc(1, sizeof(struct op_split_ctx));
    3074         222 :         if (ctx == NULL) {
    3075           0 :                 cb_fn(cb_arg, -ENOMEM);
    3076           0 :                 return;
    3077             :         }
    3078             : 
    3079         222 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3080         222 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3081         222 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3082             : 
    3083         222 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3084         222 :         if (!seq) {
    3085           0 :                 free(ctx);
    3086           0 :                 cb_fn(cb_arg, -ENOMEM);
    3087           0 :                 return;
    3088             :         }
    3089             : 
                               /* The flag fields (in_submit_ctx, completed_in_submit_ctx, done) start
                                * out false thanks to calloc(). */
    3090         222 :         ctx->blob = blob;
    3091         222 :         ctx->channel = ch;
    3092         222 :         ctx->curr_payload = payload;
    3093         222 :         ctx->io_unit_offset = offset;
    3094         222 :         ctx->io_units_remaining = length;
    3095         222 :         ctx->op_type = op_type;
    3096         222 :         ctx->seq = seq;
    3097             : 
                               /* Kick off the first piece; bserrno 0 means "no previous failure". */
    3098         222 :         blob_request_submit_op_split_next(ctx, 0);
    3099         222 : }
    3100             : 
    3101             : static void
    3102          75 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3103             : {
                               /*
                                * Completion callback taking a struct spdk_blob_free_cluster_ctx *
                                * (the name suggests it follows the unmap of a cluster - the
                                * registration site is outside this part of the file). On error the
                                * sequence is finished with bserrno and the context freed here; on
                                * success the cluster is released from the blob's metadata via
                                * blob_free_cluster_on_md_thread(), and blob_free_cluster_cpl() then
                                * finishes the sequence and frees the context.
                                */
    3104          75 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3105             : 
    3106          75 :         if (bserrno) {
    3107           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3108           0 :                 free(ctx);
    3109           0 :                 return;
    3110             :         }
    3111             : 
    3112         150 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3113          75 :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx);
    3114          75 : }
    3115             : 
    3116             : static void
    3117       52830 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3118             :                               void *payload, uint64_t offset, uint64_t length,
    3119             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3120             : {
    3121             :         struct spdk_bs_cpl cpl;
    3122             :         uint64_t lba;
    3123             :         uint64_t lba_count;
    3124             :         bool is_allocated;
    3125             : 
    3126       52830 :         assert(blob != NULL);
    3127             : 
    3128       52830 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3129       52830 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3130       52830 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3131             : 
    3132       52830 :         if (blob->frozen_refcnt) {
    3133             :                 /* This blob I/O is frozen */
    3134             :                 spdk_bs_user_op_t *op;
    3135           5 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3136             : 
    3137           5 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3138           5 :                 if (!op) {
    3139           0 :                         cb_fn(cb_arg, -ENOMEM);
    3140           0 :                         return;
    3141             :                 }
    3142             : 
    3143           5 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3144             : 
    3145           5 :                 return;
    3146             :         }
    3147             : 
    3148       52825 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3149             : 
    3150       52825 :         switch (op_type) {
    3151             :         case SPDK_BLOB_READ: {
    3152             :                 spdk_bs_batch_t *batch;
    3153             : 
    3154       25377 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3155       25377 :                 if (!batch) {
    3156           0 :                         cb_fn(cb_arg, -ENOMEM);
    3157           0 :                         return;
    3158             :                 }
    3159             : 
    3160       25377 :                 if (is_allocated) {
    3161             :                         /* Read from the blob */
    3162       23531 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3163       23531 :                 } else {
    3164             :                         /* Read from the backing block device */
    3165        1846 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3166             :                 }
    3167             : 
    3168       25377 :                 bs_batch_close(batch);
    3169       25377 :                 break;
    3170             :         }
    3171             :         case SPDK_BLOB_WRITE:
    3172             :         case SPDK_BLOB_WRITE_ZEROES: {
    3173       27328 :                 if (is_allocated) {
    3174             :                         /* Write to the blob */
    3175             :                         spdk_bs_batch_t *batch;
    3176             : 
    3177       26888 :                         if (lba_count == 0) {
    3178           0 :                                 cb_fn(cb_arg, 0);
    3179           0 :                                 return;
    3180             :                         }
    3181             : 
    3182       26888 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3183       26888 :                         if (!batch) {
    3184           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3185           0 :                                 return;
    3186             :                         }
    3187             : 
    3188       26888 :                         if (op_type == SPDK_BLOB_WRITE) {
    3189       26848 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3190       26848 :                         } else {
    3191          40 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3192             :                         }
    3193             : 
    3194       26888 :                         bs_batch_close(batch);
    3195       26888 :                 } else {
    3196             :                         /* Queue this operation and allocate the cluster */
    3197             :                         spdk_bs_user_op_t *op;
    3198             : 
    3199         440 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3200         440 :                         if (!op) {
    3201           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3202           0 :                                 return;
    3203             :                         }
    3204             : 
    3205         440 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3206             :                 }
    3207       27328 :                 break;
    3208             :         }
    3209             :         case SPDK_BLOB_UNMAP: {
    3210         120 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3211             :                 spdk_bs_batch_t *batch;
    3212             : 
    3213             :                 /* if aligned with cluster release cluster */
    3214         205 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3215          90 :                     blob_backed_with_zeroes_dev(blob) &&
    3216          85 :                     bs_io_units_per_cluster(blob) == length) {
    3217          75 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3218             :                         uint64_t cluster_start_page;
    3219             :                         uint32_t cluster_number;
    3220             : 
    3221          75 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3222             : 
    3223             :                         /* Round the io_unit offset down to the first page in the cluster */
    3224          75 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3225             : 
    3226             :                         /* Calculate which index in the metadata cluster array the corresponding
    3227             :                          * cluster is supposed to be at. */
    3228          75 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3229             : 
    3230          75 :                         ctx = calloc(1, sizeof(*ctx));
    3231          75 :                         if (!ctx) {
    3232           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3233           0 :                                 return;
    3234             :                         }
    3235             :                         /* When freeing a cluster the flow should be (in order):
    3236             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3237             :                          * old data)
    3238             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3239             :                          * cluster), update and sync metadata freeing the cluster
    3240             :                          * 3. Once metadata update is done, complete the user unmap request
    3241             :                          */
    3242          75 :                         ctx->blob = blob;
    3243          75 :                         ctx->page = cluster_start_page;
    3244          75 :                         ctx->cluster_num = cluster_number;
    3245          75 :                         ctx->md_page = bs_channel->new_cluster_page;
    3246          75 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3247          75 :                         if (!ctx->seq) {
    3248           0 :                                 free(ctx);
    3249           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3250           0 :                                 return;
    3251             :                         }
    3252             : 
    3253          75 :                         if (blob->use_extent_table) {
    3254          45 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3255          45 :                         }
    3256             : 
    3257          75 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3258          75 :                         cpl.u.blob_basic.cb_arg = ctx;
    3259          75 :                 }
    3260             : 
    3261         120 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3262         120 :                 if (!batch) {
    3263           0 :                         free(ctx);
    3264           0 :                         cb_fn(cb_arg, -ENOMEM);
    3265           0 :                         return;
    3266             :                 }
    3267             : 
    3268         120 :                 if (is_allocated) {
    3269         120 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3270         120 :                 }
    3271             : 
    3272         120 :                 bs_batch_close(batch);
    3273         120 :                 break;
    3274             :         }
    3275             :         case SPDK_BLOB_READV:
    3276             :         case SPDK_BLOB_WRITEV:
    3277           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3278           0 :                 cb_fn(cb_arg, -EINVAL);
    3279           0 :                 break;
    3280             :         }
    3281       52830 : }
    3282             : 
    3283             : static void
    3284       53692 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3285             :                        void *payload, uint64_t offset, uint64_t length,
    3286             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3287             : {
                               /*
                                * Validate a single-buffer blob request and route it to the single or
                                * split submission path.  Nothing is returned; every early exit is
                                * reported through cb_fn(cb_arg, status):
                                *   -EPERM   blob->data_ro is set and op_type is not SPDK_BLOB_READ
                                *   0        length is 0 (completed immediately, nothing is submitted)
                                *   -EINVAL  offset + length exceeds bs_cluster_to_lba() of the blob's
                                *            active.num_clusters
                                */
    3288       53692 :         assert(blob != NULL);
    3289             : 
    3290       53692 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3291           5 :                 cb_fn(cb_arg, -EPERM);
    3292           5 :                 return;
    3293             :         }
    3294             : 
    3295       53687 :         if (length == 0) {
    3296         615 :                 cb_fn(cb_arg, 0);
    3297         615 :                 return;
    3298             :         }
    3299             : 
                               /*
                                * NOTE(review): offset + length is a uint64_t sum and is not checked
                                * for wrap-around in this function - confirm callers bound both values.
                                */
    3300       53072 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3301          30 :                 cb_fn(cb_arg, -EINVAL);
    3302          30 :                 return;
    3303             :         }
                               /*
                                * A request that ends at or before the next cluster boundary is
                                * submitted as one operation; anything longer goes to the split path.
                                */
    3304       53042 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3305      105640 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3306       52820 :                                               cb_fn, cb_arg, op_type);
    3307       52820 :         } else {
    3308         444 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3309         222 :                                              cb_fn, cb_arg, op_type);
    3310             :         }
    3311       53692 : }
    3312             : 
    3313             : struct rw_iov_ctx {
                               /*
                                * State for a readv/writev request that crosses a cluster boundary and
                                * is therefore issued piece by piece from rw_iov_split_next().
                                *
                                *   blob, channel       target passed to every sub-I/O
                                *   cb_fn, cb_arg       caller's completion; invoked by rw_iov_split_next()
                                *                       when nothing remains or a sub-I/O reports an error
                                *   read                true: sub-I/Os use spdk_blob_io_readv_ext(),
                                *                       false: spdk_blob_io_writev_ext()
                                *   iovcnt              iovcnt given by the caller; the iov[] tail is
                                *                       allocated with this many entries
                                *   orig_iov            the caller's iovec array (referenced, not copied)
                                *   io_unit_offset      blob offset, in io units, of the next sub-I/O
                                *   io_units_remaining  io units not yet submitted
                                *   io_units_done       io units already submitted; used to find the
                                *                       current position inside orig_iov
                                *   ext_io_opts         forwarded unchanged to every sub-I/O
                                *   iov[]               scratch iovec array rebuilt for each sub-I/O
                                */
    3314             :         struct spdk_blob *blob;
    3315             :         struct spdk_io_channel *channel;
    3316             :         spdk_blob_op_complete cb_fn;
    3317             :         void *cb_arg;
    3318             :         bool read;
    3319             :         int iovcnt;
    3320             :         struct iovec *orig_iov;
    3321             :         uint64_t io_unit_offset;
    3322             :         uint64_t io_units_remaining;
    3323             :         uint64_t io_units_done;
    3324             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3325             :         struct iovec iov[0];
    3326             : };
    3327             : 
    3328             : static void
    3329        3910 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3330             : {
                               /*
                                * Completion callback for the non-split readv/writev sequences started
                                * in blob_request_submit_rw_iov().  Those submissions always pass NULL as
                                * the callback argument (hence the assert); the sequence is simply
                                * finished with the status it reported.
                                */
    3331        3910 :         assert(cb_arg == NULL);
    3332        3910 :         bs_sequence_finish(seq, bserrno);
    3333        3910 : }
    3334             : 
    3335             : static void
    3336         930 : rw_iov_split_next(void *cb_arg, int bserrno)
    3337             : {
                               /*
                                * Issue the next piece of a split readv/writev request.  cb_arg is the
                                * struct rw_iov_ctx describing the request; bserrno is the status of the
                                * previous piece (0 on the initial call).  The function passes itself as
                                * the completion callback of each piece, so pieces are issued one at a
                                * time until nothing remains or a piece fails.
                                */
    3338         930 :         struct rw_iov_ctx *ctx = cb_arg;
    3339         930 :         struct spdk_blob *blob = ctx->blob;
    3340             :         struct iovec *iov, *orig_iov;
    3341             :         int iovcnt;
    3342             :         size_t orig_iovoff;
    3343             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3344             :         uint64_t byte_count;
    3345             : 
                               /*
                                * Terminal case: either the previous piece failed or every io unit has
                                * been submitted.  bserrno is 0 in the latter case, so the same path
                                * reports success.  The context is released here and nowhere else.
                                */
    3346         930 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3347         255 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3348         255 :                 free(ctx);
    3349         255 :                 return;
    3350             :         }
    3351             : 
                               /* This piece runs from the current offset up to the cluster boundary at most. */
    3352         675 :         io_unit_offset = ctx->io_unit_offset;
    3353         675 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3354         675 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3355             :         /*
    3356             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3357             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3358             :          *  point to the current position in the I/O sequence.
    3359             :          */
    3360         675 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3361         675 :         orig_iov = &ctx->orig_iov[0];
    3362         675 :         orig_iovoff = 0;
    3363        1435 :         while (byte_count > 0) {
    3364         760 :                 if (byte_count >= orig_iov->iov_len) {
    3365         440 :                         byte_count -= orig_iov->iov_len;
    3366         440 :                         orig_iov++;
    3367         440 :                 } else {
    3368         320 :                         orig_iovoff = byte_count;
    3369         320 :                         byte_count = 0;
    3370             :                 }
    3371             :         }
    3372             : 
    3373             :         /*
    3374             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3375             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3376             :          */
    3377         675 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3378         675 :         iov = &ctx->iov[0];
    3379         675 :         iovcnt = 0;
                               /*
                                * Each output entry covers the rest of the current original entry or the
                                * rest of this piece, whichever is smaller.  Only the first output entry
                                * can start at a non-zero offset, because orig_iovoff is reset to 0 after it.
                                *
                                * NOTE(review): if iov_base is a void * (as in POSIX struct iovec), the
                                * addition below relies on the GNU void-pointer arithmetic extension -
                                * confirm the build always permits it.
                                */
    3380        1725 :         while (byte_count > 0) {
    3381        1050 :                 assert(iovcnt < ctx->iovcnt);
    3382        1050 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3383        1050 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3384        1050 :                 byte_count -= iov->iov_len;
    3385        1050 :                 orig_iovoff = 0;
    3386        1050 :                 orig_iov++;
    3387        1050 :                 iov++;
    3388        1050 :                 iovcnt++;
    3389             :         }
    3390             : 
                               /*
                                * Advance the bookkeeping before submitting; the local io_unit_offset
                                * still holds the start of this piece.
                                * NOTE(review): presumably updated first because the completion (this
                                * function) may run before the submit call returns - confirm.
                                */
    3391         675 :         ctx->io_unit_offset += io_units_count;
    3392         675 :         ctx->io_units_remaining -= io_units_count;
    3393         675 :         ctx->io_units_done += io_units_count;
    3394         675 :         iov = &ctx->iov[0];
    3395             : 
    3396         675 :         if (ctx->read) {
    3397        1020 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3398         510 :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3399         510 :         } else {
    3400         330 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3401         165 :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3402             :         }
    3403         930 : }
    3404             : 
    3405             : static void
    3406        4195 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3407             :                            struct iovec *iov, int iovcnt,
    3408             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3409             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3410             : {
                               /*
                                * Submit a vectored read (read == true) or write to a blob.  Nothing is
                                * returned; early exits are reported through cb_fn(cb_arg, status):
                                *   -EPERM   write to a blob whose data_ro flag is set
                                *   0        length is 0
                                *   -EINVAL  offset + length exceeds bs_cluster_to_lba() of the blob's
                                *            active.num_clusters
                                *   -ENOMEM  a user op, sequence or split context could not be allocated
                                *
                                * A request that does not cross a cluster boundary is issued directly as
                                * a sequence, or queued as a user op when the blob is frozen or when a
                                * write targets a range that is not allocated.  A request that crosses a
                                * boundary is driven piece by piece through rw_iov_split_next().
                                */
    3411             :         struct spdk_bs_cpl      cpl;
    3412             : 
    3413        4195 :         assert(blob != NULL);
    3414             : 
    3415        4195 :         if (!read && blob->data_ro) {
    3416           5 :                 cb_fn(cb_arg, -EPERM);
    3417           5 :                 return;
    3418             :         }
    3419             : 
    3420        4190 :         if (length == 0) {
    3421           0 :                 cb_fn(cb_arg, 0);
    3422           0 :                 return;
    3423             :         }
    3424             : 
    3425        4190 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3426           0 :                 cb_fn(cb_arg, -EINVAL);
    3427           0 :                 return;
    3428             :         }
    3429             : 
    3430             :         /*
    3431             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3432             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3433             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3434             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3435             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3436             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3437             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3438             :          *
    3439             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3440             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3441             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3442             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3443             :          */
    3444        4190 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3445             :                 uint64_t lba_count;
    3446             :                 uint64_t lba;
    3447             :                 bool is_allocated;
    3448             : 
    3449        3930 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3450        3930 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3451        3930 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3452             : 
    3453        3930 :                 if (blob->frozen_refcnt) {
    3454             :                         /* This blob I/O is frozen */
    3455             :                         enum spdk_blob_op_type op_type;
    3456             :                         spdk_bs_user_op_t *op;
    3457           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3458             : 
                                               /*
                                                * The request is parked on the channel's queued_io list instead
                                                * of being issued.
                                                * NOTE(review): unlike the unallocated-write path further down,
                                                * op->ext_io_opts is not assigned here - confirm whether the
                                                * extended options are meant to survive the freeze.
                                                */
    3459           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3460           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3461           0 :                         if (!op) {
    3462           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3463           0 :                                 return;
    3464             :                         }
    3465             : 
    3466           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3467             : 
    3468           0 :                         return;
    3469             :                 }
    3470             : 
    3471        3930 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3472             : 
    3473        3930 :                 if (read) {
    3474             :                         spdk_bs_sequence_t *seq;
    3475             : 
    3476        3565 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3477        3565 :                         if (!seq) {
    3478           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3479           0 :                                 return;
    3480             :                         }
    3481             : 
    3482        3565 :                         seq->ext_io_opts = ext_io_opts;
    3483             : 
                                               /* Allocated ranges are read from the blobstore device, others from blob->back_bs_dev. */
    3484        3565 :                         if (is_allocated) {
    3485         675 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3486         675 :                         } else {
    3487        2890 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3488             :                                                          rw_iov_done, NULL);
    3489             :                         }
    3490        3565 :                 } else {
    3491         365 :                         if (is_allocated) {
    3492             :                                 spdk_bs_sequence_t *seq;
    3493             : 
    3494         345 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3495         345 :                                 if (!seq) {
    3496           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3497           0 :                                         return;
    3498             :                                 }
    3499             : 
    3500         345 :                                 seq->ext_io_opts = ext_io_opts;
    3501             : 
    3502         345 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3503         345 :                         } else {
    3504             :                                 /* Queue this operation and allocate the cluster */
    3505             :                                 spdk_bs_user_op_t *op;
    3506             : 
    3507          40 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3508          20 :                                                       length);
    3509          20 :                                 if (!op) {
    3510           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3511           0 :                                         return;
    3512             :                                 }
    3513             : 
    3514          20 :                                 op->ext_io_opts = ext_io_opts;
    3515             : 
    3516          20 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3517             :                         }
    3518             :                 }
    3519        3930 :         } else {
    3520             :                 struct rw_iov_ctx *ctx;
    3521             : 
                                       /*
                                        * One context plus room for iovcnt scratch iovecs (the iov[] tail).
                                        * The context is freed by rw_iov_split_next() when the request ends.
                                        * NOTE(review): iovcnt is an int and is not range-checked in this
                                        * function before it sizes the allocation - confirm callers validate it.
                                        */
    3522         260 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3523         260 :                 if (ctx == NULL) {
    3524           5 :                         cb_fn(cb_arg, -ENOMEM);
    3525           5 :                         return;
    3526             :                 }
    3527             : 
    3528         255 :                 ctx->blob = blob;
    3529         255 :                 ctx->channel = _channel;
    3530         255 :                 ctx->cb_fn = cb_fn;
    3531         255 :                 ctx->cb_arg = cb_arg;
    3532         255 :                 ctx->read = read;
    3533         255 :                 ctx->orig_iov = iov;
    3534         255 :                 ctx->iovcnt = iovcnt;
    3535         255 :                 ctx->io_unit_offset = offset;
    3536         255 :                 ctx->io_units_remaining = length;
    3537         255 :                 ctx->io_units_done = 0;
    3538         255 :                 ctx->ext_io_opts = ext_io_opts;
    3539             : 
                                       /* Kick off the first piece; status 0 means "no previous failure". */
    3540         255 :                 rw_iov_split_next(ctx, 0);
    3541             :         }
    3542        4195 : }
    3543             : 
    3544             : static struct spdk_blob *
    3545        9668 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3546             : {
                               /*
                                * Return the open blob with the given id, or NULL when the id's bit is
                                * not set in bs->open_blobids.  The bit array is consulted first so that
                                * the tree search is skipped for ids that are not open.
                                */
    3547             :         struct spdk_blob find;
    3548             : 
    3549        9668 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3550        8681 :                 return NULL;
    3551             :         }
    3552             : 
                               /*
                                * Only find.id is initialised; the rest of the key is left unset.
                                * NOTE(review): presumably the spdk_blob_tree comparator looks at id
                                * only - confirm.
                                */
    3553         987 :         find.id = blobid;
    3554         987 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3555        9668 : }
    3556             : 
    3557             : static void
    3558        2256 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3559             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3560             : {
                               /*
                                * Look up the bookkeeping entries that tie a blob to its parent.
                                *
                                * On return *snapshot_entry is the entry in blob->bs->snapshots whose id
                                * equals blob->parent_id, and *clone_entry is the entry for blob->id in
                                * that snapshot's clones list.  Both are NULL when the blob has no
                                * parent (SPDK_BLOBID_INVALID) or no snapshot entry matches.
                                */
    3561        2256 :         assert(blob != NULL);
    3562        2256 :         *snapshot_entry = NULL;
    3563        2256 :         *clone_entry = NULL;
    3564             : 
    3565        2256 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3566        1901 :                 return;
    3567             :         }
    3568             : 
                               /* The output pointer doubles as the loop cursor; it ends up NULL when nothing matches. */
    3569         535 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3570         470 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3571         290 :                         break;
    3572             :                 }
    3573         180 :         }
    3574             : 
    3575         355 :         if (*snapshot_entry != NULL) {
    3576         345 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3577         345 :                         if ((*clone_entry)->id == blob->id) {
    3578         290 :                                 break;
    3579             :                         }
    3580          55 :                 }
    3581             : 
                                       /* A snapshot entry that exists is expected to list this blob as a clone. */
    3582         290 :                 assert(*clone_entry != NULL);
    3583         290 :         }
    3584        2256 : }
    3585             : 
    3586             : static int
    3587        1008 : bs_channel_create(void *io_device, void *ctx_buf)
    3588             : {
                               /*
                                * Initialise a blobstore channel.  io_device is the struct spdk_blob_store
                                * and ctx_buf is the struct spdk_bs_channel to fill in.
                                *
                                * Returns 0 on success and -1 when the request-set array, the device
                                * channel or the new-cluster page cannot be obtained; resources acquired
                                * before the failing step are released again before returning.
                                */
    3589        1008 :         struct spdk_blob_store          *bs = io_device;
    3590        1008 :         struct spdk_bs_channel          *channel = ctx_buf;
    3591             :         struct spdk_bs_dev              *dev;
    3592        1008 :         uint32_t                        max_ops = bs->max_channel_ops;
    3593             :         uint32_t                        i;
    3594             : 
    3595        1008 :         dev = bs->dev;
    3596             : 
                               /* One allocation holds all bs->max_channel_ops request sets for this channel. */
    3597        1008 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3598        1008 :         if (!channel->req_mem) {
    3599           0 :                 return -1;
    3600             :         }
    3601             : 
    3602        1008 :         TAILQ_INIT(&channel->reqs);
    3603             : 
                               /* Every request set starts out linked on the channel's reqs list. */
    3604      517104 :         for (i = 0; i < max_ops; i++) {
    3605      516096 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3606      516096 :         }
    3607             : 
    3608        1008 :         channel->bs = bs;
    3609        1008 :         channel->dev = dev;
    3610        1008 :         channel->dev_channel = dev->create_channel(dev);
    3611             : 
    3612        1008 :         if (!channel->dev_channel) {
    3613           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3614           0 :                 free(channel->req_mem);
    3615           0 :                 return -1;
    3616             :         }
    3617             : 
                               /* Zeroed buffer of bs->md_page_size bytes, requested with SPDK_MALLOC_DMA. */
    3618        1008 :         channel->new_cluster_page = spdk_zmalloc(bs->md_page_size, 0, NULL, SPDK_ENV_NUMA_ID_ANY,
    3619             :                                     SPDK_MALLOC_DMA);
    3620        1008 :         if (!channel->new_cluster_page) {
    3621           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3622           0 :                 free(channel->req_mem);
    3623           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3624           0 :                 return -1;
    3625             :         }
    3626             : 
    3627        1008 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3628        1008 :         TAILQ_INIT(&channel->queued_io);
    3629        1008 :         RB_INIT(&channel->esnap_channels);
    3630             : 
    3631        1008 :         return 0;
    3632        1008 : }
    3633             : 
    3634             : static void
    3635        1008 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3636             : {
                               /*
                                * Tear down a blobstore channel (ctx_buf is the struct spdk_bs_channel).
                                * User ops still waiting on the need_cluster_alloc and queued_io lists
                                * are aborted with -EIO before the channel's resources are released.
                                * io_device is not used here.
                                */
    3637        1008 :         struct spdk_bs_channel *channel = ctx_buf;
    3638             :         spdk_bs_user_op_t *op;
    3639             : 
    3640        1008 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3641           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3642           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3643           0 :                 bs_user_op_abort(op, -EIO);
    3644             :         }
    3645             : 
    3646        1008 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3647           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3648           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3649           0 :                 bs_user_op_abort(op, -EIO);
    3650             :         }
    3651             : 
    3652        1008 :         blob_esnap_destroy_bs_channel(channel);
    3653             : 
                               /* Release what bs_channel_create() acquired: request sets, cluster page, device channel. */
    3654        1008 :         free(channel->req_mem);
    3655        1008 :         spdk_free(channel->new_cluster_page);
    3656        1008 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3657        1008 : }
    3658             : 
    3659             : static void
    3660         988 : bs_dev_destroy(void *io_device)
    3661             : {
                               /*
                                * Final teardown of a blobstore (io_device is the struct spdk_blob_store).
                                * Destroys the backing device, frees any blobs still present in the
                                * open_blobs tree, releases the tracking bit arrays/pool and the
                                * used_lock, invokes the stored unload completion, then frees bs itself.
                                */
    3662         988 :         struct spdk_blob_store *bs = io_device;
    3663             :         struct spdk_blob        *blob, *blob_tmp;
    3664             : 
    3665         988 :         bs->dev->destroy(bs->dev);
    3666             : 
                               /* The _SAFE iterator is needed because each blob is removed and freed inside the loop. */
    3667         988 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3668           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3669           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3670           0 :                 blob_free(blob);
    3671           0 :         }
    3672             : 
    3673         988 :         spdk_spin_destroy(&bs->used_lock);
    3674             : 
    3675         988 :         spdk_bit_array_free(&bs->open_blobids);
    3676         988 :         spdk_bit_array_free(&bs->used_blobids);
    3677         988 :         spdk_bit_array_free(&bs->used_md_pages);
    3678         988 :         spdk_bit_pool_free(&bs->used_clusters);
    3679             :         /*
    3680             :          * If this function is called for any reason except a successful unload,
    3681             :          * the unload_cpl type will be NONE and this will be a nop.
    3682             :          */
    3683         988 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3684             : 
    3685         988 :         free(bs);
    3686         988 : }
    3687             : 
    3688             : static int
    3689        1139 : bs_blob_list_add(struct spdk_blob *blob)
    3690             : {
                               /*
                                * Record blob as a clone of its parent in blob->bs->snapshots.
                                *
                                * Nothing is recorded when parent_id is SPDK_BLOBID_INVALID or
                                * SPDK_BLOBID_EXTERNAL_SNAPSHOT.  Otherwise the parent's snapshot entry
                                * is created if missing, and a clone entry for blob->id is appended
                                * (and clone_count incremented) unless one is already listed.
                                *
                                * Returns 0 on success or when there is nothing to do, -ENOMEM when an
                                * entry cannot be allocated.
                                */
    3691             :         spdk_blob_id snapshot_id;
    3692        1139 :         struct spdk_blob_list *snapshot_entry = NULL;
    3693        1139 :         struct spdk_blob_list *clone_entry = NULL;
    3694             : 
    3695        1139 :         assert(blob != NULL);
    3696             : 
    3697        1139 :         snapshot_id = blob->parent_id;
    3698        1139 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3699         567 :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3700         617 :                 return 0;
    3701             :         }
    3702             : 
    3703         522 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3704         522 :         if (snapshot_entry == NULL) {
    3705             :                 /* Snapshot not found */
    3706         362 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3707         362 :                 if (snapshot_entry == NULL) {
    3708           0 :                         return -ENOMEM;
    3709             :                 }
    3710         362 :                 snapshot_entry->id = snapshot_id;
    3711         362 :                 TAILQ_INIT(&snapshot_entry->clones);
    3712         362 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3713         362 :         } else {
                                       /* Existing snapshot: clone_entry stays NULL unless this blob is already listed. */
    3714         255 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3715          95 :                         if (clone_entry->id == blob->id) {
    3716           0 :                                 break;
    3717             :                         }
    3718          95 :                 }
    3719             :         }
    3720             : 
    3721         522 :         if (clone_entry == NULL) {
    3722             :                 /* Clone not found */
                                       /*
                                        * NOTE(review): if this allocation fails right after a new snapshot
                                        * entry was linked above, that entry stays on bs->snapshots with an
                                        * empty clones list - confirm that is acceptable to callers.
                                        */
    3723         522 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3724         522 :                 if (clone_entry == NULL) {
    3725           0 :                         return -ENOMEM;
    3726             :                 }
    3727         522 :                 clone_entry->id = blob->id;
    3728         522 :                 TAILQ_INIT(&clone_entry->clones);
    3729         522 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3730         522 :                 snapshot_entry->clone_count++;
    3731         522 :         }
    3732             : 
    3733         522 :         return 0;
    3734        1139 : }
    3735             : /* Unlink blob from its parent's clone list, if it has one, and reset blob->parent_id to SPDK_BLOBID_INVALID. */
    3736             : static void
    3737        2158 : bs_blob_list_remove(struct spdk_blob *blob)
    3738             : {
    3739        2158 :         struct spdk_blob_list *snapshot_entry = NULL;
    3740        2158 :         struct spdk_blob_list *clone_entry = NULL;
    3741             : 
    3742        2158 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3743             :         /* No snapshot entry was returned for this blob, so there is nothing to unlink. */
    3744        2158 :         if (snapshot_entry == NULL) {
    3745        1888 :                 return;
    3746             :         }
    3747             :         /* NOTE(review): clone_entry is used without a NULL check; presumably the helper sets both entries or neither - confirm. */
    3748         270 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3749         270 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3750         270 :         free(clone_entry);
    3751             :         /* Only the clone entry is freed; the snapshot entry stays in the list even if its count reaches zero. */
    3752         270 :         snapshot_entry->clone_count--;
    3753        2158 : }
    3754             : /* Free every snapshot entry in bs->snapshots together with all of its clone entries; always returns 0. */
    3755             : static int
    3756         988 : bs_blob_list_free(struct spdk_blob_store *bs)
    3757             : {
    3758             :         struct spdk_blob_list *snapshot_entry;
    3759             :         struct spdk_blob_list *snapshot_entry_tmp;
    3760             :         struct spdk_blob_list *clone_entry;
    3761             :         struct spdk_blob_list *clone_entry_tmp;
    3762             :         /* The _SAFE iterators are used because entries are removed and freed while the lists are walked. */
    3763        1170 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3764         374 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3765         192 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3766         192 :                         free(clone_entry);
    3767         192 :                 }
    3768         182 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3769         182 :                 free(snapshot_entry);
    3770         182 :         }
    3771             : 
    3772         988 :         return 0;
    3773             : }
    3774             : /* Tear down bs: free the snapshot/clone list, unregister the md thread, then unregister the io_device with bs_dev_destroy as its callback. */
    3775             : static void
    3776         988 : bs_free(struct spdk_blob_store *bs)
    3777             : {
    3778         988 :         bs_blob_list_free(bs);
    3779             :         /* NOTE(review): the int return value of bs_unregister_md_thread() is ignored here. */
    3780         988 :         bs_unregister_md_thread(bs);
    3781         988 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3782         988 : }
    3783             : /* Fill opts with default values, touching only the fields that fit inside the caller-supplied opts_size bytes. */
    3784             : void
    3785        1323 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3786             : {
    3787             : 
    3788        1323 :         if (!opts) {
    3789           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3790           0 :                 return;
    3791             :         }
    3792             : 
    3793        1323 :         if (!opts_size) {
    3794           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3795           0 :                 return;
    3796             :         }
    3797             :         /* Zero all opts_size bytes first, so any field not explicitly set below is left as 0/NULL. */
    3798        1323 :         memset(opts, 0, opts_size);
    3799        1323 :         opts->opts_size = opts_size;
    3800             :         /* FIELD_OK holds when the whole field lies within the first opts_size bytes of struct spdk_bs_opts. */
    3801             : #define FIELD_OK(field) \
    3802             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3803             :         /* SET_FIELD assigns a default only when FIELD_OK holds for that field. */
    3804             : #define SET_FIELD(field, value) \
    3805             :         if (FIELD_OK(field)) { \
    3806             :                 opts->field = value; \
    3807             :         } \
    3808             : 
    3809        1323 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3810        1323 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3811        1323 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES); /* NOTE(review): same constant as num_md_pages - confirm this default is intended */
    3812        1323 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3813        1323 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3814             :         /* bstype is cleared with memset rather than assigned through SET_FIELD. */
    3815        1323 :         if (FIELD_OK(bstype)) {
    3816        1323 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3817        1323 :         }
    3818             : 
    3819        1323 :         SET_FIELD(iter_cb_fn, NULL);
    3820        1323 :         SET_FIELD(iter_cb_arg, NULL);
    3821        1323 :         SET_FIELD(force_recover, false);
    3822        1323 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3823        1323 :         SET_FIELD(esnap_ctx, NULL);
    3824             : 
    3825             : #undef FIELD_OK
    3826             : #undef SET_FIELD
    3827        1323 : }
    3828             : /* Validate blobstore options: returns 0 when usable, or -1 after logging the reason. */
    3829             : static int
    3830         607 : bs_opts_verify(struct spdk_bs_opts *opts)
    3831             : {
    3832         607 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3833         602 :             opts->max_channel_ops == 0) {
    3834           5 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3835           5 :                 return -1;
    3836             :         }
    3837             :         /* cluster_sz must be a whole multiple of SPDK_BS_PAGE_SIZE (the log text below calls that value "blocklen"). */
    3838         602 :         if ((opts->cluster_sz % SPDK_BS_PAGE_SIZE) != 0) {
    3839           5 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is not an integral multiple of blocklen %" PRIu32"\n",
    3840             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3841           5 :                 return -1;
    3842             :         }
    3843             : 
    3844         597 :         return 0;
    3845         607 : }
    3846             : 
    3847             : /* START spdk_bs_load */
    3848             : 
    3849             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3850             : 
    3851             : struct spdk_bs_load_ctx {
    3852             :         struct spdk_blob_store          *bs;
    3853             :         struct spdk_bs_super_block      *super; /* DMA buffer allocated in bs_alloc() */
    3854             :         /* mask is a DMA buffer allocated by the bs_write_used_*() helpers for whichever mask is being written. */
    3855             :         struct spdk_bs_md_mask          *mask;
    3856             :         bool                            in_page_chain;
    3857             :         uint32_t                        page_index;
    3858             :         uint32_t                        cur_page;
    3859             :         struct spdk_blob_md_page        *page;
    3860             : 
    3861             :         uint64_t                        num_extent_pages;
    3862             :         uint32_t                        *extent_page_num;
    3863             :         struct spdk_blob_md_page        *extent_pages;
    3864             :         struct spdk_bit_array           *used_clusters; /* created in bs_alloc(); written out when bs->used_clusters is not set */
    3865             :         /* iter_cb_fn, iter_cb_arg and force_recover are copied from struct spdk_bs_opts in bs_alloc(). */
    3866             :         spdk_bs_sequence_t                      *seq;
    3867             :         spdk_blob_op_with_handle_complete       iter_cb_fn;
    3868             :         void                                    *iter_cb_arg;
    3869             :         struct spdk_blob                        *blob; /* blob currently being examined by bs_load_iter() */
    3870             :         spdk_blob_id                            blobid; /* id saved by bs_delete_corrupted_blob() for the delete that follows */
    3871             : 
    3872             :         bool                                    force_recover;
    3873             : 
    3874             :         /* These fields are used in the spdk_bs_dump path. */
    3875             :         bool                                    dumping;
    3876             :         FILE                                    *fp;
    3877             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3878             :         char                                    xattr_name[4096];
    3879             : };
    3880             : /* Derive pages_per_cluster and io_units_per_cluster from cluster_sz; each *_shift field is set only when its count is a power of two. */
    3881             : static void
    3882        1349 : bs_init_per_cluster_fields(struct spdk_blob_store *bs)
    3883             : {
    3884        1349 :         bs->pages_per_cluster = bs->cluster_sz / bs->md_page_size;
    3885        1349 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3886        1349 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3887        1349 :         }
    3888        1349 :         bs->io_units_per_cluster = bs->cluster_sz / bs->io_unit_size;
    3889        1349 :         if (spdk_u32_is_pow2(bs->io_units_per_cluster)) {
    3890        1349 :                 bs->io_units_per_cluster_shift = spdk_u32log2(bs->io_units_per_cluster);
    3891        1349 :         }
    3892        1349 : }
    3893             : /* Allocate and register a blobstore plus its load context for dev; on success stores them in *_bs and *_ctx and returns 0, else a negative errno. */
    3894             : static int
    3895         988 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3896             :          struct spdk_bs_load_ctx **_ctx)
    3897             : {
    3898             :         struct spdk_blob_store  *bs;
    3899             :         struct spdk_bs_load_ctx *ctx;
    3900             :         uint64_t dev_size;
    3901             :         uint32_t md_page_size;
    3902             :         int rc;
    3903             : 
    3904         988 :         dev_size = dev->blocklen * dev->blockcnt;
    3905         988 :         if (dev_size < opts->cluster_sz) {
    3906             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3907           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3908             :                              dev_size, opts->cluster_sz);
    3909           0 :                 return -ENOSPC;
    3910             :         }
    3911             :         /* The md page size is the largest of dev->phys_blocklen, SPDK_BS_PAGE_SIZE and opts->md_page_size. */
    3912         988 :         md_page_size = spdk_max(spdk_max(dev->phys_blocklen, SPDK_BS_PAGE_SIZE),
    3913             :                                 opts->md_page_size);
    3914         988 :         if (opts->cluster_sz < md_page_size) {
    3915             :                 /* Cluster size cannot be smaller than page size */
    3916           0 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3917             :                             opts->cluster_sz, md_page_size);
    3918           0 :                 return -EINVAL;
    3919             :         }
    3920         988 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3921         988 :         if (!bs) {
    3922           0 :                 return -ENOMEM;
    3923             :         }
    3924             : 
    3925         988 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3926         988 :         if (!ctx) {
    3927           0 :                 free(bs);
    3928           0 :                 return -ENOMEM;
    3929             :         }
    3930             : 
    3931         988 :         ctx->bs = bs;
    3932         988 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3933         988 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3934         988 :         ctx->force_recover = opts->force_recover;
    3935             :         /* The super block buffer comes from spdk_zmalloc() with SPDK_MALLOC_DMA and 0x1000 alignment. */
    3936         988 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3937             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    3938         988 :         if (!ctx->super) {
    3939           0 :                 free(ctx);
    3940           0 :                 free(bs);
    3941           0 :                 return -ENOMEM;
    3942             :         }
    3943             : 
    3944         988 :         RB_INIT(&bs->open_blobs);
    3945         988 :         TAILQ_INIT(&bs->snapshots);
    3946         988 :         bs->dev = dev;
    3947         988 :         bs->md_page_size = md_page_size;
    3948         988 :         bs->md_thread = spdk_get_thread();
    3949         988 :         assert(bs->md_thread != NULL);
    3950             : 
    3951             :         /*
    3952             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3953             :          *  even multiple of the cluster size.
    3954             :          */
    3955         988 :         bs->cluster_sz = opts->cluster_sz;
    3956         988 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3957         988 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3958         988 :         if (!ctx->used_clusters) {
    3959           0 :                 spdk_free(ctx->super);
    3960           0 :                 free(ctx);
    3961           0 :                 free(bs);
    3962           0 :                 return -ENOMEM;
    3963             :         }
    3964             : 
    3965         988 :         bs->num_free_clusters = bs->total_clusters;
    3966         988 :         bs->io_unit_size = dev->blocklen;
    3967         988 :         bs_init_per_cluster_fields(bs);
    3968             : 
    3969         988 :         bs->max_channel_ops = opts->max_channel_ops;
    3970         988 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3971         988 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3972         988 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3973         988 :         bs->esnap_ctx = opts->esnap_ctx;
    3974             : 
    3975             :         /* The metadata is assumed to be at least 1 page */
    3976         988 :         bs->used_md_pages = spdk_bit_array_create(1);
    3977         988 :         bs->used_blobids = spdk_bit_array_create(0);
    3978         988 :         bs->open_blobids = spdk_bit_array_create(0);
    3979             :         /* NOTE(review): the three spdk_bit_array_create() results above are not checked for NULL. */
    3980         988 :         spdk_spin_init(&bs->used_lock);
    3981             : 
    3982         988 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3983             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3984         988 :         rc = bs_register_md_thread(bs);
    3985         988 :         if (rc == -1) { /* NOTE(review): only -1 is treated as failure; any other non-zero rc falls through - confirm */
    3986           0 :                 spdk_io_device_unregister(bs, NULL);
    3987           0 :                 spdk_spin_destroy(&bs->used_lock);
    3988           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3989           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3990           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3991           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3992           0 :                 spdk_free(ctx->super);
    3993           0 :                 free(ctx);
    3994           0 :                 free(bs);
    3995             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3996           0 :                 return -ENOMEM;
    3997             :         }
    3998             :         /* ctx and bs are handed to the caller only on success; every failure path above frees them. */
    3999         988 :         *_ctx = ctx;
    4000         988 :         *_bs = bs;
    4001         988 :         return 0;
    4002         988 : }
    4003             : /* Refresh super_blob, bstype and crc in super from bs, then write it at metadata page 0; cb_fn/cb_arg are passed to bs_sequence_write_dev(). */
    4004             : static void
    4005        1031 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    4006             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    4007             : {
    4008             :         /* Update the values in the super block; the crc is computed last, after the other fields are set */
    4009        1031 :         super->super_blob = bs->super_blob;
    4010        1031 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    4011        1031 :         super->crc = blob_md_page_calc_crc(super);
    4012        2062 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    4013        1031 :                               bs_byte_to_lba(bs, sizeof(*super)),
    4014        1031 :                               cb_fn, cb_arg);
    4015        1031 : }
    4016             : /* Serialize the used-cluster mask into a new DMA buffer (ctx->mask) and write it to the region named by the super block; arg is the load ctx. */
    4017             : static void
    4018         953 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4019             : {
    4020         953 :         struct spdk_bs_load_ctx *ctx = arg;
    4021             :         uint64_t        mask_size, lba, lba_count;
    4022             : 
    4023             :         /* Write out the used clusters mask */
    4024         953 :         mask_size = ctx->super->used_cluster_mask_len * ctx->bs->md_page_size;
    4025         953 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4026             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4027         953 :         if (!ctx->mask) {
    4028           0 :                 cb_fn(seq, arg, -ENOMEM);
    4029           0 :                 return;
    4030             :         }
    4031             :         /* NOTE(review): ctx->mask is not freed in this function; presumably the completion callback releases it - confirm. */
    4032         953 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4033         953 :         ctx->mask->length = ctx->bs->total_clusters;
    4034             :         /* We could get here through the normal unload path, or through dirty
    4035             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4036             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4037             :          * only the bit array from the load ctx.
    4038             :          */
    4039         953 :         if (ctx->bs->used_clusters) {
    4040         819 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4041         819 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4042         819 :         } else {
    4043         134 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4044         134 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4045             :         }
    4046         953 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4047         953 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4048         953 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4049         953 : }
    4050             : /* Serialize bs->used_md_pages into a new DMA buffer (ctx->mask) and write it to the used-page mask region; arg is the load ctx. */
    4051             : static void
    4052         963 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4053             : {
    4054         963 :         struct spdk_bs_load_ctx *ctx = arg;
    4055             :         uint64_t        mask_size, lba, lba_count;
    4056             :         /* The buffer covers the whole on-disk mask region: used_page_mask_len pages of md_page_size bytes. */
    4057         963 :         mask_size = ctx->super->used_page_mask_len * ctx->bs->md_page_size;
    4058         963 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4059             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4060         963 :         if (!ctx->mask) {
    4061           5 :                 cb_fn(seq, arg, -ENOMEM);
    4062           5 :                 return;
    4063             :         }
    4064             :         /* NOTE(review): ctx->mask is not freed in this function; presumably the completion callback releases it - confirm. */
    4065         958 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4066         958 :         ctx->mask->length = ctx->super->md_len;
    4067         958 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4068             : 
    4069         958 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4070         958 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4071         958 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4072         958 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4073         963 : }
    4074             : /* Serialize bs->used_blobids into a new DMA buffer (ctx->mask) and write it to the blobid mask region; completes at once with 0 if there is none. */
    4075             : static void
    4076         953 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4077             : {
    4078         953 :         struct spdk_bs_load_ctx *ctx = arg;
    4079             :         uint64_t        mask_size, lba, lba_count;
    4080             : 
    4081         953 :         if (ctx->super->used_blobid_mask_len == 0) {
    4082             :                 /*
    4083             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4084             :                  *  written to disk.
    4085             :                  */
    4086          30 :                 cb_fn(seq, arg, 0);
    4087          30 :                 return;
    4088             :         }
    4089             : 
    4090         923 :         mask_size = ctx->super->used_blobid_mask_len * ctx->bs->md_page_size;
    4091         923 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4092             :                                  SPDK_MALLOC_DMA);
    4093         923 :         if (!ctx->mask) {
    4094           0 :                 cb_fn(seq, arg, -ENOMEM);
    4095           0 :                 return;
    4096             :         }
    4097             :         /* NOTE(review): ctx->mask is not freed in this function; presumably the completion callback releases it - confirm. */
    4098         923 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4099         923 :         ctx->mask->length = ctx->super->md_len;
    4100         923 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4101             : 
    4102         923 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4103         923 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4104         923 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4105         923 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4106         953 : }
    4107             : /* Set the SPDK_BLOB_THIN_PROV bit in blob->invalid_flags and mark the blob's state as dirty. */
    4108             : static void
    4109         882 : blob_set_thin_provision(struct spdk_blob *blob)
    4110             : {
    4111         882 :         blob_verify_md_op(blob);
    4112         882 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4113         882 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4114         882 : }
    4115             : /* Store clear_method on the blob, OR its shifted value into md_ro_flags and mark the blob's state as dirty. */
    4116             : static void
    4117        2617 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4118             : {
    4119        2617 :         blob_verify_md_op(blob);
    4120        2617 :         blob->clear_method = clear_method;
    4121        2617 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT); /* NOTE(review): OR-ed in; bits from an earlier clear method are not cleared first */
    4122        2617 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4123        2617 : }
    4124             : 
    4125             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4126             : /* Completion of deleting a corrupted blob: resume load iteration at the next set bit in used_blobids after ctx->blobid. */
    4127             : static void
    4128          30 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4129             : {
    4130          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4131             :         spdk_blob_id id;
    4132             :         int64_t page_num;
    4133             :         /* NOTE(review): bserrno from the delete is not examined in this function. */
    4134             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4135             :          * last blob has been removed) */
    4136          30 :         page_num = bs_blobid_to_page(ctx->blobid);
    4137          30 :         page_num++;
    4138          30 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4139          30 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4140          30 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4141          30 :                 return;
    4142             :         }
    4143             :         /* Another blobid is in use after the deleted one: open it and continue in bs_load_iter(). */
    4144           0 :         id = bs_page_to_blobid(page_num);
    4145             : 
    4146           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4147          30 : }
    4148             : /* Close completion for the corrupted blob: delete it by ctx->blobid, or on close failure log and move on to the next blob. */
    4149             : static void
    4150          30 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4151             : {
    4152          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4153             : 
    4154          30 :         if (bserrno != 0) {
    4155           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4156           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4157           0 :                 return;
    4158             :         }
    4159             : 
    4160          30 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4161          30 : }
    4162             : /* Runs after the clone is closed: zero ctx->blob's cluster and extent-page maps, mark it thin provisioned and close it ahead of deletion. */
    4163             : static void
    4164          30 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4165             : {
    4166          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4167             :         uint64_t i;
    4168             : 
    4169          30 :         if (bserrno != 0) {
    4170           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4171           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4172           0 :                 return;
    4173             :         }
    4174             : 
    4175             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4176             :          * at this point. Let's clear both for snapshot now,
    4177             :          * so that it won't be cleared for clone later when we remove snapshot.
    4178             :          * Also set thin provision to pass data corruption check */
    4179         330 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4180         300 :                 ctx->blob->active.clusters[i] = 0;
    4181         300 :         }
    4182          48 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4183          18 :                 ctx->blob->active.extent_pages[i] = 0;
    4184          18 :         }
    4185             : 
    4186          30 :         ctx->blob->active.num_allocated_clusters = 0;
    4187             :         /* NOTE(review): md_ro is presumably cleared so the metadata changes made here can be persisted - confirm. */
    4188          30 :         ctx->blob->md_ro = false;
    4189             : 
    4190          30 :         blob_set_thin_provision(ctx->blob);
    4191             :         /* Save the id now; bs_delete_corrupted_close_cb() deletes by ctx->blobid once the blob is closed. */
    4192          30 :         ctx->blobid = ctx->blob->id;
    4193             : 
    4194          30 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4195          30 : }
    4196             : /* Runs after the clone is closed: keep ctx->blob by removing both in-progress xattrs, marking it read-only and adding it to the snapshot list. */
    4197             : static void
    4198          15 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4199             : {
    4200          15 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4201             : 
    4202          15 :         if (bserrno != 0) {
    4203           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4204           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4205           0 :                 return;
    4206             :         }
    4207             :         /* NOTE(review): md_ro is presumably cleared so the xattr removals below are allowed - confirm. */
    4208          15 :         ctx->blob->md_ro = false;
    4209          15 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4210          15 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4211          15 :         spdk_blob_set_read_only(ctx->blob);
    4212             :         /* From here the blob is handled like an uncorrupted one in bs_load_iter(): user callback, list add, next blob. */
    4213          15 :         if (ctx->iter_cb_fn) {
    4214           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4215           0 :         }
    4216          15 :         bs_blob_list_add(ctx->blob); /* NOTE(review): a -ENOMEM return is ignored here */
    4217             : 
    4218          15 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4219          15 : }
    4220             : /* Open completion for the clone named in the corrupted blob's xattr: keep or delete ctx->blob depending on the clone's parent_id. */
    4221             : static void
    4222          45 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4223             : {
    4224          45 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4225             : 
    4226          45 :         if (bserrno != 0) {
    4227           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4228           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4229           0 :                 return;
    4230             :         }
    4231             :         /* In both branches the clone is closed first; the chosen close callback then acts on ctx->blob. */
    4232          45 :         if (blob->parent_id == ctx->blob->id) {
    4233             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4234             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4235          15 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4236          15 :         } else {
    4237             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4238             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4239          30 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4240             :         }
    4241          45 : }
    4242             : 
    4243             : static void /* Per-blob callback of the blob iteration started by bs_load_complete(). */
    4244         903 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno) /* arg is the struct spdk_bs_load_ctx; bserrno is the iteration status for blob */
    4245             : {
    4246         903 :         struct spdk_bs_load_ctx *ctx = arg;
    4247             :         const void *value;
    4248             :         size_t len;
    4249         903 :         int rc = 0;
    4250             : 
    4251         903 :         if (bserrno == 0) {
    4252             :                 /* Examine blob if it is corrupted after power failure. Fix
    4253             :                  * the ones that can be fixed and remove any other corrupted
    4254             :                  * ones. If it is not corrupted just process it */
    4255         552 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4256         552 :                 if (rc != 0) {
    4257         527 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4258         527 :                         if (rc != 0) {
    4259             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4260         507 :                                 if (ctx->iter_cb_fn) {
    4261          42 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4262          42 :                                 }
    4263         507 :                                 bs_blob_list_add(blob);
    4264         507 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx); /* re-enters this callback for the next blob */
    4265         507 :                                 return;
    4266             :                         }
    4267             : 
    4268          20 :                 }
    4269             : 
    4270          45 :                 assert(len == sizeof(spdk_blob_id)); /* value/len come from whichever of the two xattr lookups succeeded; checked in debug builds only */
    4271             : 
    4272          45 :                 ctx->blob = blob; /* remembered so the bs_examine_clone callback can reach the blob being examined */
    4273             : 
    4274             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4275          45 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx); /* NOTE(review): the cast drops const from value; a const spdk_blob_id * would do */
    4276          45 :                 return;
    4277         351 :         } else if (bserrno == -ENOENT) { /* -ENOENT is treated as successful completion - presumably "no more blobs"; confirm against spdk_bs_iter_next */
    4278         351 :                 bserrno = 0;
    4279         351 :         } else {
    4280             :                 /*
    4281             :                  * This case needs to be looked at further.  Same problem
    4282             :                  *  exists with applications that rely on explicit blob
    4283             :                  *  iteration.  We should just skip the blob that failed
    4284             :                  *  to load and continue on to the next one.
    4285             :                  */
    4286           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4287             :         }
    4288             : 
    4289         351 :         ctx->iter_cb_fn = NULL; /* iteration is over (normally or with an error): tear down the load context */
    4290             : 
    4291         351 :         spdk_free(ctx->super);
    4292         351 :         bs_sequence_finish(ctx->seq, bserrno); /* bserrno is 0 here on the -ENOENT path, otherwise the iteration error */
    4293         351 :         free(ctx);
    4294         903 : }
    4295             : 
    4296             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4297             : 
    4298             : static void /* Final load step: builds bs->used_clusters from ctx->used_clusters, then either dumps metadata or starts iterating blobs. */
    4299         351 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4300             : {
    4301         351 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters); /* NOTE(review): result is not checked here - confirm whether this call can fail */
    4302         351 :         if (ctx->dumping) {
    4303           0 :                 bs_dump_read_md_page(ctx->seq, ctx); /* dump mode: blobs are not iterated from here */
    4304           0 :                 return;
    4305             :         }
    4306         351 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx); /* bs_load_iter is called per blob and eventually finishes ctx->seq and frees ctx */
    4307         351 : }
    4308             : 
    4309             : static void /* Common failure path of the load sequence: releases the context's buffers, finishes the sequence with bserrno and frees the blobstore and ctx. */
    4310          40 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno) /* bserrno must be non-zero; ctx must not be used after this returns */
    4311             : {
    4312          40 :         assert(bserrno != 0);
    4313             : 
    4314          40 :         spdk_free(ctx->mask); /* callers that already freed ctx->mask must have reset it to NULL, otherwise this frees it twice */
    4315          40 :         spdk_free(ctx->super);
    4316          40 :         bs_sequence_finish(ctx->seq, bserrno);
    4317          40 :         bs_free(ctx->bs);
    4318          40 :         spdk_bit_array_free(&ctx->used_clusters);
    4319          40 :         free(ctx);
    4320          40 : }
    4321             : 
    4322             : static void /* Completion of the used-blobids mask read: loads the mask into bs->used_blobids and completes the load. */
    4323         217 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx; seq is unused */
    4324             : {
    4325         217 :         struct spdk_bs_load_ctx *ctx = cb_arg; /* NOTE(review): bserrno is never checked in this function, unlike bs_load_used_clusters_cpl and bs_load_used_pages_cpl - a failed read appears to be processed as if it succeeded */
    4326             :         int rc;
    4327             : 
    4328             :         /* The type must be correct */
    4329         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS); /* the on-disk mask is validated by asserts only, so these checks vanish when NDEBUG is defined */
    4330             : 
    4331             :         /* The length of the mask (in bits) must not be greater than
    4332             :          * the length of the buffer (converted to bits) */
    4333         217 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * ctx->super->md_page_size * 8));
    4334             : 
    4335             :         /* The length of the mask must be exactly equal to the size
    4336             :          * (in pages) of the metadata region */
    4337         217 :         assert(ctx->mask->length == ctx->super->md_len);
    4338             : 
    4339         217 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4340         217 :         if (rc < 0) {
    4341           0 :                 bs_load_ctx_fail(ctx, rc); /* also frees ctx->mask */
    4342           0 :                 return;
    4343             :         }
    4344             : 
    4345         217 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4346         217 :         spdk_free(ctx->mask); /* NOTE(review): ctx->mask is left dangling (not reset to NULL) from here on */
    4347             : 
    4348         217 :         bs_load_complete(ctx);
    4349         217 : }
    4350             : 
    4351             : static void /* Completion of the used-clusters mask read: loads the mask into ctx->used_clusters, recomputes the free-cluster count, then starts the used-blobids mask read. */
    4352         217 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx; bserrno is the status of the mask read */
    4353             : {
    4354         217 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4355             :         uint64_t                lba, lba_count, mask_size;
    4356             :         int                     rc;
    4357             : 
    4358         217 :         if (bserrno != 0) {
    4359           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4360           0 :                 return;
    4361             :         }
    4362             : 
    4363             :         /* The type must be correct */
    4364         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4365             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4366         217 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    4367             :                                              struct spdk_blob_md_page) * 8)); /* NOTE(review): uses sizeof(struct spdk_blob_md_page) where the buffer below is sized with super->md_page_size - confirm they always agree */
    4368             :         /*
    4369             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4370             :          * larger than the total number of clusters due to a failed spdk_bs_grow.
    4371             :          */
    4372         217 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4373         217 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4374           5 :                 SPDK_WARNLOG("Shrink the used_custers mask length to total_clusters");
    4375           5 :                 ctx->mask->length = ctx->bs->total_clusters;
    4376           5 :         }
    4377             : 
    4378         217 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4379         217 :         if (rc < 0) { /* ctx->mask is deliberately not freed here: bs_load_ctx_fail() frees it, and freeing it in both places was a double free */
    4381           0 :                 bs_load_ctx_fail(ctx, rc);
    4382           0 :                 return;
    4383             :         }
    4384             : 
    4385         217 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4386         217 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4387         217 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4388             : 
    4389         217 :         spdk_free(ctx->mask); /* safe with the failure path below: ctx->mask is overwritten by the allocation result (NULL on failure) before bs_load_ctx_fail() can run */
    4390             : 
    4391             :         /* Read the used blobids mask */
    4392         217 :         mask_size = ctx->super->used_blobid_mask_len * ctx->super->md_page_size;
    4393         217 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4394             :                                  SPDK_MALLOC_DMA);
    4395         217 :         if (!ctx->mask) {
    4396           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4397           0 :                 return;
    4398             :         }
    4399         217 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4400         217 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4401         434 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4402         217 :                              bs_load_used_blobids_cpl, ctx);
    4403         217 : }
    4404             : 
    4405             : static void /* Completion of the used-pages mask read: loads the mask into bs->used_md_pages, then starts the used-clusters mask read. */
    4406         222 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx; bserrno is the status of the mask read */
    4407             : {
    4408         222 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4409             :         uint64_t                lba, lba_count, mask_size;
    4410             :         int                     rc;
    4411             : 
    4412         222 :         if (bserrno != 0) {
    4413           5 :                 bs_load_ctx_fail(ctx, bserrno);
    4414           5 :                 return;
    4415             :         }
    4416             : 
    4417             :         /* The type must be correct */
    4418         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4419             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4420         217 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * ctx->super->md_page_size *
    4421             :                                      8));
    4422             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4423         217 :         if (ctx->mask->length != ctx->super->md_len) {
    4424           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4425             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4426             :                             ctx->mask->length, ctx->super->md_len);
    4427           0 :                 assert(false); /* with NDEBUG defined this is a no-op and loading continues with the mismatched length */
    4428             :         }
    4429             : 
    4430         217 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4431         217 :         if (rc < 0) {
    4432           0 :                 bs_load_ctx_fail(ctx, rc); /* also frees ctx->mask */
    4433           0 :                 return;
    4434             :         }
    4435             : 
    4436         217 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4437         217 :         spdk_free(ctx->mask); /* ctx->mask is overwritten by the allocation below before any failure path can free it again */
    4438             : 
    4439             :         /* Read the used clusters mask */
    4440         217 :         mask_size = ctx->super->used_cluster_mask_len * ctx->super->md_page_size;
    4441         217 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4442             :                                  SPDK_MALLOC_DMA);
    4443         217 :         if (!ctx->mask) {
    4444           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4445           0 :                 return;
    4446             :         }
    4447         217 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4448         217 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4449         434 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4450         217 :                              bs_load_used_clusters_cpl, ctx);
    4451         222 : }
    4452             : 
    4453             : static void /* Starts the mask-loading chain: allocates a buffer and reads the used-pages mask; bs_load_used_pages_cpl continues from there. */
    4454         227 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4455             : {
    4456             :         uint64_t lba, lba_count, mask_size;
    4457             : 
    4458             :         /* Read the used pages mask */
    4459         227 :         mask_size = ctx->super->used_page_mask_len * ctx->super->md_page_size; /* buffer size in bytes: mask length in md pages times the md page size */
    4460         227 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4461             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4462         227 :         if (!ctx->mask) {
    4463           5 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4464           5 :                 return;
    4465             :         }
    4466             : 
    4467         222 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4468         222 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4469         444 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4470         222 :                              bs_load_used_pages_cpl, ctx);
    4471         227 : }
    4472             : 
    4473             : static int /* Walks the descriptors of one metadata page during replay: marks referenced clusters in ctx->used_clusters, decrements bs->num_free_clusters and appends extent page numbers to ctx->extent_page_num. */
    4474         323 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page) /* returns 0 on success, or -EINVAL / -ENOSPC / -ENOMEM */
    4475             : {
    4476         323 :         struct spdk_blob_store *bs = ctx->bs;
    4477             :         struct spdk_blob_md_descriptor *desc;
    4478         323 :         size_t  cur_desc = 0; /* byte offset of desc within page->descriptors */
    4479             : 
    4480         323 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4481         933 :         while (cur_desc < sizeof(page->descriptors)) { /* NOTE(review): desc->length is not checked against the space left in the page before a descriptor body is read - confirm callers only pass validated pages */
    4482         933 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4483         298 :                         if (desc->length == 0) {
    4484             :                                 /* If padding and length are 0, this terminates the page */
    4485         298 :                                 break;
    4486             :                         }
    4487         635 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4488             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4489             :                         unsigned int                            i, j;
    4490          68 :                         unsigned int                            cluster_count = 0;
    4491             :                         uint32_t                                cluster_idx;
    4492             : 
    4493          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4494             : 
    4495         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) { /* one iteration per run-length extent */
    4496         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) { /* one iteration per cluster of the run */
    4497         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4498             :                                         /*
    4499             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4500             :                                          * in the used cluster map.
    4501             :                                          */
    4502         760 :                                         if (cluster_idx != 0) {
    4503         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4504         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j); /* return value ignored; the bit is set before the free-cluster check below */
    4505         540 :                                                 if (bs->num_free_clusters == 0) {
    4506           0 :                                                         return -ENOSPC;
    4507             :                                                 }
    4508         540 :                                                 bs->num_free_clusters--;
    4509         540 :                                         }
    4510         760 :                                         cluster_count++; /* counts unallocated clusters too */
    4511         760 :                                 }
    4512          68 :                         }
    4513          68 :                         if (cluster_count == 0) {
    4514           0 :                                 return -EINVAL;
    4515             :                         }
    4516         635 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4517             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4518             :                         uint32_t                                        i;
    4519          78 :                         uint32_t                                        cluster_count = 0;
    4520             :                         uint32_t                                        cluster_idx;
    4521             :                         size_t                                          cluster_idx_length;
    4522             : 
    4523          78 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    4524          78 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx); /* may wrap when length is too small; the check just below rejects that case before the value is used */
    4525             : 
    4526          78 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4527          78 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4528           0 :                                 return -EINVAL;
    4529             :                         }
    4530             : 
    4531         978 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4532         900 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4533             :                                 /*
    4534             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4535             :                                  * in the used cluster map.
    4536             :                                  */
    4537         900 :                                 if (cluster_idx != 0) {
    4538         900 :                                         if (cluster_idx < desc_extent->start_cluster_idx && /* NOTE(review): "below start AND at or above start + count" looks unsatisfiable unless the sum wraps, so this check seems to be dead code - confirm the intended condition */
    4539           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4540           0 :                                                 return -EINVAL;
    4541             :                                         }
    4542         900 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx); /* return value ignored, as in the EXTENT_RLE branch */
    4543         900 :                                         if (bs->num_free_clusters == 0) {
    4544           0 :                                                 return -ENOSPC;
    4545             :                                         }
    4546         900 :                                         bs->num_free_clusters--;
    4547         900 :                                 }
    4548         900 :                                 cluster_count++;
    4549         900 :                         }
    4550             : 
    4551          78 :                         if (cluster_count == 0) {
    4552           0 :                                 return -EINVAL;
    4553             :                         }
    4554         567 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4555             :                         /* Skip this item */
    4556         489 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4557             :                         /* Skip this item */
    4558         394 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4559             :                         /* Skip this item */
    4560         318 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4561             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4562         123 :                         uint32_t num_extent_pages = ctx->num_extent_pages; /* starts from the count already collected in ctx */
    4563             :                         uint32_t i;
    4564             :                         size_t extent_pages_length;
    4565             :                         void *tmp;
    4566             : 
    4567         123 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
    4568         123 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters); /* NOTE(review): wraps if length is non-zero but smaller than sizeof(num_clusters); only length == 0 is rejected explicitly below */
    4569             : 
    4570         123 :                         if (desc_extent_table->length == 0 ||
    4571         123 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4572           0 :                                 return -EINVAL;
    4573             :                         }
    4574             : 
    4575         240 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) { /* first pass: validate the entries and count the allocated ones */
    4576         117 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4577          78 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4578           0 :                                                 return -EINVAL;
    4579             :                                         }
    4580          78 :                                         num_extent_pages += 1;
    4581          78 :                                 }
    4582         117 :                         }
    4583             : 
    4584         123 :                         if (num_extent_pages > 0) {
    4585          78 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t)); /* grown through tmp so the old array is not lost when realloc fails */
    4586          78 :                                 if (tmp == NULL) {
    4587           0 :                                         return -ENOMEM;
    4588             :                                 }
    4589          78 :                                 ctx->extent_page_num = tmp;
    4590             : 
    4591             :                                 /* Extent table entries contain md page numbers for extent pages.
    4592             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4593             :                                  */
    4594         156 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) { /* second pass: append the allocated page numbers */
    4595          78 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4596          78 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4597          78 :                                                 ctx->num_extent_pages += 1;
    4598          78 :                                         }
    4599          78 :                                 }
    4600          78 :                         }
    4601         123 :                 } else {
    4602             :                         /* Error */
    4603           0 :                         return -EINVAL; /* unknown descriptor type */
    4604             :                 }
    4605             :                 /* Advance to the next descriptor */
    4606         635 :                 cur_desc += sizeof(*desc) + desc->length;
    4607         635 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) { /* no room left for another descriptor header */
    4608          25 :                         break;
    4609             :                 }
    4610         610 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4611             :         }
    4612         323 :         return 0;
    4613         323 : }
    4614             : 
    4615             : static bool /* Returns true only if page has a matching CRC, sequence number 0, and a first descriptor of type EXTENT_PAGE that fits in the page and is followed by no further data. */
    4616        1884 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4617             : {
    4618             :         uint32_t crc;
    4619        1884 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors; /* first descriptor in the page */
    4620             :         size_t desc_len;
    4621             : 
    4622        1884 :         crc = blob_md_page_calc_crc(page);
    4623        1884 :         if (crc != page->crc) {
    4624           0 :                 return false;
    4625             :         }
    4626             : 
    4627             :         /* Extent page should always be of sequence num 0. */
    4628        1884 :         if (page->sequence_num != 0) {
    4629          55 :                 return false;
    4630             :         }
    4631             : 
    4632             :         /* Descriptor type must be EXTENT_PAGE. */
    4633        1829 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4634         195 :                 return false;
    4635             :         }
    4636             : 
    4637             :         /* Descriptor length cannot exceed the page. */
    4638        1634 :         desc_len = sizeof(*desc) + desc->length; /* descriptor header plus payload, in bytes */
    4639        1634 :         if (desc_len > sizeof(page->descriptors)) {
    4640           0 :                 return false;
    4641             :         }
    4642             : 
    4643             :         /* It has to be the only descriptor in the page. */
    4644        1634 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) { /* only inspect a following descriptor if its header fits in the page */
    4645        1634 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4646        1634 :                 if (desc->length != 0) { /* only the length of the following descriptor is checked, not its type */
    4647           0 :                         return false;
    4648             :                 }
    4649        1634 :         }
    4650             : 
    4651        1634 :         return true;
    4652        1884 : }
    4653             : 
    4654             : static bool /* Returns true if ctx->page has a matching CRC and, when it is the first page of a sequence, its id matches the blob id derived from ctx->cur_page. */
    4655        8531 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4656             : {
    4657             :         uint32_t crc;
    4658        8531 :         struct spdk_blob_md_page *page = ctx->page;
    4659             : 
    4660        8531 :         crc = blob_md_page_calc_crc(page);
    4661        8531 :         if (crc != page->crc) {
    4662        8254 :                 return false;
    4663             :         }
    4664             : 
    4665             :         /* First page of a sequence should match the blobid. */
    4666         277 :         if (page->sequence_num == 0 &&
    4667         222 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4668          27 :                 return false;
    4669             :         }
    4670         250 :         assert(bs_load_cur_extent_page_valid(page) == false); /* debug-only check that this page is not also accepted as an extent page; compiled out with NDEBUG */
    4671             : 
    4672         250 :         return true;
    4673        8531 : }
    4674             : 
    4675             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4676             : 
    4677             : static void /* Completion of the used-clusters write started by bs_load_write_used_blobids_cpl: frees the mask buffer and completes the load. */
    4678         134 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx; seq is unused */
    4679             : {
    4680         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4681             : 
    4682         134 :         spdk_free(ctx->mask);
    4683         134 :         ctx->mask = NULL; /* cleared so bs_load_ctx_fail() does not free the buffer a second time */
    4684             : 
    4685         134 :         if (bserrno != 0) {
    4686           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4687           0 :                 return;
    4688             :         }
    4689             : 
    4690         134 :         bs_load_complete(ctx);
    4691         134 : }
    4692             : 
    4693             : static void /* Completion of the used-blobids write started by bs_load_write_used_pages_cpl: frees the mask buffer, then writes the used-clusters mask. */
    4694         134 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx */
    4695             : {
    4696         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4697             : 
    4698         134 :         spdk_free(ctx->mask);
    4699         134 :         ctx->mask = NULL; /* cleared so bs_load_ctx_fail() does not free the buffer a second time */
    4700             : 
    4701         134 :         if (bserrno != 0) {
    4702           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4703           0 :                 return;
    4704             :         }
    4705             : 
    4706         134 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4707         134 : }
    4708             : 
    4709             : static void /* Completion of the write started by bs_load_write_used_md: frees the mask buffer, then writes the used-blobids mask. */
    4710         134 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno) /* cb_arg is the struct spdk_bs_load_ctx */
    4711             : {
    4712         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4713             : 
    4714         134 :         spdk_free(ctx->mask);
    4715         134 :         ctx->mask = NULL; /* cleared so bs_load_ctx_fail() does not free the buffer a second time */
    4716             : 
    4717         134 :         if (bserrno != 0) {
    4718           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4719           0 :                 return;
    4720             :         }
    4721             : 
    4722         134 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4723         134 : }
    4724             : 
    4725             : static void /* Starts the mask write-back chain after replay; bs_load_write_used_pages_cpl runs when the first write completes. */
    4726         134 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4727             : {
    4728         134 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4729         134 : }
    4730             : 
    4731             : static void /* Called when replay of one metadata page chain is done: moves on to the next md page not yet marked used or, once past the md region, claims the metadata clusters and writes the rebuilt masks. */
    4732        8481 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4733             : {
    4734             :         uint64_t num_md_clusters;
    4735             :         uint64_t i;
    4736             : 
    4737        8481 :         ctx->in_page_chain = false;
    4738             : 
    4739        8481 :         do {
    4740        8576 :                 ctx->page_index++;
    4741        8576 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true); /* skips pages already marked in used_md_pages; presumably the lookup is false past the end of the array, which ends the loop - TODO confirm */
    4742             : 
    4743        8481 :         if (ctx->page_index < ctx->super->md_len) {
    4744        8347 :                 ctx->cur_page = ctx->page_index;
    4745        8347 :                 bs_load_replay_cur_md_page(ctx); /* replay continues with the next candidate page */
    4746        8347 :         } else {
    4747             :                 /* Claim all of the clusters used by the metadata */
    4748         134 :                 num_md_clusters = spdk_divide_round_up(
    4749         134 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4750         629 :                 for (i = 0; i < num_md_clusters; i++) {
    4751         495 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4752         495 :                 }
    4753         134 :                 ctx->bs->num_free_clusters -= num_md_clusters; /* NOTE(review): subtracted unconditionally - assumes none of clusters 0..num_md_clusters-1 were already counted as used during replay; confirm */
    4754         134 :                 spdk_free(ctx->page);
    4755         134 :                 bs_load_write_used_md(ctx); /* persist the rebuilt masks */
    4756             :         }
    4757        8481 : }
    4758             : 
    4759             : static void
    4760          78 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4761             : {
    4762          78 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4763             :         uint32_t page_num;
    4764             :         uint64_t i;
    4765             : 
    4766          78 :         if (bserrno != 0) {
    4767           0 :                 spdk_free(ctx->extent_pages);
    4768           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4769           0 :                 return;
    4770             :         }
    4771             : 
    4772         156 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4773             :                 /* Extent pages are only read when present within in chain md.
    4774             :                  * Integrity of md is not right if that page was not a valid extent page. */
    4775          78 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4776           0 :                         spdk_free(ctx->extent_pages);
    4777           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4778           0 :                         return;
    4779             :                 }
    4780             : 
    4781          78 :                 page_num = ctx->extent_page_num[i];
    4782          78 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4783          78 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4784           0 :                         spdk_free(ctx->extent_pages);
    4785           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4786           0 :                         return;
    4787             :                 }
    4788          78 :         }
    4789             : 
    4790          78 :         spdk_free(ctx->extent_pages);
    4791          78 :         free(ctx->extent_page_num);
    4792          78 :         ctx->extent_page_num = NULL;
    4793          78 :         ctx->num_extent_pages = 0;
    4794             : 
    4795          78 :         bs_load_replay_md_chain_cpl(ctx);
    4796          78 : }
    4797             : 
    4798             : static void
    4799          78 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4800             : {
    4801             :         spdk_bs_batch_t *batch;
    4802             :         uint32_t page;
    4803             :         uint64_t lba;
    4804             :         uint64_t i;
    4805             : 
    4806          78 :         ctx->extent_pages = spdk_zmalloc(ctx->super->md_page_size * ctx->num_extent_pages, 0,
    4807             :                                          NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4808          78 :         if (!ctx->extent_pages) {
    4809           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4810           0 :                 return;
    4811             :         }
    4812             : 
    4813          78 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4814             : 
    4815         156 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4816          78 :                 page = ctx->extent_page_num[i];
    4817          78 :                 assert(page < ctx->super->md_len);
    4818          78 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4819         156 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4820          78 :                                   bs_byte_to_lba(ctx->bs, ctx->super->md_page_size));
    4821          78 :         }
    4822             : 
    4823          78 :         bs_batch_close(batch);
    4824          78 : }
    4825             : 
    4826             : static void
    4827        8531 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4828             : {
    4829        8531 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4830             :         uint32_t page_num;
    4831             :         struct spdk_blob_md_page *page;
    4832             : 
    4833        8531 :         if (bserrno != 0) {
    4834           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4835           0 :                 return;
    4836             :         }
    4837             : 
    4838        8531 :         page_num = ctx->cur_page;
    4839        8531 :         page = ctx->page;
    4840        8531 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4841         250 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4842         245 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4843         245 :                         bs_claim_md_page(ctx->bs, page_num);
    4844         245 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4845         245 :                         if (page->sequence_num == 0) {
    4846         195 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4847         195 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4848         195 :                         }
    4849         245 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4850           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4851           0 :                                 return;
    4852             :                         }
    4853         245 :                         if (page->next != SPDK_INVALID_MD_PAGE) {
    4854          50 :                                 ctx->in_page_chain = true;
    4855          50 :                                 ctx->cur_page = page->next;
    4856          50 :                                 bs_load_replay_cur_md_page(ctx);
    4857          50 :                                 return;
    4858             :                         }
    4859         195 :                         if (ctx->num_extent_pages != 0) {
    4860          78 :                                 bs_load_replay_extent_pages(ctx);
    4861          78 :                                 return;
    4862             :                         }
    4863         117 :                 }
    4864         122 :         }
    4865        8403 :         bs_load_replay_md_chain_cpl(ctx);
    4866        8531 : }
    4867             : 
    4868             : static void
    4869        8531 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4870             : {
    4871             :         uint64_t lba;
    4872             : 
    4873        8531 :         assert(ctx->cur_page < ctx->super->md_len);
    4874        8531 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4875       17062 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4876        8531 :                              bs_byte_to_lba(ctx->bs, ctx->super->md_page_size),
    4877        8531 :                              bs_load_replay_md_cpl, ctx);
    4878        8531 : }
    4879             : 
    4880             : static void
    4881         134 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4882             : {
    4883         134 :         ctx->page_index = 0;
    4884         134 :         ctx->cur_page = 0;
    4885         134 :         ctx->page = spdk_zmalloc(ctx->bs->md_page_size, 0,
    4886             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4887         134 :         if (!ctx->page) {
    4888           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4889           0 :                 return;
    4890             :         }
    4891         134 :         bs_load_replay_cur_md_page(ctx);
    4892         134 : }
    4893             : 
    4894             : static void
    4895         134 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4896             : {
    4897             :         int             rc;
    4898             : 
    4899         134 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4900         134 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4901         134 :         if (rc < 0) {
    4902           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4903           0 :                 return;
    4904             :         }
    4905             : 
    4906         134 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4907         134 :         if (rc < 0) {
    4908           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4909           0 :                 return;
    4910             :         }
    4911             : 
    4912         134 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4913         134 :         if (rc < 0) {
    4914           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4915           0 :                 return;
    4916             :         }
    4917             : 
    4918         134 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4919         134 :         if (rc < 0) {
    4920           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4921           0 :                 return;
    4922             :         }
    4923             : 
    4924         134 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4925         134 :         bs_load_replay_md(ctx);
    4926         134 : }
    4927             : 
    4928             : static int
    4929         356 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4930             : {
    4931             :         int rc;
    4932             : 
    4933         356 :         if (ctx->super->size == 0) {
    4934          10 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4935          10 :         }
    4936             : 
    4937         356 :         if (ctx->super->io_unit_size == 0) {
    4938          10 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4939          10 :         }
    4940         356 :         if (ctx->super->md_page_size == 0) {
    4941           5 :                 ctx->super->md_page_size = SPDK_BS_PAGE_SIZE;
    4942           5 :         }
    4943             : 
    4944         356 :         ctx->bs->clean = 1;
    4945         356 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4946         356 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4947         356 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4948         356 :         ctx->bs->md_page_size = ctx->super->md_page_size;
    4949         356 :         bs_init_per_cluster_fields(ctx->bs);
    4950         356 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4951         356 :         if (rc < 0) {
    4952           0 :                 return -ENOMEM;
    4953             :         }
    4954         356 :         ctx->bs->md_start = ctx->super->md_start;
    4955         356 :         ctx->bs->md_len = ctx->super->md_len;
    4956         356 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4957         356 :         if (rc < 0) {
    4958           0 :                 return -ENOMEM;
    4959             :         }
    4960             : 
    4961         712 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4962         356 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4963         356 :         ctx->bs->super_blob = ctx->super->super_blob;
    4964         356 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4965             : 
    4966         356 :         return 0;
    4967         356 : }
    4968             : 
    4969             : static void
    4970         386 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4971             : {
    4972         386 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4973             :         int rc;
    4974             : 
    4975         386 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4976         386 :         if (rc != 0) {
    4977          30 :                 bs_load_ctx_fail(ctx, rc);
    4978          30 :                 return;
    4979             :         }
    4980             : 
    4981         356 :         rc = bs_parse_super(ctx);
    4982         356 :         if (rc < 0) {
    4983           0 :                 bs_load_ctx_fail(ctx, rc);
    4984           0 :                 return;
    4985             :         }
    4986             : 
    4987         356 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4988         134 :                 bs_recover(ctx);
    4989         134 :         } else {
    4990         222 :                 bs_load_read_used_pages(ctx);
    4991             :         }
    4992         386 : }
    4993             : 
    4994             : static inline int
    4995         395 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4996             : {
    4997             : 
    4998         395 :         if (!src->opts_size) {
    4999           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    5000           0 :                 return -1;
    5001             :         }
    5002             : 
    5003             : #define FIELD_OK(field) \
    5004             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    5005             : 
    5006             : #define SET_FIELD(field) \
    5007             :         if (FIELD_OK(field)) { \
    5008             :                 dst->field = src->field; \
    5009             :         } \
    5010             : 
    5011         395 :         SET_FIELD(cluster_sz);
    5012         395 :         SET_FIELD(num_md_pages);
    5013         395 :         SET_FIELD(max_md_ops);
    5014         395 :         SET_FIELD(max_channel_ops);
    5015         395 :         SET_FIELD(clear_method);
    5016             : 
    5017         395 :         if (FIELD_OK(bstype)) {
    5018         395 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    5019         395 :         }
    5020         395 :         SET_FIELD(md_page_size);
    5021         395 :         SET_FIELD(iter_cb_fn);
    5022         395 :         SET_FIELD(iter_cb_arg);
    5023         395 :         SET_FIELD(force_recover);
    5024         395 :         SET_FIELD(esnap_bs_dev_create);
    5025         395 :         SET_FIELD(esnap_ctx);
    5026             : 
    5027         395 :         dst->opts_size = src->opts_size;
    5028             : 
    5029             :         /* You should not remove this statement, but need to update the assert statement
    5030             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    5031             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    5032             : 
    5033             : #undef FIELD_OK
    5034             : #undef SET_FIELD
    5035             : 
    5036         395 :         return 0;
    5037         395 : }
    5038             : 
    5039             : void
    5040         401 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5041             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5042             : {
    5043             :         struct spdk_blob_store  *bs;
    5044             :         struct spdk_bs_cpl      cpl;
    5045             :         struct spdk_bs_load_ctx *ctx;
    5046         401 :         struct spdk_bs_opts     opts = {};
    5047             :         int err;
    5048             : 
    5049         401 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5050             : 
    5051         401 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    5052           5 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5053           5 :                 dev->destroy(dev);
    5054           5 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5055           5 :                 return;
    5056             :         }
    5057             : 
    5058         396 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5059         396 :         if (o) {
    5060         162 :                 if (bs_opts_copy(o, &opts)) {
    5061           0 :                         dev->destroy(dev);
    5062           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
    5063           0 :                         return;
    5064             :                 }
    5065         162 :         }
    5066             : 
    5067         396 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5068          10 :                 dev->destroy(dev);
    5069          10 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5070          10 :                 return;
    5071             :         }
    5072             : 
    5073         386 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5074         386 :         if (err) {
    5075           0 :                 dev->destroy(dev);
    5076           0 :                 cb_fn(cb_arg, NULL, err);
    5077           0 :                 return;
    5078             :         }
    5079             : 
    5080         386 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5081         386 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5082         386 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5083         386 :         cpl.u.bs_handle.bs = bs;
    5084             : 
    5085         386 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5086         386 :         if (!ctx->seq) {
    5087           0 :                 spdk_free(ctx->super);
    5088           0 :                 free(ctx);
    5089           0 :                 bs_free(bs);
    5090           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5091           0 :                 return;
    5092             :         }
    5093             : 
    5094             :         /* Read the super block */
    5095         772 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5096         386 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5097         386 :                              bs_load_super_cpl, ctx);
    5098         401 : }
    5099             : 
    5100             : /* END spdk_bs_load */
    5101             : 
    5102             : /* START spdk_bs_dump */
    5103             : 
    5104             : static void
    5105           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5106             : {
    5107           0 :         spdk_free(ctx->super);
    5108             : 
    5109             :         /*
    5110             :          * We need to defer calling bs_call_cpl() until after
    5111             :          * dev destruction, so tuck these away for later use.
    5112             :          */
    5113           0 :         ctx->bs->unload_err = bserrno;
    5114           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5115           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5116             : 
    5117           0 :         bs_sequence_finish(seq, 0);
    5118           0 :         bs_free(ctx->bs);
    5119           0 :         free(ctx);
    5120           0 : }
    5121             : 
    5122             : static void
    5123           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5124             : {
    5125             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5126             :         uint32_t i;
    5127             :         const char *type;
    5128             : 
    5129           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5130             : 
    5131           0 :         if (desc_xattr->length !=
    5132           0 :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5133           0 :             desc_xattr->name_length + desc_xattr->value_length) {
    5134           0 :         }
    5135             : 
    5136           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5137           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5138           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5139           0 :                 type = "XATTR";
    5140           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5141           0 :                 type = "XATTR_INTERNAL";
    5142           0 :         } else {
    5143           0 :                 assert(false);
    5144             :                 type = "XATTR_?";
    5145             :         }
    5146           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5147           0 :         fprintf(ctx->fp, "       value = \"");
    5148           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5149           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
    5150           0 :                             desc_xattr->value_length);
    5151           0 :         fprintf(ctx->fp, "\"\n");
    5152           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5153           0 :                 if (i % 16 == 0) {
    5154           0 :                         fprintf(ctx->fp, "               ");
    5155           0 :                 }
    5156           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5157           0 :                 if ((i + 1) % 16 == 0) {
    5158           0 :                         fprintf(ctx->fp, "\n");
    5159           0 :                 }
    5160           0 :         }
    5161           0 :         if (i % 16 != 0) {
    5162           0 :                 fprintf(ctx->fp, "\n");
    5163           0 :         }
    5164           0 : }
    5165             : 
    5166             : struct type_flag_desc {
    5167             :         uint64_t mask;
    5168             :         uint64_t val;
    5169             :         const char *name;
    5170             : };
    5171             : 
    5172             : static void
    5173           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5174             :                         struct type_flag_desc *desc, size_t numflags)
    5175             : {
    5176           0 :         uint64_t covered = 0;
    5177             :         size_t i;
    5178             : 
    5179           0 :         for (i = 0; i < numflags; i++) {
    5180           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5181           0 :                         continue;
    5182             :                 }
    5183           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5184           0 :                 if (desc[i].mask != desc[i].val) {
    5185           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5186           0 :                                 desc[i].mask, desc[i].val);
    5187           0 :                 }
    5188           0 :                 fprintf(ctx->fp, "\n");
    5189           0 :                 covered |= desc[i].mask;
    5190           0 :         }
    5191           0 :         if ((flags & ~covered) != 0) {
    5192           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5193           0 :         }
    5194           0 : }
    5195             : 
    5196             : static void
    5197           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5198             : {
    5199             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5200             : #define ADD_FLAG(f) { f, f, #f }
    5201             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5202             :         static struct type_flag_desc invalid[] = {
    5203             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5204             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5205             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5206             :         };
    5207             :         static struct type_flag_desc data_ro[] = {
    5208             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5209             :         };
    5210             :         static struct type_flag_desc md_ro[] = {
    5211             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5212             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5213             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5214             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5215             :         };
    5216             : #undef ADD_FLAG
    5217             : #undef ADD_MASK_VAL
    5218             : 
    5219           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5220           0 :         fprintf(ctx->fp, "Flags:\n");
    5221           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5222           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5223             :                                 SPDK_COUNTOF(invalid));
    5224           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5225           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5226             :                                 SPDK_COUNTOF(data_ro));
    5227           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5228           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5229             :                                 SPDK_COUNTOF(md_ro));
    5230           0 : }
    5231             : 
    5232             : static void
    5233           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5234             : {
    5235             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5236             :         uint64_t num_extent_pages;
    5237             :         uint32_t et_idx;
    5238             : 
    5239           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5240           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5241             :                            sizeof(et_desc->extent_page[0]);
    5242             : 
    5243           0 :         fprintf(ctx->fp, "Extent table:\n");
    5244           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5245           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5246             :                         /* Zeroes represent unallocated extent pages. */
    5247           0 :                         continue;
    5248             :                 }
    5249           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5250           0 :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5251           0 :                         et_desc->extent_page[et_idx].num_pages,
    5252           0 :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5253           0 :         }
    5254           0 : }
    5255             : 
    5256             : static void
    5257           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5258             : {
    5259           0 :         uint32_t page_idx = ctx->cur_page;
    5260           0 :         struct spdk_blob_md_page *page = ctx->page;
    5261             :         struct spdk_blob_md_descriptor *desc;
    5262           0 :         size_t cur_desc = 0;
    5263             :         uint32_t crc;
    5264             : 
    5265           0 :         fprintf(ctx->fp, "=========\n");
    5266           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5267           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5268           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5269           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5270           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5271           0 :                 fprintf(ctx->fp, "Next: None\n");
    5272           0 :         } else {
    5273           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5274             :         }
    5275           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5276           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5277           0 :                 fprintf(ctx->fp, " md");
    5278           0 :         }
    5279           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5280           0 :                 fprintf(ctx->fp, " blob");
    5281           0 :         }
    5282           0 :         fprintf(ctx->fp, "\n");
    5283             : 
    5284           0 :         crc = blob_md_page_calc_crc(page);
    5285           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5286             : 
    5287           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5288           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5289           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5290           0 :                         if (desc->length == 0) {
    5291             :                                 /* If padding and length are 0, this terminates the page */
    5292           0 :                                 break;
    5293             :                         }
    5294           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5295             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5296             :                         unsigned int                            i;
    5297             : 
    5298           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5299             : 
    5300           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5301           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5302           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5303           0 :                                                 desc_extent_rle->extents[i].cluster_idx);
    5304           0 :                                 } else {
    5305           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5306             :                                 }
    5307           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5308           0 :                                 fprintf(ctx->fp, "\n");
    5309           0 :                         }
    5310           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5311             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5312             :                         unsigned int                                    i;
    5313             : 
    5314           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5315             : 
    5316           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5317           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5318           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5319           0 :                                                 desc_extent->cluster_idx[i]);
    5320           0 :                                 } else {
    5321           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5322             :                                 }
    5323           0 :                                 fprintf(ctx->fp, "\n");
    5324           0 :                         }
    5325           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5326           0 :                         bs_dump_print_xattr(ctx, desc);
    5327           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5328           0 :                         bs_dump_print_xattr(ctx, desc);
    5329           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5330           0 :                         bs_dump_print_type_flags(ctx, desc);
    5331           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5332           0 :                         bs_dump_print_extent_table(ctx, desc);
    5333           0 :                 } else {
    5334             :                         /* Error */
    5335           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5336             :                 }
    5337             :                 /* Advance to the next descriptor */
    5338           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5339           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5340           0 :                         break;
    5341             :                 }
    5342           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5343             :         }
    5344           0 : }
    5345             : 
    5346             : static void
    5347           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5348             : {
    5349           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5350             : 
    5351           0 :         if (bserrno != 0) {
    5352           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5353           0 :                 return;
    5354             :         }
    5355             : 
    5356           0 :         if (ctx->page->id != 0) {
    5357           0 :                 bs_dump_print_md_page(ctx);
    5358           0 :         }
    5359             : 
    5360           0 :         ctx->cur_page++;
    5361             : 
    5362           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5363           0 :                 bs_dump_read_md_page(seq, ctx);
    5364           0 :         } else {
    5365           0 :                 spdk_free(ctx->page);
    5366           0 :                 bs_dump_finish(seq, ctx, 0);
    5367             :         }
    5368           0 : }
    5369             : 
    5370             : static void
    5371           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5372             : {
    5373           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5374             :         uint64_t lba;
    5375             : 
    5376           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5377           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5378           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5379           0 :                              bs_byte_to_lba(ctx->bs, ctx->super->md_page_size),
    5380           0 :                              bs_dump_read_md_page_cpl, ctx);
    5381           0 : }
    5382             : 
    5383             : static void
    5384           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5385             : {
    5386           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5387             :         int rc;
    5388             : 
    5389           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5390           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5391           0 :                    sizeof(ctx->super->signature)) != 0) {
    5392           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5393           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5394           0 :                 return;
    5395             :         } else {
    5396           0 :                 fprintf(ctx->fp, "(OK)\n");
    5397             :         }
    5398           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5399           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5400           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5401           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5402           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5403           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5404           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5405           0 :                 fprintf(ctx->fp, "(None)\n");
    5406           0 :         } else {
    5407           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5408             :         }
    5409           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5410           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5411           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5412           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5413           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5414           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5415           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5416           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5417           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5418             : 
    5419           0 :         ctx->cur_page = 0;
    5420           0 :         ctx->page = spdk_zmalloc(ctx->super->md_page_size, 0,
    5421             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5422           0 :         if (!ctx->page) {
    5423           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5424           0 :                 return;
    5425             :         }
    5426             : 
    5427           0 :         rc = bs_parse_super(ctx);
    5428           0 :         if (rc < 0) {
    5429           0 :                 bs_load_ctx_fail(ctx, rc);
    5430           0 :                 return;
    5431             :         }
    5432             : 
    5433           0 :         bs_load_read_used_pages(ctx);
    5434           0 : }
    5435             : 
    5436             : void
    5437           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5438             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5439             : {
    5440             :         struct spdk_blob_store  *bs;
    5441             :         struct spdk_bs_cpl      cpl;
    5442             :         struct spdk_bs_load_ctx *ctx;
    5443           0 :         struct spdk_bs_opts     opts = {};
    5444             :         int err;
    5445             : 
    5446           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5447             : 
    5448           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5449             : 
    5450           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5451           0 :         if (err) {
    5452           0 :                 dev->destroy(dev);
    5453           0 :                 cb_fn(cb_arg, err);
    5454           0 :                 return;
    5455             :         }
    5456             : 
    5457           0 :         ctx->dumping = true;
    5458           0 :         ctx->fp = fp;
    5459           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5460             : 
    5461           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5462           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5463           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5464             : 
    5465           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5466           0 :         if (!ctx->seq) {
    5467           0 :                 spdk_free(ctx->super);
    5468           0 :                 free(ctx);
    5469           0 :                 bs_free(bs);
    5470           0 :                 cb_fn(cb_arg, -ENOMEM);
    5471           0 :                 return;
    5472             :         }
    5473             : 
    5474             :         /* Read the super block */
    5475           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5476           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5477           0 :                              bs_dump_super_cpl, ctx);
    5478           0 : }
    5479             : 
    5480             : /* END spdk_bs_dump */
    5481             : 
    5482             : /* START spdk_bs_init */
    5483             : 
    5484             : static void
    5485         592 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5486             : {
    5487         592 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5488             : 
    5489         592 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5490         592 :         spdk_free(ctx->super);
    5491         592 :         free(ctx);
    5492             : 
    5493         592 :         bs_sequence_finish(seq, bserrno);
    5494         592 : }
    5495             : 
    5496             : static void
    5497         592 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5498             : {
    5499         592 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5500             : 
    5501             :         /* Write super block */
    5502        1184 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5503         592 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5504         592 :                               bs_init_persist_super_cpl, ctx);
    5505         592 : }
    5506             : 
    5507             : void
    5508         612 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5509             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5510             : {
    5511             :         struct spdk_bs_load_ctx *ctx;
    5512             :         struct spdk_blob_store  *bs;
    5513             :         struct spdk_bs_cpl      cpl;
    5514             :         spdk_bs_sequence_t      *seq;
    5515             :         spdk_bs_batch_t         *batch;
    5516             :         uint64_t                num_md_lba;
    5517             :         uint64_t                num_md_pages;
    5518             :         uint64_t                num_md_clusters;
    5519             :         uint64_t                max_used_cluster_mask_len;
    5520             :         uint32_t                i;
    5521         612 :         struct spdk_bs_opts     opts = {};
    5522             :         int                     rc;
    5523             :         uint64_t                lba, lba_count;
    5524             : 
    5525         612 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5526         612 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    5527           5 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5528             :                             dev->blocklen);
    5529           5 :                 dev->destroy(dev);
    5530           5 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5531           5 :                 return;
    5532             :         }
    5533             : 
    5534         607 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5535         607 :         if (o) {
    5536         228 :                 if (bs_opts_copy(o, &opts)) {
    5537           0 :                         dev->destroy(dev);
    5538           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
    5539           0 :                         return;
    5540             :                 }
    5541         228 :         }
    5542             : 
    5543         607 :         if (bs_opts_verify(&opts) != 0) {
    5544          10 :                 dev->destroy(dev);
    5545          10 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5546          10 :                 return;
    5547             :         }
    5548             : 
    5549         597 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5550         597 :         if (rc) {
    5551           0 :                 dev->destroy(dev);
    5552           0 :                 cb_fn(cb_arg, NULL, rc);
    5553           0 :                 return;
    5554             :         }
    5555             : 
    5556         597 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5557             :                 /* By default, allocate 1 page per cluster.
    5558             :                  * Technically, this over-allocates metadata
    5559             :                  * because more metadata will reduce the number
    5560             :                  * of usable clusters. This can be addressed with
    5561             :                  * more complex math in the future.
    5562             :                  */
    5563         587 :                 bs->md_len = bs->total_clusters;
    5564         587 :         } else {
    5565          10 :                 bs->md_len = opts.num_md_pages;
    5566             :         }
    5567         597 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5568         597 :         if (rc < 0) {
    5569           0 :                 spdk_free(ctx->super);
    5570           0 :                 free(ctx);
    5571           0 :                 bs_free(bs);
    5572           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5573           0 :                 return;
    5574             :         }
    5575             : 
    5576         597 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5577         597 :         if (rc < 0) {
    5578           0 :                 spdk_free(ctx->super);
    5579           0 :                 free(ctx);
    5580           0 :                 bs_free(bs);
    5581           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5582           0 :                 return;
    5583             :         }
    5584             : 
    5585         597 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5586         597 :         if (rc < 0) {
    5587           0 :                 spdk_free(ctx->super);
    5588           0 :                 free(ctx);
    5589           0 :                 bs_free(bs);
    5590           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5591           0 :                 return;
    5592             :         }
    5593             : 
    5594         597 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5595             :                sizeof(ctx->super->signature));
    5596         597 :         ctx->super->version = SPDK_BS_VERSION;
    5597         597 :         ctx->super->length = sizeof(*ctx->super);
    5598         597 :         ctx->super->super_blob = bs->super_blob;
    5599         597 :         ctx->super->clean = 0;
    5600         597 :         ctx->super->cluster_size = bs->cluster_sz;
    5601         597 :         ctx->super->io_unit_size = bs->io_unit_size;
    5602         597 :         ctx->super->md_page_size = bs->md_page_size;
    5603         597 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5604             : 
    5605             :         /* Calculate how many pages the metadata consumes at the front
    5606             :          * of the disk.
    5607             :          */
    5608             : 
    5609             :         /* The super block uses 1 page */
    5610         597 :         num_md_pages = 1;
    5611             : 
    5612             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5613             :          * up to the nearest page, plus a header.
    5614             :          */
    5615         597 :         ctx->super->used_page_mask_start = num_md_pages;
    5616        1194 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5617         597 :                                          spdk_divide_round_up(bs->md_len, 8),
    5618         597 :                                          ctx->super->md_page_size);
    5619         597 :         num_md_pages += ctx->super->used_page_mask_len;
    5620             : 
    5621             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5622             :          * up to the nearest page, plus a header.
    5623             :          */
    5624         597 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5625        1194 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5626         597 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5627         597 :                                             ctx->super->md_page_size);
    5628             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5629             :          * Here we calculate the max clusters we can support according to the
    5630             :          * num_md_pages (bs->md_len).
    5631             :          */
    5632        1194 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5633         597 :                                     spdk_divide_round_up(bs->md_len, 8),
    5634         597 :                                     ctx->super->md_page_size);
    5635         597 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5636             :                                              ctx->super->used_cluster_mask_len);
    5637         597 :         num_md_pages += max_used_cluster_mask_len;
    5638             : 
    5639             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5640             :          * up to the nearest page, plus a header.
    5641             :          */
    5642         597 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5643        1194 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5644         597 :                                            spdk_divide_round_up(bs->md_len, 8),
    5645         597 :                                            ctx->super->md_page_size);
    5646         597 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5647             : 
    5648             :         /* The metadata region size was chosen above */
    5649         597 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5650         597 :         ctx->super->md_len = bs->md_len;
    5651         597 :         num_md_pages += bs->md_len;
    5652             : 
    5653         597 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5654             : 
    5655         597 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5656             : 
    5657         597 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5658             : 
    5659         597 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5660         597 :         if (num_md_clusters > bs->total_clusters) {
    5661           5 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5662             :                             "please decrease number of pages reserved for metadata "
    5663             :                             "or increase cluster size.\n");
    5664           5 :                 spdk_free(ctx->super);
    5665           5 :                 spdk_bit_array_free(&ctx->used_clusters);
    5666           5 :                 free(ctx);
    5667           5 :                 bs_free(bs);
    5668           5 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5669           5 :                 return;
    5670             :         }
    5671             :         /* Claim all of the clusters used by the metadata */
    5672       79368 :         for (i = 0; i < num_md_clusters; i++) {
    5673       78776 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5674       78776 :         }
    5675             : 
    5676         592 :         bs->num_free_clusters -= num_md_clusters;
    5677         592 :         bs->total_data_clusters = bs->num_free_clusters;
    5678             : 
    5679         592 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5680         592 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5681         592 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5682         592 :         cpl.u.bs_handle.bs = bs;
    5683             : 
    5684         592 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5685         592 :         if (!seq) {
    5686           0 :                 spdk_free(ctx->super);
    5687           0 :                 free(ctx);
    5688           0 :                 bs_free(bs);
    5689           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5690           0 :                 return;
    5691             :         }
    5692             : 
    5693         592 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5694             : 
    5695             :         /* Clear metadata space */
    5696         592 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5697             : 
    5698         592 :         lba = num_md_lba;
    5699         592 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5700         592 :         switch (opts.clear_method) {
    5701             :         case BS_CLEAR_WITH_UNMAP:
    5702             :                 /* Trim data clusters */
    5703         572 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5704         572 :                 break;
    5705             :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5706             :                 /* Write_zeroes to data clusters */
    5707           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5708           0 :                 break;
    5709          20 :         case BS_CLEAR_WITH_NONE:
    5710             :         default:
    5711          20 :                 break;
    5712             :         }
    5713             : 
    5714         592 :         bs_batch_close(batch);
    5715         612 : }
    5716             : 
    5717             : /* END spdk_bs_init */
    5718             : 
    5719             : /* START spdk_bs_destroy */
    5720             : 
    5721             : static void
    5722          10 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5723             : {
    5724          10 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5725          10 :         struct spdk_blob_store *bs = ctx->bs;
    5726             : 
    5727          10 :         free(ctx);
    5728             : 
    5729          10 :         if (bserrno != 0) {
    5730           5 :                 bs_sequence_finish(seq, bserrno);
    5731           5 :                 return;
    5732             :         }
    5733             : 
    5734             :         /*
    5735             :          * We need to defer calling bs_call_cpl() until after
    5736             :          * dev destruction, so tuck these away for later use.
    5737             :          */
    5738           5 :         bs->unload_err = bserrno;
    5739           5 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5740           5 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5741           5 :         bs_sequence_finish(seq, bserrno);
    5742             : 
    5743           5 :         bs_free(bs);
    5744          10 : }
    5745             : 
    5746             : void
    5747          10 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5748             :                 void *cb_arg)
    5749             : {
    5750             :         struct spdk_bs_cpl      cpl;
    5751             :         spdk_bs_sequence_t      *seq;
    5752             :         struct spdk_bs_load_ctx *ctx;
    5753             : 
    5754          10 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5755             : 
    5756          10 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5757           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5758           0 :                 cb_fn(cb_arg, -EBUSY);
    5759           0 :                 return;
    5760             :         }
    5761             : 
    5762          10 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5763          10 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5764          10 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5765             : 
    5766          10 :         ctx = calloc(1, sizeof(*ctx));
    5767          10 :         if (!ctx) {
    5768           0 :                 cb_fn(cb_arg, -ENOMEM);
    5769           0 :                 return;
    5770             :         }
    5771             : 
    5772          10 :         ctx->bs = bs;
    5773             : 
    5774          10 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5775          10 :         if (!seq) {
    5776           0 :                 free(ctx);
    5777           0 :                 cb_fn(cb_arg, -ENOMEM);
    5778           0 :                 return;
    5779             :         }
    5780             : 
    5781             :         /* Write zeroes to the super block */
    5782          20 :         bs_sequence_write_zeroes_dev(seq,
    5783          10 :                                      bs_page_to_lba(bs, 0),
    5784          10 :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5785          10 :                                      bs_destroy_trim_cpl, ctx);
    5786          10 : }
    5787             : 
    5788             : /* END spdk_bs_destroy */
    5789             : 
    5790             : /* START spdk_bs_unload */
    5791             : 
    5792             : static void
    5793         829 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5794             : {
    5795         829 :         spdk_bs_sequence_t *seq = ctx->seq;
    5796         829 :         struct spdk_blob_store *bs = ctx->bs;
    5797             : 
    5798         829 :         spdk_free(ctx->super);
    5799         829 :         free(ctx);
    5800             : 
    5801         829 :         if (bserrno != 0) {
    5802          10 :                 bs_sequence_finish(seq, bserrno);
    5803          10 :                 return;
    5804             :         }
    5805             : 
    5806             :         /*
    5807             :          * We need to defer calling bs_call_cpl() until after
    5808             :          * dev destruction, so tuck these away for later use.
    5809             :          */
    5810         819 :         bs->unload_err = bserrno;
    5811         819 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5812         819 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5813         819 :         bs_sequence_finish(seq, bserrno);
    5814             : 
    5815         819 :         bs_free(bs);
    5816         829 : }
    5817             : 
    5818             : static void
    5819         819 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5820             : {
    5821         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5822             : 
    5823         819 :         bs_unload_finish(ctx, bserrno);
    5824         819 : }
    5825             : /* Completion passed to bs_write_used_clusters(): on success marks the super block clean and writes it out. */
    5826             : static void
    5827         819 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5828             : {
    5829         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5830             :         /* ctx->mask is released on both the success and the error path. */
    5831         819 :         spdk_free(ctx->mask);
    5832         819 :         ctx->mask = NULL;
    5833             : 
    5834         819 :         if (bserrno != 0) {
    5835           0 :                 bs_unload_finish(ctx, bserrno);
    5836           0 :                 return;
    5837             :         }
    5838             :         /* clean is set only after the preceding step reported success. */
    5839         819 :         ctx->super->clean = 1;
    5840             : 
    5841         819 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5842         819 : }
    5843             : /* Completion passed to bs_write_used_blobids(): on success continues the unload chain with bs_write_used_clusters(). */
    5844             : static void
    5845         819 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5846             : {
    5847         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5848             :         /* ctx->mask is released on both the success and the error path. */
    5849         819 :         spdk_free(ctx->mask);
    5850         819 :         ctx->mask = NULL;
    5851             : 
    5852         819 :         if (bserrno != 0) {
    5853           0 :                 bs_unload_finish(ctx, bserrno);
    5854           0 :                 return;
    5855             :         }
    5856             :         /* Next step of the chain; its completion is bs_unload_write_used_clusters_cpl(). */
    5857         819 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5858         819 : }
    5859             : /* Completion passed to bs_write_used_md(): on success continues the unload chain with bs_write_used_blobids(). */
    5860             : static void
    5861         829 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5862             : {
    5863         829 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5864             :         /* ctx->mask is released on both the success and the error path. */
    5865         829 :         spdk_free(ctx->mask);
    5866         829 :         ctx->mask = NULL;
    5867             : 
    5868         829 :         if (bserrno != 0) {
    5869          10 :                 bs_unload_finish(ctx, bserrno);
    5870          10 :                 return;
    5871             :         }
    5872             :         /* Next step of the chain; its completion is bs_unload_write_used_blobids_cpl(). */
    5873         819 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5874         829 : }
    5875             : /* Completion of the super block read issued by spdk_bs_unload(): validates the block, then starts the mask writes. */
    5876             : static void
    5877         829 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5878             : {
    5879         829 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5880             :         int rc;
    5881             :         /* A failed read aborts the unload with the read's error code. */
    5882         829 :         if (bserrno != 0) {
    5883           0 :                 bs_unload_finish(ctx, bserrno);
    5884           0 :                 return;
    5885             :         }
    5886             :         /* A super block rejected by bs_super_validate() aborts the unload with that function's return code. */
    5887         829 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5888         829 :         if (rc != 0) {
    5889           0 :                 bs_unload_finish(ctx, rc);
    5890           0 :                 return;
    5891             :         }
    5892             :         /* cb_arg is the same pointer as ctx. */
    5893         829 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5894         829 : }
    5895             : /* Start unloading bs. cb_fn(cb_arg, rc) gets -EBUSY (open blobs, or a deferred unload already pending) or -ENOMEM directly; otherwise the result is reported through the sequence started below. */
    5896             : void
    5897         839 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5898             : {
    5899             :         struct spdk_bs_cpl      cpl;
    5900             :         struct spdk_bs_load_ctx *ctx;
    5901             : 
    5902         839 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5903             : 
    5904             :         /*
    5905             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5906             :          * unload is deferred until after the channel destruction completes.
    5907             :          */
    5908         839 :         if (bs->esnap_channels_unloading != 0) {
    5909           5 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5910           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5911           0 :                         cb_fn(cb_arg, -EBUSY);
    5912           0 :                         return;
    5913             :                 }
    5914           5 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5915             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5916           5 :                 bs->esnap_unload_cb_fn = cb_fn;
    5917           5 :                 bs->esnap_unload_cb_arg = cb_arg;
    5918           5 :                 return; /* cb_fn is only stashed here, not invoked */
    5919             :         }
    5920         834 :         if (bs->esnap_unload_cb_fn != NULL) { /* resuming a previously deferred unload; the asserts require the same cb_fn/cb_arg */
    5921           5 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5922           5 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5923           5 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5924           5 :                 bs->esnap_unload_cb_fn = NULL;
    5925           5 :                 bs->esnap_unload_cb_arg = NULL;
    5926           5 :         }
    5927             :         /* Unload is refused while any blob is still open. */
    5928         834 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5929           5 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5930           5 :                 cb_fn(cb_arg, -EBUSY);
    5931           5 :                 return;
    5932             :         }
    5933             :         /* ctx is zero-initialised; it is freed by bs_unload_finish() or by the error paths below. */
    5934         829 :         ctx = calloc(1, sizeof(*ctx));
    5935         829 :         if (!ctx) {
    5936           0 :                 cb_fn(cb_arg, -ENOMEM);
    5937           0 :                 return;
    5938             :         }
    5939             : 
    5940         829 :         ctx->bs = bs;
    5941             :         /* Buffer the super block is read into. */
    5942         829 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5943             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5944         829 :         if (!ctx->super) {
    5945           0 :                 free(ctx);
    5946           0 :                 cb_fn(cb_arg, -ENOMEM);
    5947           0 :                 return;
    5948             :         }
    5949             :         /* Completion descriptor carrying the caller's callback into the sequence. */
    5950         829 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5951         829 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5952         829 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5953             : 
    5954         829 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5955         829 :         if (!ctx->seq) {
    5956           0 :                 spdk_free(ctx->super);
    5957           0 :                 free(ctx);
    5958           0 :                 cb_fn(cb_arg, -ENOMEM);
    5959           0 :                 return;
    5960             :         }
    5961             : 
    5962             :         /* Read super block */
    5963        1658 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5964         829 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5965         829 :                              bs_unload_read_super_cpl, ctx);
    5966         839 : }
    5967             : 
    5968             : /* END spdk_bs_unload */
    5969             : 
    5970             : /* START spdk_bs_set_super */
    5971             : /* State carried through the read-then-write of the super block done by spdk_bs_set_super(). */
    5972             : struct spdk_bs_set_super_ctx {
    5973             :         struct spdk_blob_store          *bs; /* blobstore being updated */
    5974             :         struct spdk_bs_super_block      *super; /* spdk_zmalloc()'d buffer for the super block; released with spdk_free() */
    5975             : };
    5976             : /* Completion of the super block write: logs a failure, releases ctx and finishes seq with bserrno. */
    5977             : static void
    5978          10 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5979             : {
    5980          10 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5981             :         /* An error is only logged here; it still reaches the caller via bs_sequence_finish() below. */
    5982          10 :         if (bserrno != 0) {
    5983           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5984           0 :         }
    5985             : 
    5986          10 :         spdk_free(ctx->super);
    5987             : 
    5988          10 :         bs_sequence_finish(seq, bserrno);
    5989             : 
    5990          10 :         free(ctx);
    5991          10 : }
    5992             : /* Completion of the super block read: on a good, valid block, writes it back via bs_write_super(); otherwise cleans up and reports the error. */
    5993             : static void
    5994          10 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5995             : {
    5996          10 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5997             :         int rc;
    5998             :         /* Read failure: release everything and finish the sequence with the read's error. */
    5999          10 :         if (bserrno != 0) {
    6000           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    6001           0 :                 spdk_free(ctx->super);
    6002           0 :                 bs_sequence_finish(seq, bserrno);
    6003           0 :                 free(ctx);
    6004           0 :                 return;
    6005             :         }
    6006             :         /* Validation failure: same cleanup, finishing with bs_super_validate()'s return code. */
    6007          10 :         rc = bs_super_validate(ctx->super, ctx->bs);
    6008          10 :         if (rc != 0) {
    6009           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    6010           0 :                 spdk_free(ctx->super);
    6011           0 :                 bs_sequence_finish(seq, rc);
    6012           0 :                 free(ctx);
    6013           0 :                 return;
    6014             :         }
    6015             :         /* ctx ownership passes on to bs_set_super_write_cpl(). */
    6016          10 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    6017          10 : }
    6018             : /* Record blobid as the super blob of bs, then read, validate and rewrite the super block. cb_fn gets -ENOMEM directly on allocation failure, else the sequence result. */
    6019             : void
    6020          10 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6021             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    6022             : {
    6023             :         struct spdk_bs_cpl              cpl;
    6024             :         spdk_bs_sequence_t              *seq;
    6025             :         struct spdk_bs_set_super_ctx    *ctx;
    6026             : 
    6027          10 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    6028             : 
    6029          10 :         ctx = calloc(1, sizeof(*ctx));
    6030          10 :         if (!ctx) {
    6031           0 :                 cb_fn(cb_arg, -ENOMEM);
    6032           0 :                 return;
    6033             :         }
    6034             : 
    6035          10 :         ctx->bs = bs;
    6036             :         /* Buffer the super block is read into and later written from. */
    6037          10 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    6038             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    6039          10 :         if (!ctx->super) {
    6040           0 :                 free(ctx);
    6041           0 :                 cb_fn(cb_arg, -ENOMEM);
    6042           0 :                 return;
    6043             :         }
    6044             :         /* Completion descriptor carrying the caller's callback into the sequence. */
    6045          10 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    6046          10 :         cpl.u.bs_basic.cb_fn = cb_fn;
    6047          10 :         cpl.u.bs_basic.cb_arg = cb_arg;
    6048             : 
    6049          10 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6050          10 :         if (!seq) {
    6051           0 :                 spdk_free(ctx->super);
    6052           0 :                 free(ctx);
    6053           0 :                 cb_fn(cb_arg, -ENOMEM);
    6054           0 :                 return;
    6055             :         }
    6056             :         /* NOTE(review): the in-memory id changes before any I/O; the visible completion callbacks never restore it on failure -- confirm intended. */
    6057          10 :         bs->super_blob = blobid;
    6058             : 
    6059             :         /* Read super block */
    6060          20 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6061          10 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6062          10 :                              bs_set_super_read_cpl, ctx);
    6063          10 : }
    6064             : 
    6065             : /* END spdk_bs_set_super */
    6066             : /* Report the super blob id through cb_fn, invoked before this function returns; -ENOENT with SPDK_BLOBID_INVALID when none is set. */
    6067             : void
    6068          15 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6069             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6070             : {
    6071          15 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6072           5 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6073           5 :         } else {
    6074          10 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6075             :         }
    6076          15 : }
    6077             : /* Return the blobstore's cluster_sz field. */
    6078             : uint64_t
    6079         254 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6080             : {
    6081         254 :         return bs->cluster_sz;
    6082             : }
    6083             : /* Return the blobstore's md_page_size field (the metadata page size, not a data page size). */
    6084             : uint64_t
    6085         114 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6086             : {
    6087         114 :         return bs->md_page_size;
    6088             : }
    6089             : /* Return the blobstore's io_unit_size field. */
    6090             : uint64_t
    6091        1006 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6092             : {
    6093        1006 :         return bs->io_unit_size;
    6094             : }
    6095             : /* Return the blobstore's num_free_clusters field; read without taking any lock here. */
    6096             : uint64_t
    6097         700 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6098             : {
    6099         700 :         return bs->num_free_clusters;
    6100             : }
    6101             : /* Return the blobstore's total_data_clusters field. */
    6102             : uint64_t
    6103         194 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6104             : {
    6105         194 :         return bs->total_data_clusters;
    6106             : }
    6107             : /* Acquire an I/O channel for bs and store it in bs->md_channel. Returns 0 on success, -1 if no channel could be obtained. */
    6108             : static int
    6109         988 : bs_register_md_thread(struct spdk_blob_store *bs)
    6110             : {
    6111         988 :         bs->md_channel = spdk_get_io_channel(bs);
    6112         988 :         if (!bs->md_channel) {
    6113           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6114           0 :                 return -1; /* a bare -1, not a negative errno value */
    6115             :         }
    6116             : 
    6117         988 :         return 0;
    6118         988 : }
    6119             : /* Release the channel held in bs->md_channel. Always returns 0. */
    6120             : static int
    6121         988 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6122             : {
    6123         988 :         spdk_put_io_channel(bs->md_channel); /* bs->md_channel itself is left unchanged */
    6124             : 
    6125         988 :         return 0;
    6126             : }
    6127             : /* Return the blob's id. blob must not be NULL (checked with assert only). */
    6128             : spdk_blob_id
    6129         712 : spdk_blob_get_id(struct spdk_blob *blob)
    6130             : {
    6131         712 :         assert(blob != NULL);
    6132             : 
    6133         712 :         return blob->id;
    6134             : }
    6135             : /* Return the blob's active cluster count converted with bs_cluster_to_io_unit(). blob must not be NULL (assert only). */
    6136             : uint64_t
    6137          30 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6138             : {
    6139          30 :         assert(blob != NULL);
    6140             : 
    6141          30 :         return bs_cluster_to_io_unit(blob->bs, blob->active.num_clusters);
    6142             : }
    6143             : /* Return blob->active.num_clusters. blob must not be NULL (assert only). */
    6144             : uint64_t
    6145         707 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6146             : {
    6147         707 :         assert(blob != NULL);
    6148             : 
    6149         707 :         return blob->active.num_clusters;
    6150             : }
    6151             : /* Return blob->active.num_allocated_clusters. blob must not be NULL (assert only). */
    6152             : uint64_t
    6153         415 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6154             : {
    6155         415 :         assert(blob != NULL);
    6156             : 
    6157         415 :         return blob->active.num_allocated_clusters;
    6158             : }
    6159             : /* Starting at offset, find the first probed io unit whose allocation state equals is_allocated. Returns its index, or UINT64_MAX if the end of the blob is reached first. */
    6160             : static uint64_t
    6161          30 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6162             : {
    6163          30 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6164             :         /* An offset at or past the end of the blob skips the loop and yields UINT64_MAX. */
    6165          55 :         while (offset < blob_io_unit_num) {
    6166          50 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6167          25 :                         return offset;
    6168             :                 }
    6169             :                 /* No match: jump ahead; presumably lands on the first io unit of the next cluster -- TODO confirm helper semantics. */
    6170          25 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    6171             :         }
    6172             : 
    6173           5 :         return UINT64_MAX;
    6174          30 : }
    6175             : /* Return the first allocated io unit found at or after offset, or UINT64_MAX if there is none. */
    6176             : uint64_t
    6177          15 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6178             : {
    6179          15 :         return blob_find_io_unit(blob, offset, true);
    6180             : }
    6181             : /* Return the first unallocated io unit found at or after offset, or UINT64_MAX if there is none. */
    6182             : uint64_t
    6183          15 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6184             : {
    6185          15 :         return blob_find_io_unit(blob, offset, false);
    6186             : }
    6187             : 
    6188             : /* START spdk_bs_create_blob */
    6189             : /* Completion of blob_persist() for a newly created blob: on failure releases its blob id and md page; always frees the in-memory blob and finishes seq. */
    6190             : static void
    6191        2346 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6192             : {
    6193        2346 :         struct spdk_blob *blob = cb_arg;
    6194        2346 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6195             :         /* Undo the claims made in bs_create_blob(); both updates happen under used_lock. */
    6196        2346 :         if (bserrno != 0) {
    6197           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6198           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6199           0 :                 bs_release_md_page(blob->bs, page_idx);
    6200           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6201           0 :         }
    6202             :         /* The blob object is released on success as well; the caller only receives the id through the completion. */
    6203        2346 :         blob_free(blob);
    6204             : 
    6205        2346 :         bs_sequence_finish(seq, bserrno);
    6206        2346 : }
    6207             : 
    6208             : static int
    6209        4717 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6210             :                 bool internal)
    6211             : {
    6212             :         uint64_t i;
    6213        4717 :         size_t value_len = 0;
    6214             :         int rc;
    6215        4717 :         const void *value = NULL;
    6216        4717 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6217          10 :                 return -EINVAL;
    6218             :         }
    6219        5103 :         for (i = 0; i < xattrs->count; i++) {
    6220         401 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6221         401 :                 if (value == NULL || value_len == 0) {
    6222           5 :                         return -EINVAL;
    6223             :                 }
    6224         396 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6225         396 :                 if (rc < 0) {
    6226           0 :                         return rc;
    6227             :                 }
    6228         396 :         }
    6229        4702 :         return 0;
    6230        4717 : }
    6231             : /* Copy into dst each option field that fits entirely within src->opts_size; dst fields beyond that size are left untouched. */
    6232             : static void
    6233        2330 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6234             : {
    6235             : #define FIELD_OK(field) \
    6236             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6237             :         /* FIELD_OK(f): true when field f ends at or before src->opts_size. SET_FIELD(f): assign the field only in that case. */
    6238             : #define SET_FIELD(field) \
    6239             :         if (FIELD_OK(field)) { \
    6240             :                 dst->field = src->field; \
    6241             :         } \
    6242             : 
    6243        2330 :         SET_FIELD(num_clusters);
    6244        2330 :         SET_FIELD(thin_provision);
    6245        2330 :         SET_FIELD(clear_method);
    6246             :         /* xattrs is copied with memcpy rather than SET_FIELD's assignment. */
    6247        2330 :         if (FIELD_OK(xattrs)) {
    6248        2330 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6249        2330 :         }
    6250             : 
    6251        2330 :         SET_FIELD(use_extent_table);
    6252        2330 :         SET_FIELD(esnap_id);
    6253        2330 :         SET_FIELD(esnap_id_len);
    6254             :         /* dst takes over the caller-declared size, overwriting whatever size dst was initialised with. */
    6255        2330 :         dst->opts_size = src->opts_size;
    6256             : 
    6257             :         /* Do not remove this statement. If you add a new field, update the size in the
    6258             :          * assert below and add a corresponding SET_FIELD statement above. */
    6259             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6260             : 
    6261             : #undef FIELD_OK
    6262             : #undef SET_FIELD
    6263        2330 : }
    6264             : /* Create a blob: claim an md page/blob id, apply opts and xattrs to an in-memory blob, then persist it. cb_fn gets (0, rc) on failure, else the id via the sequence. */
    6265             : static void
    6266        2366 : bs_create_blob(struct spdk_blob_store *bs,
    6267             :                const struct spdk_blob_opts *opts,
    6268             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6269             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6270             : {
    6271             :         struct spdk_blob        *blob;
    6272             :         uint32_t                page_idx;
    6273             :         struct spdk_bs_cpl      cpl;
    6274             :         struct spdk_blob_opts   opts_local;
    6275             :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6276             :         spdk_bs_sequence_t      *seq;
    6277             :         spdk_blob_id            id;
    6278             :         int rc;
    6279             : 
    6280        2366 :         assert(spdk_get_thread() == bs->md_thread);
    6281             :         /* Pick the first clear md page and claim it together with the matching blob id, all under used_lock. */
    6282        2366 :         spdk_spin_lock(&bs->used_lock);
    6283        2366 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6284        2366 :         if (page_idx == UINT32_MAX) {
    6285           0 :                 spdk_spin_unlock(&bs->used_lock);
    6286           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6287           0 :                 return;
    6288             :         }
    6289        2366 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6290        2366 :         bs_claim_md_page(bs, page_idx);
    6291        2366 :         spdk_spin_unlock(&bs->used_lock);
    6292             : 
    6293        2366 :         id = bs_page_to_blobid(page_idx);
    6294             : 
    6295        2366 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6296             :         /* Start from defaults, then overlay whichever fields the caller's opts actually contain. */
    6297        2366 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6298        2366 :         if (opts) {
    6299        2330 :                 blob_opts_copy(opts, &opts_local);
    6300        2330 :         }
    6301             :         /* blob is assigned before any "goto error", so the error path may test it against NULL. */
    6302        2366 :         blob = blob_alloc(bs, id);
    6303        2366 :         if (!blob) {
    6304           0 :                 rc = -ENOMEM;
    6305           0 :                 goto error;
    6306             :         }
    6307             : 
    6308        2366 :         blob->use_extent_table = opts_local.use_extent_table;
    6309        2366 :         if (blob->use_extent_table) {
    6310        1440 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6311        1440 :         }
    6312             :         /* Callers may pass NULL for internal_xattrs; substitute an initialised default set. */
    6313        2366 :         if (!internal_xattrs) {
    6314        2025 :                 blob_xattrs_init(&internal_xattrs_default);
    6315        2025 :                 internal_xattrs = &internal_xattrs_default;
    6316        2025 :         }
    6317             :         /* User xattrs first (internal == false), then internal ones (internal == true). */
    6318        2366 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6319        2366 :         if (rc < 0) {
    6320          15 :                 goto error;
    6321             :         }
    6322             : 
    6323        2351 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6324        2351 :         if (rc < 0) {
    6325           0 :                 goto error;
    6326             :         }
    6327             : 
    6328        2351 :         if (opts_local.thin_provision) {
    6329         446 :                 blob_set_thin_provision(blob);
    6330         446 :         }
    6331             : 
    6332        2351 :         blob_set_clear_method(blob, opts_local.clear_method);
    6333             :         /* An esnap id makes the blob thin provisioned and is stored as an internal xattr; ids longer than UINT16_MAX are rejected. */
    6334        2351 :         if (opts_local.esnap_id != NULL) {
    6335          75 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6336           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 " is too long\n",
    6337             :                                     opts_local.esnap_id_len);
    6338           0 :                         rc = -EINVAL;
    6339           0 :                         goto error;
    6340             : 
    6341             :                 }
    6342          75 :                 blob_set_thin_provision(blob);
    6343          75 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6344         150 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6345          75 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6346          75 :                 if (rc != 0) {
    6347           0 :                         goto error;
    6348             :                 }
    6349          75 :         }
    6350             :         /* Size the blob to the requested cluster count before persisting. */
    6351        2351 :         rc = blob_resize(blob, opts_local.num_clusters);
    6352        2351 :         if (rc < 0) {
    6353           5 :                 goto error;
    6354             :         }
    6355        2346 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6356        2346 :         cpl.u.blobid.cb_fn = cb_fn;
    6357        2346 :         cpl.u.blobid.cb_arg = cb_arg;
    6358        2346 :         cpl.u.blobid.blobid = blob->id;
    6359             : 
    6360        2346 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6361        2346 :         if (!seq) {
    6362           0 :                 rc = -ENOMEM;
    6363           0 :                 goto error;
    6364             :         }
    6365             :         /* From here bs_create_blob_cpl() owns blob and is responsible for cleanup. */
    6366        2346 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6367        2346 :         return;
    6368             :         /* Failure after the claims above: free the blob, give back the blob id and md page, report rc with id 0. */
    6369             : error:
    6370          20 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %" PRIu64 " (clusters)\n",
    6371             :                     spdk_strerror(rc), opts_local.num_clusters);
    6372          20 :         if (blob != NULL) {
    6373          20 :                 blob_free(blob);
    6374          20 :         }
    6375          20 :         spdk_spin_lock(&bs->used_lock);
    6376          20 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6377          20 :         bs_release_md_page(bs, page_idx);
    6378          20 :         spdk_spin_unlock(&bs->used_lock);
    6379          20 :         cb_fn(cb_arg, 0, rc);
    6380        2366 : }
    6381             : /* Create a blob with default options: bs_create_blob() is called with NULL opts and NULL internal xattrs. */
    6382             : void
    6383          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6384             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6385             : {
    6386          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6387          16 : }
    6388             : /* Create a blob using caller-supplied opts; no internal xattrs are passed to bs_create_blob(). */
    6389             : void
    6390        1999 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6391             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6392             : {
    6393        1999 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6394        1999 : }
    6395             : 
    6396             : /* END spdk_bs_create_blob */
    6397             : 
    6398             : /* START blob_cleanup */
    6399             : /* Shared context for the snapshot/clone/inflate operations in this file; released by bs_clone_snapshot_cleanup_finish(). */
    6400             : struct spdk_clone_snapshot_ctx {
    6401             :         struct spdk_bs_cpl      cpl; /* user completion, invoked by bs_clone_snapshot_cleanup_finish() */
    6402             :         int bserrno; /* first error recorded for the operation; 0 if none */
    6403             :         bool frozen;
    6404             : 
    6405             :         struct spdk_io_channel *channel;
    6406             : 
    6407             :         /* Current cluster for inflate operation */
    6408             :         uint64_t cluster;
    6409             : 
    6410             :         /* For inflation force allocation of all unallocated clusters and remove
    6411             :          * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
    6412             :         bool allocate_all;
    6413             : 
    6414             :         struct {
    6415             :                 spdk_blob_id id;
    6416             :                 struct spdk_blob *blob;
    6417             :                 bool md_ro; /* saved md_ro state, restored by bs_snapshot_unfreeze_cpl() */
    6418             :         } original;
    6419             :         struct {
    6420             :                 spdk_blob_id id;
    6421             :                 struct spdk_blob *blob;
    6422             :         } new;
    6423             : 
    6424             :         /* xattrs specified for snapshot/clones only. They have no impact on
    6425             :          * the original blob's xattrs. */
    6426             :         const struct spdk_blob_xattr_opts *xattrs;
    6427             : };
    6428             : /* Final cleanup step: folds bserrno into ctx->bserrno, invokes the user completion stored in ctx->cpl with ctx->bserrno, then frees ctx. */
    6429             : static void
    6430         429 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6431             : {
    6432         429 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6433         429 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6434             :         /* The first recorded error wins; a later cleanup error is only logged. */
    6435         429 :         if (bserrno != 0) {
    6436           8 :                 if (ctx->bserrno != 0) {
    6437           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6438           0 :                 } else {
    6439           8 :                         ctx->bserrno = bserrno;
    6440             :                 }
    6441           8 :         }
    6442             :         /* Only the two completion types below are expected; anything else hits SPDK_UNREACHABLE(). */
    6443         429 :         switch (cpl->type) {
    6444             :         case SPDK_BS_CPL_TYPE_BLOBID:
    6445         354 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6446         354 :                 break;
    6447             :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6448          75 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6449          75 :                 break;
    6450             :         default:
    6451           0 :                 SPDK_UNREACHABLE();
    6452             :                 break;
    6453             :         }
    6454             :         /* cpl points into ctx, so ctx is freed only after the callback has run. */
    6455         429 :         free(ctx);
    6456         429 : }
    6457             : 
    6458             : static void
    6459         411 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6460             : {
    6461         411 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6462         411 :         struct spdk_blob *origblob = ctx->original.blob;
    6463             : 
    6464         411 :         if (bserrno != 0) {
    6465           0 :                 if (ctx->bserrno != 0) {
    6466           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6467           0 :                 } else {
    6468           0 :                         ctx->bserrno = bserrno;
    6469             :                 }
    6470           0 :         }
    6471             : 
    6472         411 :         ctx->original.id = origblob->id;
    6473         411 :         origblob->locked_operation_in_progress = false;
    6474             : 
    6475             :         /* Revert md_ro to original state */
    6476         411 :         origblob->md_ro = ctx->original.md_ro;
    6477             : 
    6478         411 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6479         411 : }
    6480             : 
    6481             : static void
    6482         411 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6483             : {
    6484         411 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6485         411 :         struct spdk_blob *origblob = ctx->original.blob;
    6486             : 
    6487         411 :         if (bserrno != 0) {
    6488          30 :                 if (ctx->bserrno != 0) {
    6489           5 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6490           5 :                 } else {
    6491          25 :                         ctx->bserrno = bserrno;
    6492             :                 }
    6493          30 :         }
    6494             : 
    6495         411 :         if (ctx->frozen) {
    6496             :                 /* Unfreeze any outstanding I/O */
    6497         266 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6498         266 :         } else {
    6499         145 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6500             :         }
    6501             : 
    6502         411 : }
    6503             : 
    6504             : static void
    6505           5 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6506             : {
    6507           5 :         struct spdk_blob *newblob = ctx->new.blob;
    6508             : 
    6509           5 :         if (bserrno != 0) {
    6510           5 :                 if (ctx->bserrno != 0) {
    6511           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6512           0 :                 } else {
    6513           5 :                         ctx->bserrno = bserrno;
    6514             :                 }
    6515           5 :         }
    6516             : 
    6517           5 :         ctx->new.id = newblob->id;
    6518           5 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6519           5 : }
    6520             : 
    6521             : /* END blob_cleanup */
    6522             : 
    6523             : /* START spdk_bs_create_snapshot */
    6524             : 
    6525             : static void
    6526         276 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6527             : {
    6528             :         uint64_t *cluster_temp;
    6529             :         uint64_t num_allocated_clusters_temp;
    6530             :         uint32_t *extent_page_temp;
    6531             : 
    6532         276 :         cluster_temp = blob1->active.clusters;
    6533         276 :         blob1->active.clusters = blob2->active.clusters;
    6534         276 :         blob2->active.clusters = cluster_temp;
    6535             : 
    6536         276 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6537         276 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6538         276 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6539             : 
    6540         276 :         extent_page_temp = blob1->active.extent_pages;
    6541         276 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6542         276 :         blob2->active.extent_pages = extent_page_temp;
    6543         276 : }
    6544             : 
    6545             : /* Copies an internal xattr */
    6546             : static int
    6547          25 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6548             : {
    6549          25 :         const void      *val = NULL;
    6550             :         size_t          len;
    6551             :         int             bserrno;
    6552             : 
    6553          25 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6554          25 :         if (bserrno != 0) {
    6555           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6556           0 :                 return bserrno;
    6557             :         }
    6558             : 
    6559          25 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6560          25 :         if (bserrno != 0) {
    6561           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6562             :                             name, toblob->id);
    6563           0 :                 return bserrno;
    6564             :         }
    6565          25 :         return 0;
    6566          25 : }
    6567             : 
    6568             : static void
    6569         261 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6570             : {
    6571         261 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6572         261 :         struct spdk_blob *origblob = ctx->original.blob;
    6573         261 :         struct spdk_blob *newblob = ctx->new.blob;
    6574             : 
    6575         261 :         if (bserrno != 0) {
    6576           5 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6577           5 :                 if (blob_is_esnap_clone(newblob)) {
    6578           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6579           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6580           0 :                 }
    6581           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6582           5 :                 return;
    6583             :         }
    6584             : 
    6585             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6586         256 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6587         256 :         if (bserrno != 0) {
    6588           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6589           0 :                 return;
    6590             :         }
    6591             : 
    6592         256 :         bs_blob_list_add(ctx->original.blob);
    6593             : 
    6594         256 :         spdk_blob_set_read_only(newblob);
    6595             : 
    6596             :         /* sync snapshot metadata */
    6597         256 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6598         261 : }
    6599             : 
    6600             : static void
    6601         266 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6602             : {
    6603         266 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6604         266 :         struct spdk_blob *origblob = ctx->original.blob;
    6605         266 :         struct spdk_blob *newblob = ctx->new.blob;
    6606             : 
    6607         266 :         if (bserrno != 0) {
    6608             :                 /* return cluster map back to original */
    6609           5 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6610             : 
    6611             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6612             :                  * Since I/O is frozen on origblob, not changes to zeroed out cluster map should have occurred.
    6613             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6614           5 :                 blob_set_thin_provision(newblob);
    6615           5 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6616             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6617           5 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6618             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6619             : 
    6620           5 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6621           5 :                 return;
    6622             :         }
    6623             : 
    6624             :         /* Set internal xattr for snapshot id */
    6625         261 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6626         261 :         if (bserrno != 0) {
    6627             :                 /* return cluster map back to original */
    6628           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6629           0 :                 blob_set_thin_provision(newblob);
    6630           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6631           0 :                 return;
    6632             :         }
    6633             : 
    6634             :         /* Create new back_bs_dev for snapshot */
    6635         261 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6636         261 :         if (origblob->back_bs_dev == NULL) {
    6637             :                 /* return cluster map back to original */
    6638           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6639           0 :                 blob_set_thin_provision(newblob);
    6640           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6641           0 :                 return;
    6642             :         }
    6643             : 
    6644             :         /* Remove the xattr that references an external snapshot */
    6645         261 :         if (blob_is_esnap_clone(origblob)) {
    6646          15 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6647          15 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6648          15 :                 if (bserrno != 0) {
    6649           0 :                         if (bserrno == -ENOENT) {
    6650           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6651             :                                             " xattr to remove\n", origblob->id);
    6652           0 :                                 assert(false);
    6653             :                         } else {
    6654             :                                 /* return cluster map back to original */
    6655           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6656           0 :                                 blob_set_thin_provision(newblob);
    6657           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6658           0 :                                 return;
    6659             :                         }
    6660             :                 }
    6661          15 :         }
    6662             : 
    6663         261 :         bs_blob_list_remove(origblob);
    6664         261 :         origblob->parent_id = newblob->id;
    6665             :         /* set clone blob as thin provisioned */
    6666         261 :         blob_set_thin_provision(origblob);
    6667             : 
    6668         261 :         bs_blob_list_add(newblob);
    6669             : 
    6670             :         /* sync clone metadata */
    6671         261 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6672         266 : }
    6673             : 
    6674             : static void
    6675         266 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6676             : {
    6677         266 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6678         266 :         struct spdk_blob *origblob = ctx->original.blob;
    6679         266 :         struct spdk_blob *newblob = ctx->new.blob;
    6680             :         int bserrno;
    6681             : 
    6682         266 :         if (rc != 0) {
    6683           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6684           0 :                 return;
    6685             :         }
    6686             : 
    6687         266 :         ctx->frozen = true;
    6688             : 
    6689         266 :         if (blob_is_esnap_clone(origblob)) {
    6690             :                 /* Clean up any channels associated with the original blob id because future IO will
    6691             :                  * perform IO using the snapshot blob_id.
    6692             :                  */
    6693          15 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6694          15 :         }
    6695         266 :         if (newblob->back_bs_dev) {
    6696         266 :                 blob_back_bs_destroy(newblob);
    6697         266 :         }
    6698             :         /* set new back_bs_dev for snapshot */
    6699         266 :         newblob->back_bs_dev = origblob->back_bs_dev;
    6700             :         /* Set invalid flags from origblob */
    6701         266 :         newblob->invalid_flags = origblob->invalid_flags;
    6702             : 
    6703             :         /* inherit parent from original blob if set */
    6704         266 :         newblob->parent_id = origblob->parent_id;
    6705         266 :         switch (origblob->parent_id) {
    6706             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6707          15 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6708          15 :                 if (bserrno != 0) {
    6709           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6710           0 :                         return;
    6711             :                 }
    6712          15 :                 break;
    6713             :         case SPDK_BLOBID_INVALID:
    6714         186 :                 break;
    6715             :         default:
    6716             :                 /* Set internal xattr for snapshot id */
    6717         130 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6718          65 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6719          65 :                 if (bserrno != 0) {
    6720           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6721           0 :                         return;
    6722             :                 }
    6723          65 :         }
    6724             : 
    6725             :         /* swap cluster maps */
    6726         266 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6727             : 
    6728             :         /* Set the clear method on the new blob to match the original. */
    6729         266 :         blob_set_clear_method(newblob, origblob->clear_method);
    6730             : 
    6731             :         /* sync snapshot metadata */
    6732         266 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6733         266 : }
    6734             : 
    6735             : static void
    6736         271 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6737             : {
    6738         271 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6739         271 :         struct spdk_blob *origblob = ctx->original.blob;
    6740         271 :         struct spdk_blob *newblob = _blob;
    6741             : 
    6742         271 :         if (bserrno != 0) {
    6743           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6744           5 :                 return;
    6745             :         }
    6746             : 
    6747         266 :         ctx->new.blob = newblob;
    6748         266 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6749         266 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6750             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6751         266 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6752             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6753             : 
    6754         266 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6755         271 : }
    6756             : 
    6757             : static void
    6758         276 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6759             : {
    6760         276 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6761         276 :         struct spdk_blob *origblob = ctx->original.blob;
    6762             : 
    6763         276 :         if (bserrno != 0) {
    6764           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6765           5 :                 return;
    6766             :         }
    6767             : 
    6768         271 :         ctx->new.id = blobid;
    6769         271 :         ctx->cpl.u.blobid.blobid = blobid;
    6770             : 
    6771         271 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6772         276 : }
    6773             : 
    6774             : 
    6775             : static void
    6776         276 : bs_xattr_snapshot(void *arg, const char *name,
    6777             :                   const void **value, size_t *value_len)
    6778             : {
    6779         276 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6780             : 
    6781         276 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6782         276 :         *value = &blob->id;
    6783         276 :         *value_len = sizeof(blob->id);
    6784         276 : }
    6785             : 
    6786             : static void
    6787         289 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6788             : {
    6789         289 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6790             :         struct spdk_blob_opts opts;
    6791             :         struct spdk_blob_xattr_opts internal_xattrs;
    6792         289 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6793             : 
    6794         289 :         if (bserrno != 0) {
    6795           8 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6796           8 :                 return;
    6797             :         }
    6798             : 
    6799         281 :         ctx->original.blob = _blob;
    6800             : 
    6801         281 :         if (_blob->data_ro || _blob->md_ro) {
    6802           5 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6803             :                               PRIx64 "\n", _blob->id);
    6804           5 :                 ctx->bserrno = -EINVAL;
    6805           5 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6806           5 :                 return;
    6807             :         }
    6808             : 
    6809         276 :         if (_blob->locked_operation_in_progress) {
    6810           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6811           0 :                 ctx->bserrno = -EBUSY;
    6812           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6813           0 :                 return;
    6814             :         }
    6815             : 
    6816         276 :         _blob->locked_operation_in_progress = true;
    6817             : 
    6818         276 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6819         276 :         blob_xattrs_init(&internal_xattrs);
    6820             : 
    6821             :         /* Change the size of new blob to the same as in original blob,
    6822             :          * but do not allocate clusters */
    6823         276 :         opts.thin_provision = true;
    6824         276 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6825         276 :         opts.use_extent_table = _blob->use_extent_table;
    6826             : 
    6827             :         /* If there are any xattrs specified for snapshot, set them now */
    6828         276 :         if (ctx->xattrs) {
    6829           5 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6830           5 :         }
    6831             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6832         276 :         internal_xattrs.count = 1;
    6833         276 :         internal_xattrs.ctx = _blob;
    6834         276 :         internal_xattrs.names = xattrs_names;
    6835         276 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6836             : 
    6837         552 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6838         276 :                        bs_snapshot_newblob_create_cpl, ctx);
    6839         289 : }
    6840             : 
    6841             : void
    6842         289 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6843             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6844             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6845             : {
    6846         289 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6847             : 
    6848         289 :         if (!ctx) {
    6849           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6850           0 :                 return;
    6851             :         }
    6852         289 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6853         289 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6854         289 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6855         289 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6856         289 :         ctx->bserrno = 0;
    6857         289 :         ctx->frozen = false;
    6858         289 :         ctx->original.id = blobid;
    6859         289 :         ctx->xattrs = snapshot_xattrs;
    6860             : 
    6861         289 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6862         289 : }
    6863             : /* END spdk_bs_create_snapshot */
    6864             : 
    6865             : /* START spdk_bs_create_clone */
    6866             : 
    6867             : static void
    6868          60 : bs_xattr_clone(void *arg, const char *name,
    6869             :                const void **value, size_t *value_len)
    6870             : {
    6871          60 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6872             : 
    6873          60 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6874          60 :         *value = &blob->id;
    6875          60 :         *value_len = sizeof(blob->id);
    6876          60 : }
    6877             : 
    6878             : static void
    6879          60 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6880             : {
    6881          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6882          60 :         struct spdk_blob *clone = _blob;
    6883             : 
    6884          60 :         ctx->new.blob = clone;
    6885          60 :         bs_blob_list_add(clone);
    6886             : 
    6887          60 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6888          60 : }
    6889             : 
    6890             : static void
    6891          60 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6892             : {
    6893          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6894             : 
    6895          60 :         ctx->cpl.u.blobid.blobid = blobid;
    6896          60 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6897          60 : }
    6898             : 
    6899             : static void
    6900          65 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6901             : {
    6902          65 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6903             :         struct spdk_blob_opts           opts;
    6904             :         struct spdk_blob_xattr_opts internal_xattrs;
    6905          65 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6906             : 
    6907          65 :         if (bserrno != 0) {
    6908           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6909           0 :                 return;
    6910             :         }
    6911             : 
    6912          65 :         ctx->original.blob = _blob;
    6913          65 :         ctx->original.md_ro = _blob->md_ro;
    6914             : 
    6915          65 :         if (!_blob->data_ro || !_blob->md_ro) {
    6916           5 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6917           5 :                 ctx->bserrno = -EINVAL;
    6918           5 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6919           5 :                 return;
    6920             :         }
    6921             : 
    6922          60 :         if (_blob->locked_operation_in_progress) {
    6923           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6924           0 :                 ctx->bserrno = -EBUSY;
    6925           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6926           0 :                 return;
    6927             :         }
    6928             : 
    6929          60 :         _blob->locked_operation_in_progress = true;
    6930             : 
    6931          60 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6932          60 :         blob_xattrs_init(&internal_xattrs);
    6933             : 
    6934          60 :         opts.thin_provision = true;
    6935          60 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6936          60 :         opts.use_extent_table = _blob->use_extent_table;
    6937          60 :         if (ctx->xattrs) {
    6938           5 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6939           5 :         }
    6940             : 
    6941             :         /* Set internal xattr BLOB_SNAPSHOT */
    6942          60 :         internal_xattrs.count = 1;
    6943          60 :         internal_xattrs.ctx = _blob;
    6944          60 :         internal_xattrs.names = xattr_names;
    6945          60 :         internal_xattrs.get_value = bs_xattr_clone;
    6946             : 
    6947         120 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6948          60 :                        bs_clone_newblob_create_cpl, ctx);
    6949          65 : }
    6950             : 
    6951             : void
    6952          65 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6953             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6954             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6955             : {
    6956          65 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6957             : 
    6958          65 :         if (!ctx) {
    6959           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6960           0 :                 return;
    6961             :         }
    6962             : 
    6963          65 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6964          65 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6965          65 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6966          65 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6967          65 :         ctx->bserrno = 0;
    6968          65 :         ctx->xattrs = clone_xattrs;
    6969          65 :         ctx->original.id = blobid;
    6970             : 
    6971          65 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6972          65 : }
    6973             : 
    6974             : /* END spdk_bs_create_clone */
    6975             : 
    6976             : /* START spdk_bs_inflate_blob */
    6977             : 
    6978             : static void
    6979          15 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6980             : {
    6981          15 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6982          15 :         struct spdk_blob *_blob = ctx->original.blob;
    6983             : 
    6984          15 :         if (bserrno != 0) {
    6985           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6986           0 :                 return;
    6987             :         }
    6988             : 
    6989             :         /* Temporarily override md_ro flag for MD modification */
    6990          15 :         _blob->md_ro = false;
    6991             : 
    6992          15 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6993          15 :         if (bserrno != 0) {
    6994           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6995           0 :                 return;
    6996             :         }
    6997             : 
    6998          15 :         assert(_parent != NULL);
    6999             : 
    7000          15 :         bs_blob_list_remove(_blob);
    7001          15 :         _blob->parent_id = _parent->id;
    7002             : 
    7003          15 :         blob_back_bs_destroy(_blob);
    7004          15 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    7005          15 :         bs_blob_list_add(_blob);
    7006             : 
    7007          15 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7008          15 : }
    7009             : 
    7010             : static void
    7011          70 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    7012             : {
    7013          70 :         struct spdk_blob *_blob = ctx->original.blob;
    7014             :         struct spdk_blob *_parent;
    7015             : 
    7016          70 :         if (ctx->allocate_all) {
    7017             :                 /* remove thin provisioning */
    7018          40 :                 bs_blob_list_remove(_blob);
    7019          40 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7020          10 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7021          10 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7022          10 :                 } else {
    7023          30 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7024             :                 }
    7025          40 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    7026          40 :                 blob_back_bs_destroy(_blob);
    7027          40 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7028          40 :         } else {
    7029             :                 /* For now, esnap clones always have allocate_all set. */
    7030          30 :                 assert(!blob_is_esnap_clone(_blob));
    7031             : 
    7032          30 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    7033          30 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    7034             :                         /* We must change the parent of the inflated blob */
    7035          30 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    7036          15 :                                           bs_inflate_blob_set_parent_cpl, ctx);
    7037          15 :                         return;
    7038             :                 }
    7039             : 
    7040          15 :                 bs_blob_list_remove(_blob);
    7041          15 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7042          15 :                 blob_back_bs_destroy(_blob);
    7043          15 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    7044             :         }
    7045             : 
    7046             :         /* Temporarily override md_ro flag for MD modification */
    7047          55 :         _blob->md_ro = false;
    7048          55 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7049          55 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7050             : 
    7051          55 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7052          70 : }
    7053             : 
    7054             : /* Check if cluster needs allocation */
    7055             : static inline bool
    7056        1500 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7057             : {
    7058             :         struct spdk_blob_bs_dev *b;
    7059             : 
    7060        1500 :         assert(blob != NULL);
    7061             : 
    7062        1500 :         if (blob->active.clusters[cluster] != 0) {
    7063             :                 /* Cluster is already allocated */
    7064          40 :                 return false;
    7065             :         }
    7066             : 
    7067        1460 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7068             :                 /* Blob have no parent blob */
    7069         100 :                 return allocate_all;
    7070             :         }
    7071             : 
    7072        1360 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7073          80 :                 return true;
    7074             :         }
    7075             : 
    7076        1280 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7077        1280 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7078        1500 : }
    7079             : 
    7080             : static void
    7081         635 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7082             : {
    7083         635 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7084         635 :         struct spdk_blob *_blob = ctx->original.blob;
    7085             :         struct spdk_bs_cpl cpl;
    7086             :         spdk_bs_user_op_t *op;
    7087             :         uint64_t offset;
    7088             : 
    7089         635 :         if (bserrno != 0) {
    7090           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7091           0 :                 return;
    7092             :         }
    7093             : 
    7094         820 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7095         750 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7096         565 :                         break;
    7097             :                 }
    7098         185 :         }
    7099             : 
    7100         635 :         if (ctx->cluster < _blob->active.num_clusters) {
    7101         565 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7102             : 
    7103             :                 /* We may safely increment a cluster before copying */
    7104         565 :                 ctx->cluster++;
    7105             : 
    7106             :                 /* Use a dummy 0B read as a context for cluster copy */
    7107         565 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7108         565 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7109         565 :                 cpl.u.blob_basic.cb_arg = ctx;
    7110             : 
    7111        1130 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7112         565 :                                       NULL, 0, offset, 0);
    7113         565 :                 if (!op) {
    7114           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7115           0 :                         return;
    7116             :                 }
    7117             : 
    7118         565 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7119         565 :         } else {
    7120          70 :                 bs_inflate_blob_done(ctx);
    7121             :         }
    7122         635 : }
    7123             : 
    7124             : static void
    7125          75 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7126             : {
    7127          75 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7128             :         uint64_t clusters_needed;
    7129             :         uint64_t i;
    7130             : 
    7131          75 :         if (bserrno != 0) {
    7132           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7133           0 :                 return;
    7134             :         }
    7135             : 
    7136          75 :         ctx->original.blob = _blob;
    7137          75 :         ctx->original.md_ro = _blob->md_ro;
    7138             : 
    7139          75 :         if (_blob->locked_operation_in_progress) {
    7140           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7141           0 :                 ctx->bserrno = -EBUSY;
    7142           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7143           0 :                 return;
    7144             :         }
    7145             : 
    7146          75 :         _blob->locked_operation_in_progress = true;
    7147             : 
    7148          75 :         switch (_blob->parent_id) {
    7149             :         case SPDK_BLOBID_INVALID:
    7150          10 :                 if (!ctx->allocate_all) {
    7151             :                         /* This blob has no parent, so we cannot decouple it. */
    7152           5 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7153           5 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7154           5 :                         return;
    7155             :                 }
    7156           5 :                 break;
    7157             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7158             :                 /*
    7159             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7160             :                  * clusters require allocation. Until there is a blobstore consumer that
    7161             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7162             :                  * worth the effort.
    7163             :                  */
    7164          10 :                 ctx->allocate_all = true;
    7165          10 :                 break;
    7166             :         default:
    7167          55 :                 break;
    7168             :         }
    7169             : 
    7170          70 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7171             :                 /* This is not thin provisioned blob. No need to inflate. */
    7172           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7173           0 :                 return;
    7174             :         }
    7175             : 
    7176             :         /* Do two passes - one to verify that we can obtain enough clusters
    7177             :          * and another to actually claim them.
    7178             :          */
    7179          70 :         clusters_needed = 0;
    7180         820 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7181         750 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7182         565 :                         clusters_needed++;
    7183         565 :                 }
    7184         750 :         }
    7185             : 
    7186          70 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7187             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7188           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7189           0 :                 return;
    7190             :         }
    7191             : 
    7192          70 :         ctx->cluster = 0;
    7193          70 :         bs_inflate_blob_touch_next(ctx, 0);
    7194          75 : }
    7195             : 
    7196             : static void
    7197          75 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7198             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7199             : {
    7200          75 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7201             : 
    7202          75 :         if (!ctx) {
    7203           0 :                 cb_fn(cb_arg, -ENOMEM);
    7204           0 :                 return;
    7205             :         }
    7206          75 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7207          75 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7208          75 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7209          75 :         ctx->bserrno = 0;
    7210          75 :         ctx->original.id = blobid;
    7211          75 :         ctx->channel = channel;
    7212          75 :         ctx->allocate_all = allocate_all;
    7213             : 
    7214          75 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7215          75 : }
    7216             : 
    7217             : void
    7218          35 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7219             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7220             : {
    7221          35 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7222          35 : }
    7223             : 
    7224             : void
    7225          40 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7226             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7227             : {
    7228          40 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7229          40 : }
    7230             : /* END spdk_bs_inflate_blob */
    7231             : 
    7232             : /* START spdk_bs_blob_shallow_copy */
    7233             : 
    7234             : struct shallow_copy_ctx { /* State of one spdk_bs_blob_shallow_copy() operation */
    7235             :         struct spdk_bs_cpl cpl; /* Caller's completion, invoked from bs_shallow_copy_cleanup_finish() */
    7236             :         int bserrno; /* Status handed to the caller's completion */
    7237             : 
    7238             :         /* Source of the copy: blobstore, blob id, opened blob and the channel used to read it.
    7238             :          * blob stays NULL until the open callback has validated the blob. */
    7239             :         struct spdk_blob_store *bs;
    7240             :         spdk_blob_id blobid;
    7241             :         struct spdk_blob *blob;
    7242             :         struct spdk_io_channel *blob_channel;
    7243             : 
    7244             :         /* Destination device and the channel created on it for the duration of the copy */
    7245             :         struct spdk_bs_dev *ext_dev;
    7246             :         struct spdk_io_channel *ext_channel;
    7247             : 
    7248             :         /* Index of the cluster currently being examined or copied */
    7249             :         uint64_t cluster;
    7250             : 
    7251             :         /* One-cluster buffer: filled by the blob read, then written to ext_dev */
    7252             :         uint8_t *read_buff;
    7253             : 
    7254             :         /* Callback arguments passed to ext_dev->write() */
    7255             :         struct spdk_bs_dev_cb_args ext_args;
    7256             : 
    7257             :         /* Number of clusters copied so far; only advanced when status_cb is set */
    7258             :         uint64_t copied_clusters_count;
    7259             : 
    7260             :         /* Optional progress callback, called after each copied cluster */
    7261             :         spdk_blob_shallow_copy_status status_cb;
    7262             : 
    7263             :         /* Opaque argument passed to status_cb */
    7264             :         void *status_cb_arg;
    7265             : };
    7266             : 
    7267             : static void
    7268          20 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7269             : {
    7270          20 :         struct shallow_copy_ctx *ctx = cb_arg;
    7271          20 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7272             : 
    7273          20 :         if (bserrno != 0) {
    7274           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blob->id, bserrno);
    7275           0 :                 ctx->bserrno = bserrno;
    7276           0 :         }
    7277             : 
    7278          20 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7279          20 :         spdk_free(ctx->read_buff);
    7280             : 
    7281          20 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7282             : 
    7283          20 :         free(ctx);
    7284          20 : }
    7285             : 
    7286             : static void
    7287          10 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7288             : {
    7289          10 :         struct shallow_copy_ctx *ctx = cb_arg;
    7290          10 :         struct spdk_blob *_blob = ctx->blob;
    7291             : 
    7292          10 :         if (bserrno != 0) {
    7293           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7294           0 :                 ctx->bserrno = bserrno;
    7295           0 :                 _blob->locked_operation_in_progress = false;
    7296           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7297           0 :                 return;
    7298             :         }
    7299             : 
    7300          10 :         ctx->cluster++;
    7301          10 :         if (ctx->status_cb) {
    7302          10 :                 ctx->copied_clusters_count++;
    7303          10 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7304          10 :         }
    7305             : 
    7306          10 :         bs_shallow_copy_cluster_find_next(ctx);
    7307          10 : }
    7308             : 
    7309             : static void
    7310          10 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7311             : {
    7312          10 :         struct shallow_copy_ctx *ctx = cb_arg;
    7313          10 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7314          10 :         struct spdk_blob *_blob = ctx->blob;
    7315             : 
    7316          10 :         if (bserrno != 0) {
    7317           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7318           0 :                 ctx->bserrno = bserrno;
    7319           0 :                 _blob->locked_operation_in_progress = false;
    7320           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7321           0 :                 return;
    7322             :         }
    7323             : 
    7324          10 :         ctx->ext_args.channel = ctx->ext_channel;
    7325          10 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7326          10 :         ctx->ext_args.cb_arg = ctx;
    7327             : 
    7328          20 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7329          10 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7330          10 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7331          10 :                        &ctx->ext_args);
    7332          10 : }
    7333             : 
    7334             : static void
    7335          15 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7336             : {
    7337          15 :         struct shallow_copy_ctx *ctx = cb_arg;
    7338          15 :         struct spdk_blob *_blob = ctx->blob;
    7339             : 
    7340          25 :         while (ctx->cluster < _blob->active.num_clusters) {
    7341          20 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7342          10 :                         break;
    7343             :                 }
    7344             : 
    7345          10 :                 ctx->cluster++;
    7346             :         }
    7347             : 
    7348          15 :         if (ctx->cluster < _blob->active.num_clusters) {
    7349          20 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7350          10 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7351          10 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7352          10 :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7353          10 :         } else {
    7354           5 :                 _blob->locked_operation_in_progress = false;
    7355           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7356             :         }
    7357          15 : }
    7358             : 
    7359             : static void
    7360          20 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7361             : {
    7362          20 :         struct shallow_copy_ctx *ctx = cb_arg;
    7363          20 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7364             :         uint32_t blob_block_size;
    7365             :         uint64_t blob_total_size;
    7366             : 
    7367          20 :         if (bserrno != 0) {
    7368           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7369           0 :                 ctx->bserrno = bserrno;
    7370           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7371           0 :                 return;
    7372             :         }
    7373             : 
    7374          20 :         if (!spdk_blob_is_read_only(_blob)) {
    7375           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7376           5 :                 ctx->bserrno = -EPERM;
    7377           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7378           5 :                 return;
    7379             :         }
    7380             : 
    7381          15 :         blob_block_size = _blob->bs->dev->blocklen;
    7382          15 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7383             : 
    7384          15 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7385           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7386             :                             _blob->id);
    7387           5 :                 ctx->bserrno = -EINVAL;
    7388           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7389           5 :                 return;
    7390             :         }
    7391             : 
    7392          10 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7393           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7394             : blobstore block size\n", _blob->id);
    7395           5 :                 ctx->bserrno = -EINVAL;
    7396           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7397           5 :                 return;
    7398             :         }
    7399             : 
    7400           5 :         ctx->blob = _blob;
    7401             : 
    7402           5 :         if (_blob->locked_operation_in_progress) {
    7403           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7404           0 :                 ctx->bserrno = -EBUSY;
    7405           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7406           0 :                 return;
    7407             :         }
    7408             : 
    7409           5 :         _blob->locked_operation_in_progress = true;
    7410             : 
    7411           5 :         ctx->cluster = 0;
    7412           5 :         bs_shallow_copy_cluster_find_next(ctx);
    7413          20 : }
    7414             : 
    7415             : int
    7416          20 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7417             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7418             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7419             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7420             : {
    7421             :         struct shallow_copy_ctx *ctx;
    7422             :         struct spdk_io_channel *ext_channel;
    7423             : 
    7424          20 :         ctx = calloc(1, sizeof(*ctx));
    7425          20 :         if (!ctx) {
    7426           0 :                 return -ENOMEM;
    7427             :         }
    7428             : 
    7429          20 :         ctx->bs = bs;
    7430          20 :         ctx->blobid = blobid;
    7431          20 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7432          20 :         ctx->cpl.u.bs_basic.cb_fn = cb_fn;
    7433          20 :         ctx->cpl.u.bs_basic.cb_arg = cb_arg;
    7434          20 :         ctx->bserrno = 0;
    7435          20 :         ctx->blob_channel = channel;
    7436          20 :         ctx->status_cb = status_cb_fn;
    7437          20 :         ctx->status_cb_arg = status_cb_arg;
    7438          20 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7439             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7440          20 :         if (!ctx->read_buff) {
    7441           0 :                 free(ctx);
    7442           0 :                 return -ENOMEM;
    7443             :         }
    7444             : 
    7445          20 :         ext_channel = ext_dev->create_channel(ext_dev);
    7446          20 :         if (!ext_channel) {
    7447           0 :                 spdk_free(ctx->read_buff);
    7448           0 :                 free(ctx);
    7449           0 :                 return -ENOMEM;
    7450             :         }
    7451          20 :         ctx->ext_dev = ext_dev;
    7452          20 :         ctx->ext_channel = ext_channel;
    7453             : 
    7454          20 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7455             : 
    7456          20 :         return 0;
    7457          20 : }
    7458             : /* END spdk_bs_blob_shallow_copy */
    7459             : 
    7460             : /* START spdk_bs_blob_set_parent */
    7461             : 
    7462             : struct set_parent_ctx { /* State carried through the set-parent callback chain */
    7463             :         struct spdk_blob_store *bs; /* presumably the blobstore owning blob; set outside this excerpt */
    7464             :         int                     bserrno; /* Status reported to cb_fn */
    7465             :         spdk_bs_op_complete     cb_fn; /* Caller's completion, invoked from bs_set_parent_cleanup_finish() */
    7466             :         void                    *cb_arg; /* Argument passed to cb_fn */
    7467             : 
    7468             :         struct spdk_blob        *blob; /* Blob whose parent is being changed */
    7469             :         bool                    blob_md_ro; /* Saved md_ro of blob, restored in bs_set_parent_close_blob() */
    7470             : 
    7471             :         struct blob_parent      parent; /* New parent; u.snapshot holds the opened snapshot and its id */
    7472             : };
    7473             : 
    7474             : static void
    7475          30 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7476             : {
    7477          30 :         struct set_parent_ctx *ctx = cb_arg;
    7478             : 
    7479          30 :         assert(ctx != NULL);
    7480             : 
    7481          30 :         if (bserrno != 0) {
    7482           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
    7483           0 :                 if (ctx->bserrno == 0) {
    7484           0 :                         ctx->bserrno = bserrno;
    7485           0 :                 }
    7486           0 :         }
    7487             : 
    7488          30 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7489             : 
    7490          30 :         free(ctx);
    7491          30 : }
    7492             : 
    7493             : static void
    7494          25 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7495             : {
    7496          25 :         struct set_parent_ctx *ctx = cb_arg;
    7497             : 
    7498          25 :         if (ctx->bserrno != 0) {
    7499          10 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7500          10 :                 return;
    7501             :         }
    7502             : 
    7503          15 :         if (bserrno != 0) {
    7504           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7505           0 :                 ctx->bserrno = bserrno;
    7506           0 :         }
    7507             : 
    7508          15 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7509          25 : }
    7510             : 
    7511             : static void
    7512          15 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7513             : {
    7514          15 :         struct set_parent_ctx *ctx = cb_arg;
    7515          15 :         struct spdk_blob *blob = ctx->blob;
    7516          15 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7517             : 
    7518          15 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7519           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7520           0 :                 ctx->bserrno = bserrno;
    7521           0 :         }
    7522             : 
    7523             :         /* Revert md_ro to original state */
    7524          15 :         blob->md_ro = ctx->blob_md_ro;
    7525             : 
    7526          15 :         blob->locked_operation_in_progress = false;
    7527          15 :         snapshot->locked_operation_in_progress = false;
    7528             : 
    7529          15 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7530          15 : }
    7531             : 
    7532             : static void
    7533          15 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7534             : {
    7535          15 :         struct set_parent_ctx *ctx = cb_arg;
    7536          15 :         struct spdk_blob *blob = ctx->blob;
    7537             : 
    7538          15 :         if (bserrno != 0) {
    7539           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7540           0 :                 ctx->bserrno = bserrno;
    7541           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7542           0 :                 return;
    7543             :         }
    7544             : 
    7545          15 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7546          15 : }
    7547             : 
    7548             : static int
    7549          15 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7550             : {
    7551             :         int rc;
    7552             : 
    7553          15 :         bs_blob_list_remove(blob);
    7554             : 
    7555          15 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7556          15 :         if (rc != 0) {
    7557           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7558           0 :                 return rc;
    7559             :         }
    7560          15 :         blob->parent_id = parent->u.snapshot.id;
    7561             : 
    7562          15 :         if (blob_is_esnap_clone(blob)) {
    7563             :                 /* Remove the xattr that references the external snapshot */
    7564           5 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7565           5 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7566           5 :         }
    7567             : 
    7568          15 :         bs_blob_list_add(blob);
    7569             : 
    7570          15 :         return 0;
    7571          15 : }
    7572             : 
    7573             : static void
    7574          25 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7575             : {
    7576          25 :         struct set_parent_ctx *ctx = cb_arg;
    7577          25 :         struct spdk_blob *blob = ctx->blob;
    7578             :         struct spdk_bs_dev *back_bs_dev;
    7579             : 
    7580          25 :         if (bserrno != 0) {
    7581           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7582           0 :                 ctx->bserrno = bserrno;
    7583           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7584           0 :                 return;
    7585             :         }
    7586             : 
    7587          25 :         ctx->parent.u.snapshot.blob = snapshot;
    7588          25 :         ctx->parent.u.snapshot.id = snapshot->id;
    7589             : 
    7590          25 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7591           5 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7592           5 :                 ctx->bserrno = -EINVAL;
    7593           5 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7594           5 :                 return;
    7595             :         }
    7596             : 
    7597          20 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7598           5 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7599           5 :                 ctx->bserrno = -EINVAL;
    7600           5 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7601           5 :                 return;
    7602             :         }
    7603             : 
    7604          15 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7605           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7606           0 :                 ctx->bserrno = -EBUSY;
    7607           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7608           0 :                 return;
    7609             :         }
    7610             : 
    7611          15 :         blob->locked_operation_in_progress = true;
    7612          15 :         snapshot->locked_operation_in_progress = true;
    7613             : 
    7614             :         /* Temporarily override md_ro flag for MD modification */
    7615          15 :         blob->md_ro = false;
    7616             : 
    7617          15 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7618             : 
    7619          30 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7620             :                              bs_set_parent_set_back_bs_dev_done,
    7621          15 :                              ctx);
    7622          25 : }
    7623             : 
    7624             : static void
    7625          30 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7626             : {
    7627          30 :         struct set_parent_ctx *ctx = cb_arg;
    7628             : 
    7629          30 :         if (bserrno != 0) {
    7630           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7631           0 :                 ctx->bserrno = bserrno;
    7632           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7633           0 :                 return;
    7634             :         }
    7635             : 
    7636          30 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7637           5 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7638           5 :                 ctx->bserrno = -EINVAL;
    7639           5 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7640           5 :                 return;
    7641             :         }
    7642             : 
    7643          25 :         ctx->blob = blob;
    7644          25 :         ctx->blob_md_ro = blob->md_ro;
    7645             : 
    7646          25 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7647          30 : }
    7648             : 
    7649             : void
    7650          45 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7651             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7652             : {
    7653             :         struct set_parent_ctx *ctx;
    7654             : 
    7655          45 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7656           5 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7657           5 :                 cb_fn(cb_arg, -EINVAL);
    7658           5 :                 return;
    7659             :         }
    7660             : 
    7661          40 :         if (blob_id == snapshot_id) {
    7662           5 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7663           5 :                 cb_fn(cb_arg, -EINVAL);
    7664           5 :                 return;
    7665             :         }
    7666             : 
    7667          35 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7668           5 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7669           5 :                 cb_fn(cb_arg, -EEXIST);
    7670           5 :                 return;
    7671             :         }
    7672             : 
    7673          30 :         ctx = calloc(1, sizeof(*ctx));
    7674          30 :         if (!ctx) {
    7675           0 :                 cb_fn(cb_arg, -ENOMEM);
    7676           0 :                 return;
    7677             :         }
    7678             : 
    7679          30 :         ctx->bs = bs;
    7680          30 :         ctx->parent.u.snapshot.id = snapshot_id;
    7681          30 :         ctx->cb_fn = cb_fn;
    7682          30 :         ctx->cb_arg = cb_arg;
    7683          30 :         ctx->bserrno = 0;
    7684             : 
    7685          30 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7686          45 : }
    7687             : /* END spdk_bs_blob_set_parent */
    7688             : 
    7689             : /* START spdk_bs_blob_set_external_parent */
    7690             : 
    7691             : static void
    7692          20 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7693             : {
    7694          20 :         struct set_parent_ctx *ctx = cb_arg;
    7695             : 
    7696          20 :         if (bserrno != 0) {
    7697           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7698           0 :                 if (ctx->bserrno == 0) {
    7699           0 :                         ctx->bserrno = bserrno;
    7700           0 :                 }
    7701           0 :         }
    7702             : 
    7703          20 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7704             : 
    7705          20 :         free(ctx->parent.u.esnap.id);
    7706          20 :         free(ctx);
    7707          20 : }
    7708             : 
    7709             : static void
    7710          10 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7711             : {
    7712          10 :         struct set_parent_ctx *ctx = cb_arg;
    7713          10 :         struct spdk_blob *blob = ctx->blob;
    7714             : 
    7715          10 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7716           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7717           0 :                 ctx->bserrno = bserrno;
    7718           0 :         }
    7719             : 
    7720             :         /* Revert md_ro to original state */
    7721          10 :         blob->md_ro = ctx->blob_md_ro;
    7722             : 
    7723          10 :         blob->locked_operation_in_progress = false;
    7724             : 
    7725          10 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7726          10 : }
    7727             : 
    7728             : static void
    7729          10 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7730             : {
    7731          10 :         struct set_parent_ctx *ctx = cb_arg;
    7732          10 :         struct spdk_blob *blob = ctx->blob;
    7733             : 
    7734          10 :         if (bserrno != 0) {
    7735           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7736           0 :                 ctx->bserrno = bserrno;
    7737           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7738           0 :                 return;
    7739             :         }
    7740             : 
    7741          10 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7742          10 : }
    7743             : 
    7744             : static int
    7745          10 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7746             : {
    7747             :         int rc;
    7748             : 
    7749          10 :         bs_blob_list_remove(blob);
    7750             : 
    7751          10 :         if (spdk_blob_is_clone(blob)) {
    7752             :                 /* Remove the xattr that references the snapshot */
    7753           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7754           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7755           0 :         }
    7756             : 
    7757          20 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7758          10 :                             parent->u.esnap.id_len, true);
    7759          10 :         if (rc != 0) {
    7760           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7761           0 :                 return rc;
    7762             :         }
    7763          10 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7764          10 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7765             : 
    7766          10 :         bs_blob_list_add(blob);
    7767             : 
    7768          10 :         return 0;
    7769          10 : }
    7770             : 
    7771             : static void
    7772          20 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7773             : {
    7774          20 :         struct set_parent_ctx *ctx = cb_arg;
    7775             :         const void *esnap_id;
    7776             :         size_t esnap_id_len;
    7777             :         int rc;
    7778             : 
    7779          20 :         if (bserrno != 0) {
    7780           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7781           0 :                 ctx->bserrno = bserrno;
    7782           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7783           0 :                 return;
    7784             :         }
    7785             : 
    7786          20 :         ctx->blob = blob;
    7787          20 :         ctx->blob_md_ro = blob->md_ro;
    7788             : 
    7789          20 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7790          20 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7791           5 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7792           5 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7793           5 :                 ctx->bserrno = -EEXIST;
    7794           5 :                 goto error;
    7795             :         }
    7796             : 
    7797          15 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7798           5 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7799           5 :                 ctx->bserrno = -EINVAL;
    7800           5 :                 goto error;
    7801             :         }
    7802             : 
    7803          10 :         if (blob->locked_operation_in_progress) {
    7804           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7805           0 :                 ctx->bserrno = -EBUSY;
    7806           0 :                 goto error;
    7807             :         }
    7808             : 
    7809          10 :         blob->locked_operation_in_progress = true;
    7810             : 
    7811             :         /* Temporarily override md_ro flag for MD modification */
    7812          10 :         blob->md_ro = false;
    7813             : 
    7814          20 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7815          10 :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7816          10 :         return;
    7817             : 
    7818             : error:
    7819          10 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7820          20 : }
    7821             : 
    7822             : void
    7823          30 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7824             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7825             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7826             : {
    7827             :         struct set_parent_ctx *ctx;
    7828             :         uint64_t esnap_dev_size, cluster_sz;
    7829             : 
    7830          30 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7831           5 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7832           5 :                 cb_fn(cb_arg, -EINVAL);
    7833           5 :                 return;
    7834             :         }
    7835             : 
    7836          25 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7837          25 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7838          25 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7839           5 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7840             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7841           5 :                 cb_fn(cb_arg, -EINVAL);
    7842           5 :                 return;
    7843             :         }
    7844             : 
    7845          20 :         ctx = calloc(1, sizeof(*ctx));
    7846          20 :         if (!ctx) {
    7847           0 :                 cb_fn(cb_arg, -ENOMEM);
    7848           0 :                 return;
    7849             :         }
    7850             : 
    7851          20 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7852          20 :         if (!ctx->parent.u.esnap.id) {
    7853           0 :                 free(ctx);
    7854           0 :                 cb_fn(cb_arg, -ENOMEM);
    7855           0 :                 return;
    7856             :         }
    7857             : 
    7858          20 :         ctx->bs = bs;
    7859          20 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7860          20 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7861          20 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7862          20 :         ctx->cb_fn = cb_fn;
    7863          20 :         ctx->cb_arg = cb_arg;
    7864          20 :         ctx->bserrno = 0;
    7865             : 
    7866          20 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7867          30 : }
    7868             : /* END spdk_bs_blob_set_external_parent */
    7869             : 
    7870             : /* START spdk_blob_resize */
    7871             : struct spdk_bs_resize_ctx { /* State carried through the freeze -> resize -> unfreeze steps of spdk_blob_resize() */
    7872             :         spdk_blob_op_complete cb_fn; /* Caller's completion callback, invoked with the final status */
    7873             :         void *cb_arg; /* Opaque argument passed back to cb_fn */
    7874             :         struct spdk_blob *blob; /* Blob being resized; its locked_operation_in_progress flag is cleared on completion */
    7875             :         uint64_t sz; /* Requested size handed to blob_resize() (logged as a cluster count by spdk_blob_resize()) */
    7876             :         int rc; /* Result of blob_resize(); when non-zero it is reported instead of the unfreeze status */
    7877             : };
    7878             : 
    7879             : static void
    7880         252 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7881             : {
    7882         252 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7883             : 
    7884         252 :         if (rc != 0) {
    7885           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7886           0 :         }
    7887             : 
    7888         252 :         if (ctx->rc != 0) {
    7889           5 :                 SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc);
    7890           5 :                 rc = ctx->rc;
    7891           5 :         }
    7892             : 
    7893         252 :         ctx->blob->locked_operation_in_progress = false;
    7894             : 
    7895         252 :         ctx->cb_fn(ctx->cb_arg, rc);
    7896         252 :         free(ctx);
    7897         252 : }
    7898             : 
    7899             : static void
    7900         252 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7901             : {
    7902         252 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7903             : 
    7904         252 :         if (rc != 0) {
    7905           0 :                 ctx->blob->locked_operation_in_progress = false;
    7906           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7907           0 :                 free(ctx);
    7908           0 :                 return;
    7909             :         }
    7910             : 
    7911         252 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7912             : 
    7913         252 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7914         252 : }
    7915             : 
    7916             : void
    7917         269 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7918             : {
    7919             :         struct spdk_bs_resize_ctx *ctx;
    7920             : 
    7921         269 :         blob_verify_md_op(blob);
    7922             : 
    7923         269 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7924             : 
    7925         269 :         if (blob->md_ro) {
    7926           5 :                 cb_fn(cb_arg, -EPERM);
    7927           5 :                 return;
    7928             :         }
    7929             : 
    7930         264 :         if (sz == blob->active.num_clusters) {
    7931          12 :                 cb_fn(cb_arg, 0);
    7932          12 :                 return;
    7933             :         }
    7934             : 
    7935         252 :         if (blob->locked_operation_in_progress) {
    7936           0 :                 cb_fn(cb_arg, -EBUSY);
    7937           0 :                 return;
    7938             :         }
    7939             : 
    7940         252 :         ctx = calloc(1, sizeof(*ctx));
    7941         252 :         if (!ctx) {
    7942           0 :                 cb_fn(cb_arg, -ENOMEM);
    7943           0 :                 return;
    7944             :         }
    7945             : 
    7946         252 :         blob->locked_operation_in_progress = true;
    7947         252 :         ctx->cb_fn = cb_fn;
    7948         252 :         ctx->cb_arg = cb_arg;
    7949         252 :         ctx->blob = blob;
    7950         252 :         ctx->sz = sz;
    7951         252 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7952         269 : }
    7953             : 
    7954             : /* END spdk_blob_resize */
    7955             : 
    7956             : 
    7957             : /* START spdk_bs_delete_blob */
    7958             : 
    7959             : static void
    7960        1862 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7961             : {
    7962        1862 :         spdk_bs_sequence_t *seq = cb_arg;
    7963             : 
    7964        1862 :         bs_sequence_finish(seq, bserrno);
    7965        1862 : }
    7966             : 
    7967             : static void
    7968        1862 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7969             : {
    7970        1862 :         struct spdk_blob *blob = cb_arg;
    7971             : 
    7972        1862 :         if (bserrno != 0) {
    7973             :                 /*
    7974             :                  * We already removed this blob from the blobstore tailq, so
    7975             :                  *  we need to free it here since this is the last reference
    7976             :                  *  to it.
    7977             :                  */
    7978           0 :                 blob_free(blob);
    7979           0 :                 bs_delete_close_cpl(seq, bserrno);
    7980           0 :                 return;
    7981             :         }
    7982             : 
    7983             :         /*
    7984             :          * This will immediately decrement the ref_count and call
    7985             :          *  the completion routine since the metadata state is clean.
    7986             :          *  By calling spdk_blob_close, we reduce the number of call
    7987             :          *  points into code that touches the blob->open_ref count
    7988             :          *  and the blobstore's blob list.
    7989             :          */
    7990        1862 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7991        1862 : }
    7992             : 
    7993             : struct delete_snapshot_ctx { /* State shared by the callbacks of the blob/snapshot delete flow */
    7994             :         struct spdk_blob_list *parent_snapshot_entry; /* Tested against NULL in delete_snapshot_sync_clone_cpl(); NOTE(review): presumably the list entry of the snapshot's own parent - confirm where it is assigned */
    7995             :         struct spdk_blob *snapshot; /* Snapshot being deleted; passed to cb_fn on completion */
    7996             :         struct spdk_blob_md_page *page; /* Released with spdk_free() in delete_blob_cleanup_finish() */
    7997             :         bool snapshot_md_ro; /* Saved md_ro value written back to the snapshot by the cleanup/restore steps */
    7998             :         struct spdk_blob *clone; /* Clone of the snapshot; unfrozen, unlocked and closed by the callbacks */
    7999             :         bool clone_md_ro; /* Saved md_ro value written back to the clone by the cleanup/restore steps */
    8000             :         spdk_blob_op_with_handle_complete cb_fn; /* Completion callback, called with the snapshot handle and bserrno */
    8001             :         void *cb_arg; /* Opaque argument passed back to cb_fn */
    8002             :         int bserrno; /* Error recorded for the operation; 0 while no step has failed */
    8003             :         uint32_t next_extent_page; /* Not used in the callbacks shown nearby - see the users elsewhere in this file */
    8004             : };
    8005             : 
    8006             : static void
    8007         138 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    8008             : {
    8009         138 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8010             : 
    8011         138 :         if (bserrno != 0) {
    8012           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    8013           0 :         }
    8014             : 
    8015         138 :         assert(ctx != NULL);
    8016             : 
    8017         138 :         if (bserrno != 0 && ctx->bserrno == 0) {
    8018           0 :                 ctx->bserrno = bserrno;
    8019           0 :         }
    8020             : 
    8021         138 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    8022         138 :         spdk_free(ctx->page);
    8023         138 :         free(ctx);
    8024         138 : }
    8025             : 
    8026             : static void
    8027          28 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    8028             : {
    8029          28 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8030             : 
    8031          28 :         if (bserrno != 0) {
    8032           0 :                 ctx->bserrno = bserrno;
    8033           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    8034           0 :         }
    8035             : 
    8036          28 :         if (ctx->bserrno != 0) {
    8037          28 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    8038          28 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    8039          28 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    8040          28 :         }
    8041             : 
    8042          28 :         ctx->snapshot->locked_operation_in_progress = false;
    8043          28 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8044             : 
    8045          28 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8046          28 : }
    8047             : 
    8048             : static void
    8049          15 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8050             : {
    8051          15 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8052             : 
    8053          15 :         ctx->clone->locked_operation_in_progress = false;
    8054          15 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8055             : 
    8056          15 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8057          15 : }
    8058             : 
    8059             : static void
    8060          60 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8061             : {
    8062          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8063             : 
    8064          60 :         if (bserrno) {
    8065           0 :                 ctx->bserrno = bserrno;
    8066           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8067           0 :                 return;
    8068             :         }
    8069             : 
    8070          60 :         ctx->clone->locked_operation_in_progress = false;
    8071          60 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8072          60 : }
    8073             : 
    8074             : static void
    8075          65 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8076             : {
    8077          65 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8078          65 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8079          65 :         struct spdk_blob_list *snapshot_entry = NULL;
    8080          65 :         struct spdk_blob_list *clone_entry = NULL;
    8081          65 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8082             : 
    8083          65 :         if (bserrno) {
    8084           5 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8085           5 :                 ctx->bserrno = bserrno;
    8086           5 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8087           5 :                 return;
    8088             :         }
    8089             : 
    8090             :         /* Get snapshot entry for the snapshot we want to remove */
    8091          60 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8092             : 
    8093          60 :         assert(snapshot_entry != NULL);
    8094             : 
    8095             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8096          60 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8097          60 :         assert(clone_entry != NULL);
    8098          60 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8099          60 :         snapshot_entry->clone_count--;
    8100          60 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8101             : 
    8102          60 :         switch (ctx->snapshot->parent_id) {
    8103             :         case SPDK_BLOBID_INVALID:
    8104             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8105             :                 /* No parent snapshot - just remove clone entry */
    8106          50 :                 free(clone_entry);
    8107          50 :                 break;
    8108             :         default:
    8109             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8110             :                  * update parent snapshot (remove current clone, add new one inherited from
    8111             :                  * the snapshot that is being removed) */
    8112             : 
    8113             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8114             :                  * snapshot that we are removing */
    8115          10 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8116             :                                                     &snapshot_clone_entry);
    8117             : 
    8118             :                 /* Switch clone entry in parent snapshot */
    8119          10 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8120          10 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8121          10 :                 free(snapshot_clone_entry);
    8122          10 :         }
    8123             : 
    8124             :         /* Restore md_ro flags */
    8125          60 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8126          60 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8127             : 
    8128          60 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8129          65 : }
    8130             : 
    8131             : static void
    8132          70 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8133             : {
    8134          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8135             :         uint64_t i;
    8136             : 
    8137          70 :         ctx->snapshot->md_ro = false;
    8138             : 
    8139          70 :         if (bserrno) {
    8140           5 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8141           5 :                 ctx->bserrno = bserrno;
    8142             : 
    8143             :                 /* Restore snapshot to previous state */
    8144           5 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8145           5 :                 if (bserrno != 0) {
    8146           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8147           0 :                         return;
    8148             :                 }
    8149             : 
    8150           5 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8151           5 :                 return;
    8152             :         }
    8153             : 
    8154             :         /* Clear cluster map entries for snapshot */
    8155         690 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8156         625 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8157         615 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8158         410 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8159         410 :                         }
    8160         615 :                         ctx->snapshot->active.clusters[i] = 0;
    8161         615 :                 }
    8162         625 :         }
    8163         143 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8164          78 :              i < ctx->clone->active.num_extent_pages; i++) {
    8165          39 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8166          36 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8167          36 :                 }
    8168          39 :         }
    8169             : 
    8170          65 :         blob_set_thin_provision(ctx->snapshot);
    8171          65 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8172             : 
    8173          65 :         if (ctx->parent_snapshot_entry != NULL) {
    8174          10 :                 ctx->snapshot->back_bs_dev = NULL;
    8175          10 :         }
    8176             : 
    8177          65 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8178          70 : }
    8179             : 
    8180             : static void
    8181          70 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8182             : {
    8183             :         int bserrno;
    8184             : 
    8185             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8186          70 :         blob_back_bs_destroy(ctx->clone);
    8187             : 
    8188             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8189          70 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8190          10 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8191             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8192          10 :                 if (bserrno != 0) {
    8193           0 :                         ctx->bserrno = bserrno;
    8194             : 
    8195             :                         /* Restore snapshot to previous state */
    8196           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8197           0 :                         if (bserrno != 0) {
    8198           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8199           0 :                                 return;
    8200             :                         }
    8201             : 
    8202           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8203           0 :                         return;
    8204             :                 }
    8205          10 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8206          10 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8207             :                 /* Do not delete the external snapshot along with this snapshot */
    8208          10 :                 ctx->snapshot->back_bs_dev = NULL;
    8209          10 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8210          70 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8211             :                 /* ...to parent snapshot */
    8212          10 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
    8213          10 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8214          10 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8215             :                                sizeof(spdk_blob_id),
    8216             :                                true);
    8217          10 :         } else {
    8218             :                 /* ...to blobid invalid and zeroes dev */
    8219          50 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8220          50 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8221          50 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8222             :         }
    8223             : 
    8224          70 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8225          70 : }
    8226             : 
    8227             : static void
    8228          73 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8229             : {
    8230          73 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8231             :         uint32_t *extent_page;
    8232             :         uint64_t i;
    8233             : 
    8234         154 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8235          81 :              i < ctx->clone->active.num_extent_pages; i++) {
    8236          42 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8237             :                         /* No extent page to use from snapshot */
    8238          12 :                         continue;
    8239             :                 }
    8240             : 
    8241          30 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8242          30 :                 if (*extent_page == 0) {
    8243             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8244          27 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8245          27 :                         continue;
    8246             :                 }
    8247             : 
    8248             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8249             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8250           3 :                 ctx->next_extent_page = i + 1;
    8251           3 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8252             : 
    8253           6 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8254           3 :                                        delete_snapshot_update_extent_pages, ctx);
    8255           3 :                 return;
    8256             :         }
    8257          70 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8258          73 : }
    8259             : 
    8260             : static void
    8261          75 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8262             : {
    8263          75 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8264             :         uint64_t i;
    8265             : 
    8266             :         /* Temporarily override md_ro flag for clone for MD modification */
    8267          75 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8268          75 :         ctx->clone->md_ro = false;
    8269             : 
    8270          75 :         if (bserrno) {
    8271           5 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8272           5 :                 ctx->bserrno = bserrno;
    8273           5 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8274           5 :                 return;
    8275             :         }
    8276             : 
    8277             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
    8278         745 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8279         675 :                 if (ctx->clone->active.clusters[i] == 0) {
    8280         665 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8281         665 :                         if (ctx->clone->active.clusters[i] != 0) {
    8282         460 :                                 ctx->clone->active.num_allocated_clusters++;
    8283         460 :                         }
    8284         665 :                 }
    8285         675 :         }
    8286          70 :         ctx->next_extent_page = 0;
    8287          70 :         delete_snapshot_update_extent_pages(ctx, 0);
    8288          75 : }
    8289             : 
    8290             : static void
    8291          10 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8292             : {
    8293          10 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8294             : 
    8295          10 :         if (bserrno != 0) {
    8296           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8297             :                             blob->id, bserrno);
    8298             :                 /* That error should not stop us from syncing metadata. */
    8299           0 :         }
    8300             : 
    8301          10 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8302          10 : }
    8303             : 
    8304             : static void
    8305          75 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8306             : {
    8307          75 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8308             : 
    8309          75 :         if (bserrno) {
    8310           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8311           0 :                 ctx->bserrno = bserrno;
    8312           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8313           0 :                 return;
    8314             :         }
    8315             : 
    8316             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8317          75 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8318          75 :         ctx->snapshot->md_ro = false;
    8319             : 
    8320             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8321          75 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8322             :                                       sizeof(spdk_blob_id), true);
    8323          75 :         if (ctx->bserrno != 0) {
    8324           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8325           0 :                 return;
    8326             :         }
    8327             : 
    8328          75 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8329          20 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8330             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8331          10 :                                                    ctx);
    8332          10 :                 return;
    8333             :         }
    8334             : 
    8335          65 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8336          75 : }
    8337             : 
    8338             : static void
    8339          88 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8340             : {
    8341          88 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8342             : 
    8343          88 :         if (bserrno) {
    8344          13 :                 SPDK_ERRLOG("Failed to open clone\n");
    8345          13 :                 ctx->bserrno = bserrno;
    8346          13 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8347          13 :                 return;
    8348             :         }
    8349             : 
    8350          75 :         ctx->clone = clone;
    8351             : 
    8352          75 :         if (clone->locked_operation_in_progress) {
    8353           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8354           0 :                 ctx->bserrno = -EBUSY;
    8355           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8356           0 :                 return;
    8357             :         }
    8358             : 
    8359          75 :         clone->locked_operation_in_progress = true;
    8360             : 
    8361          75 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8362          88 : }
    8363             : 
    8364             : static void
    8365          88 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8366             : {
    8367          88 :         struct spdk_blob_list *snapshot_entry = NULL;
    8368          88 :         struct spdk_blob_list *clone_entry = NULL;
    8369          88 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8370             : 
    8371             :         /* Get snapshot entry for the snapshot we want to remove */
    8372          88 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8373             : 
    8374          88 :         assert(snapshot_entry != NULL);
    8375             : 
    8376             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8377          88 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8378          88 :         assert(snapshot_entry->clone_count == 1);
    8379          88 :         assert(clone_entry != NULL);
    8380             : 
    8381             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8382             :          * snapshot that we are removing */
    8383          88 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8384             :                                             &snapshot_clone_entry);
    8385             : 
    8386          88 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8387          88 : }
    8388             : 
    8389             : static void
    8390        1940 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8391             : {
    8392        1940 :         spdk_bs_sequence_t *seq = cb_arg;
    8393        1940 :         struct spdk_blob_list *snapshot_entry = NULL;
    8394             :         uint32_t page_num;
    8395             : 
    8396        1940 :         if (bserrno) {
    8397          78 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8398          78 :                 bs_sequence_finish(seq, bserrno);
    8399          78 :                 return;
    8400             :         }
    8401             : 
    8402             :         /* Remove snapshot from the list */
    8403        1862 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8404        1862 :         if (snapshot_entry != NULL) {
    8405         180 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8406         180 :                 free(snapshot_entry);
    8407         180 :         }
    8408             : 
    8409        1862 :         page_num = bs_blobid_to_page(blob->id);
    8410        1862 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8411        1862 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8412        1862 :         blob->active.num_pages = 0;
    8413        1862 :         blob_resize(blob, 0);
    8414             : 
    8415        1862 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8416        1940 : }
    8417             : 
    8418             : static int
    8419        1940 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8420             : {
    8421        1940 :         struct spdk_blob_list *snapshot_entry = NULL;
    8422        1940 :         struct spdk_blob_list *clone_entry = NULL;
    8423        1940 :         struct spdk_blob *clone = NULL;
    8424        1940 :         bool has_one_clone = false;
    8425             : 
    8426             :         /* Check if this is a snapshot with clones */
    8427        1940 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8428        1940 :         if (snapshot_entry != NULL) {
    8429         243 :                 if (snapshot_entry->clone_count > 1) {
    8430          30 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8431          30 :                         return -EBUSY;
    8432         213 :                 } else if (snapshot_entry->clone_count == 1) {
    8433          88 :                         has_one_clone = true;
    8434          88 :                 }
    8435         213 :         }
    8436             : 
    8437             :         /* Check if someone has this blob open (besides this delete context):
    8438             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8439             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8440             :          *      and that is ok, because we will update it accordingly */
    8441        1910 :         if (blob->open_ref <= 2 && has_one_clone) {
    8442          88 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8443          88 :                 assert(clone_entry != NULL);
    8444          88 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8445             : 
    8446          88 :                 if (blob->open_ref == 2 && clone == NULL) {
    8447             :                         /* Clone is closed and someone else opened this blob */
    8448           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8449           0 :                         return -EBUSY;
    8450             :                 }
    8451             : 
    8452          88 :                 *update_clone = true;
    8453          88 :                 return 0;
    8454             :         }
    8455             : 
    8456        1822 :         if (blob->open_ref > 1) {
    8457          20 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8458          20 :                 return -EBUSY;
    8459             :         }
    8460             : 
    8461        1802 :         assert(has_one_clone == false);
    8462        1802 :         *update_clone = false;
    8463        1802 :         return 0;
    8464        1940 : }
    8465             : 
    8466             : static void
    8467           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8468             : {
    8469           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8470             : 
    8471           0 :         bs_sequence_finish(seq, -ENOMEM);
    8472           0 : }
    8473             : 
    8474             : static void
    8475        1953 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8476             : {
    8477        1953 :         spdk_bs_sequence_t *seq = cb_arg;
    8478             :         struct delete_snapshot_ctx *ctx;
    8479        1953 :         bool update_clone = false;
    8480             : 
    8481        1953 :         if (bserrno != 0) {
    8482          13 :                 bs_sequence_finish(seq, bserrno);
    8483          13 :                 return;
    8484             :         }
    8485             : 
    8486        1940 :         blob_verify_md_op(blob);
    8487             : 
    8488        1940 :         ctx = calloc(1, sizeof(*ctx));
    8489        1940 :         if (ctx == NULL) {
    8490           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8491           0 :                 return;
    8492             :         }
    8493             : 
    8494        1940 :         ctx->snapshot = blob;
    8495        1940 :         ctx->cb_fn = bs_delete_blob_finish;
    8496        1940 :         ctx->cb_arg = seq;
    8497             : 
    8498             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8499        1940 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8500        1940 :         if (ctx->bserrno) {
    8501          50 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8502          50 :                 return;
    8503             :         }
    8504             : 
    8505        1890 :         if (blob->locked_operation_in_progress) {
    8506           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8507           0 :                 ctx->bserrno = -EBUSY;
    8508           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8509           0 :                 return;
    8510             :         }
    8511             : 
    8512        1890 :         blob->locked_operation_in_progress = true;
    8513             : 
    8514             :         /*
    8515             :          * Remove the blob from the blob_store list now, to ensure it does not
    8516             :          *  get returned after this point by blob_lookup().
    8517             :          */
    8518        1890 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8519        1890 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8520             : 
    8521        1890 :         if (update_clone) {
    8522          88 :                 ctx->page = spdk_zmalloc(blob->bs->md_page_size, 0, NULL, SPDK_ENV_NUMA_ID_ANY,
    8523             :                                          SPDK_MALLOC_DMA);
    8524          88 :                 if (!ctx->page) {
    8525           0 :                         ctx->bserrno = -ENOMEM;
    8526           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8527           0 :                         return;
    8528             :                 }
    8529             :                 /* This blob is a snapshot with active clone - update clone first */
    8530          88 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8531          88 :         } else {
    8532             :                 /* This blob does not have any clones - just remove it */
    8533        1802 :                 bs_blob_list_remove(blob);
    8534        1802 :                 bs_delete_blob_finish(seq, blob, 0);
    8535        1802 :                 free(ctx);
    8536             :         }
    8537        1953 : }
    8538             : 
    8539             : void
    8540        1953 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8541             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8542             : {
    8543             :         struct spdk_bs_cpl      cpl;
    8544             :         spdk_bs_sequence_t      *seq;
    8545             : 
    8546        1953 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8547             : 
    8548        1953 :         assert(spdk_get_thread() == bs->md_thread);
    8549             : 
    8550        1953 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8551        1953 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8552        1953 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8553             : 
    8554        1953 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8555        1953 :         if (!seq) {
    8556           0 :                 cb_fn(cb_arg, -ENOMEM);
    8557           0 :                 return;
    8558             :         }
    8559             : 
    8560        1953 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8561        1953 : }
    8562             : 
    8563             : /* END spdk_bs_delete_blob */
    8564             : 
    8565             : /* START spdk_bs_open_blob */
    8566             : 
    8567             : static void
    8568        4341 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8569             : {
    8570        4341 :         struct spdk_blob *blob = cb_arg;
    8571             :         struct spdk_blob *existing;
    8572             : 
    8573        4341 :         if (bserrno != 0) {
    8574          82 :                 blob_free(blob);
    8575          82 :                 seq->cpl.u.blob_handle.blob = NULL;
    8576          82 :                 bs_sequence_finish(seq, bserrno);
    8577          82 :                 return;
    8578             :         }
    8579             : 
    8580        4259 :         existing = blob_lookup(blob->bs, blob->id);
    8581        4259 :         if (existing) {
    8582           5 :                 blob_free(blob);
    8583           5 :                 existing->open_ref++;
    8584           5 :                 seq->cpl.u.blob_handle.blob = existing;
    8585           5 :                 bs_sequence_finish(seq, 0);
    8586           5 :                 return;
    8587             :         }
    8588             : 
    8589        4254 :         blob->open_ref++;
    8590             : 
    8591        4254 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8592        4254 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8593             : 
    8594        4254 :         bs_sequence_finish(seq, bserrno);
    8595        4341 : }
    8596             : 
    8597             : static inline void
    8598           5 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8599             : {
    8600             : #define FIELD_OK(field) \
    8601             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8602             : 
    8603             : #define SET_FIELD(field) \
    8604             :         if (FIELD_OK(field)) { \
    8605             :                 dst->field = src->field; \
    8606             :         } \
    8607             : 
    8608           5 :         SET_FIELD(clear_method);
    8609           5 :         SET_FIELD(esnap_ctx);
    8610             : 
    8611           5 :         dst->opts_size = src->opts_size;
    8612             : 
    8613             :         /* You should not remove this statement, but need to update the assert statement
    8614             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8615             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8616             : 
    8617             : #undef FIELD_OK
    8618             : #undef SET_FIELD
    8619           5 : }
    8620             : 
    8621             : static void
    8622        5353 : bs_open_blob(struct spdk_blob_store *bs,
    8623             :              spdk_blob_id blobid,
    8624             :              struct spdk_blob_open_opts *opts,
    8625             :              spdk_blob_op_with_handle_complete cb_fn,
    8626             :              void *cb_arg)
    8627             : {
    8628             :         struct spdk_blob                *blob;
    8629             :         struct spdk_bs_cpl              cpl;
    8630             :         struct spdk_blob_open_opts      opts_local;
    8631             :         spdk_bs_sequence_t              *seq;
    8632             :         uint32_t                        page_num;
    8633             : 
    8634        5353 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8635        5353 :         assert(spdk_get_thread() == bs->md_thread);
    8636             : 
    8637        5353 :         page_num = bs_blobid_to_page(blobid);
    8638        5353 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8639             :                 /* Invalid blobid */
    8640          60 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8641          60 :                 return;
    8642             :         }
    8643             : 
    8644        5293 :         blob = blob_lookup(bs, blobid);
    8645        5293 :         if (blob) {
    8646         952 :                 blob->open_ref++;
    8647         952 :                 cb_fn(cb_arg, blob, 0);
    8648         952 :                 return;
    8649             :         }
    8650             : 
    8651        4341 :         blob = blob_alloc(bs, blobid);
    8652        4341 :         if (!blob) {
    8653           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8654           0 :                 return;
    8655             :         }
    8656             : 
    8657        4341 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8658        4341 :         if (opts) {
    8659           5 :                 blob_open_opts_copy(opts, &opts_local);
    8660           5 :         }
    8661             : 
    8662        4341 :         blob->clear_method = opts_local.clear_method;
    8663             : 
    8664        4341 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8665        4341 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8666        4341 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8667        4341 :         cpl.u.blob_handle.blob = blob;
    8668        4341 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8669             : 
    8670        4341 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8671        4341 :         if (!seq) {
    8672           0 :                 blob_free(blob);
    8673           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8674           0 :                 return;
    8675             :         }
    8676             : 
    8677        4341 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8678        5353 : }
    8679             : 
    8680             : void
    8681        5348 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8682             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8683             : {
                               /* Open blobid without caller-supplied options: forwards to
                                * bs_open_blob() with opts == NULL. The outcome is reported
                                * through cb_fn(cb_arg, ...). */
    8684        5348 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8685        5348 : }
    8686             : 
    8687             : void
    8688           5 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8689             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8690             : {
                               /* Same as spdk_bs_open_blob(), except that the caller's opts
                                * pointer is handed to bs_open_blob() unchanged. */
    8691           5 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8692           5 : }
    8693             : 
    8694             : /* END spdk_bs_open_blob */
    8695             : 
    8696             : /* START spdk_blob_set_read_only */
    8697             : int
    8698         296 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8699             : {
                               /* Request that the blob become read-only. Only the flag in
                                * data_ro_flags is set here and the blob is marked dirty; the
                                * in-memory data_ro/md_ro booleans are switched on later, in
                                * blob_sync_md_cpl(), after a successful persist.
                                * Always returns 0. */
    8700         296 :         blob_verify_md_op(blob);
    8701             : 
    8702         296 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8703             : 
                               /* Dirty state makes the flag change eligible for the next persist. */
    8704         296 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8705         296 :         return 0;
    8706             : }
    8707             : /* END spdk_blob_set_read_only */
    8708             : 
    8709             : /* START spdk_blob_sync_md */
    8710             : 
    8711             : static void
    8712        1927 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8713             : {
                               /* Completion of the blob_persist() issued by blob_sync_md().
                                * cb_arg is the blob that was persisted. */
    8714        1927 :         struct spdk_blob *blob = cb_arg;
    8715             : 
                               /* Only after the read-only flag was persisted successfully do the
                                * in-memory data_ro and md_ro booleans become true. */
    8716        1927 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8717         497 :                 blob->data_ro = true;
    8718         497 :                 blob->md_ro = true;
    8719         497 :         }
    8720             : 
                               /* Finish the sequence with the persist status, success or failure. */
    8721        1927 :         bs_sequence_finish(seq, bserrno);
    8722        1927 : }
    8723             : 
    8724             : static void
    8725        1927 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8726             : {
                               /* Internal metadata sync: starts a sequence on the blobstore's
                                * md_channel and persists the blob. cb_fn(cb_arg, ...) is carried
                                * in the sequence's completion descriptor; blob_sync_md_cpl()
                                * runs first when the persist completes.
                                * Unlike spdk_blob_sync_md(), no md_ro check is made here. */
    8727             :         struct spdk_bs_cpl      cpl;
    8728             :         spdk_bs_sequence_t      *seq;
    8729             : 
    8730        1927 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8731        1927 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8732        1927 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8733             : 
    8734        1927 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8735        1927 :         if (!seq) {
                                       /* No sequence available: report -ENOMEM directly. */
    8736           0 :                 cb_fn(cb_arg, -ENOMEM);
    8737           0 :                 return;
    8738             :         }
    8739             : 
    8740        1927 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8741        1927 : }
    8742             : 
    8743             : void
    8744        1370 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8745             : {
                               /* Public metadata sync. Completes through cb_fn(cb_arg, status). */
    8746        1370 :         blob_verify_md_op(blob);
    8747             : 
    8748        1370 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8749             : 
                               /* A blob whose metadata is read-only is expected to be clean
                                * (asserted), so it completes immediately with status 0 and
                                * nothing is persisted. */
    8750        1370 :         if (blob->md_ro) {
    8751           5 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8752           5 :                 cb_fn(cb_arg, 0);
    8753           5 :                 return;
    8754             :         }
    8755             : 
    8756        1365 :         blob_sync_md(blob, cb_fn, cb_arg);
    8757        1370 : }
    8758             : 
    8759             : /* END spdk_blob_sync_md */
    8760             : 
    8761             : struct spdk_blob_cluster_op_ctx {
                               /* Context for a cluster insert/free request that is handed to the
                                * blobstore md_thread and completed back on the requesting thread.
                                * Allocated in blob_insert_cluster_on_md_thread() /
                                * blob_free_cluster_on_md_thread(), freed in
                                * blob_op_cluster_msg_cpl(). */
    8762             :         struct spdk_thread      *thread; /* thread that issued the request (spdk_get_thread()) */
    8763             :         struct spdk_blob        *blob;
    8764             :         uint32_t                cluster_num;    /* cluster index in blob */
                               /* NOTE(review): blob_insert_cluster_on_md_thread() takes a uint64_t
                                * cluster and stores it in this 32-bit field - confirm that cluster
                                * numbers always fit in 32 bits. */
    8765             :         uint32_t                cluster;        /* cluster on disk */
    8766             :         uint32_t                extent_page;    /* extent page on disk */
    8767             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    8768             :         int                     rc; /* status passed to cb_fn by blob_op_cluster_msg_cpl() */
    8769             :         spdk_blob_op_complete   cb_fn; /* caller's completion callback */
    8770             :         void                    *cb_arg; /* argument for cb_fn */
    8771             : };
    8772             : 
    8773             : static void
    8774        1105 : blob_op_cluster_msg_cpl(void *arg)
    8775             : {
                               /* Final step of a cluster operation. Delivered as a message to
                                * ctx->thread; calls the caller's callback with the stored status
                                * and releases the context. */
    8776        1105 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8777             : 
    8778        1105 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8779        1105 :         free(ctx);
    8780        1105 : }
    8781             : 
    8782             : static void
    8783        1061 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8784             : {
                               /* Completion used by the cluster insert/free paths: stores the
                                * status in the context and sends blob_op_cluster_msg_cpl() to the
                                * thread recorded in ctx->thread. */
    8785        1061 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8786             : 
    8787        1061 :         ctx->rc = bserrno;
    8788        1061 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8789        1061 : }
    8790             : 
    8791             : static void
    8792         126 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8793             : {
                               /* Completion passed to blob_write_extent_page() by
                                * blob_insert_cluster_msg() when a newly claimed extent page was
                                * written. Records the page in the blob's extent page slot for
                                * cluster_num, marks the blob dirty and syncs its metadata.
                                * NOTE(review): bserrno is not inspected, so the extent page is
                                * recorded and the md sync started even if the write reported an
                                * error - confirm this is intended. */
    8794         126 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8795             :         uint32_t *extent_page;
    8796             : 
    8797         126 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8798         126 :         *extent_page = ctx->extent_page;
    8799         126 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8800         126 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8801         126 : }
    8802             : 
    8803             : struct spdk_blob_write_extent_page_ctx {
                               /* Per-write context of blob_write_extent_page(); freed in
                                * blob_persist_extent_page_cpl(). */
    8804             :         struct spdk_blob_store          *bs;
    8805             : 
    8806             :         uint32_t                        extent; /* md page index; converted with bs_md_page_to_lba() */
    8807             :         struct spdk_blob_md_page        *page; /* buffer written to disk; not freed with this ctx */
    8808             : };
    8809             : 
    8810             : static void
    8811          39 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8812             : {
                               /* Last md-thread step of freeing a cluster when the extent table is
                                * in use: releases ctx->cluster while holding used_lock, then sends
                                * the completion to the requesting thread.
                                * The cluster is released regardless of the value of bserrno. */
    8813          39 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8814             : 
    8815          39 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8816          39 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8817          39 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8818             : 
    8819          39 :         ctx->rc = bserrno;
    8820          39 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8821          39 : }
    8822             : 
    8823             : static void
    8824          39 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8825             : {
                               /* Completion of the extent page rewrite issued by
                                * blob_free_cluster_msg() when the extent page stays in use. */
    8826          39 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8827             : 
                               /* On a write error, or when bs->clean is 0, skip the metadata sync
                                * and go straight to releasing the cluster and completing. */
    8828          39 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8829          39 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8830          39 :                 return;
    8831             :         }
    8832             : 
                               /* Otherwise sync the blob metadata first; the cluster is released
                                * in blob_free_cluster_msg_cb() once the sync completes. */
    8833           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8834           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8835          39 : }
    8836             : 
    8837             : static void
    8838           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8839             : {
                               /* Completion of the extent page write issued by
                                * blob_free_cluster_msg() when no cluster in the extent page remains
                                * allocated. Releases the md page ctx->extent_page under used_lock,
                                * marks the blob dirty and syncs metadata; the cluster itself is
                                * released afterwards in blob_free_cluster_msg_cb().
                                * NOTE(review): bserrno is not inspected here - confirm that
                                * releasing the page after a failed write is intended. */
    8840           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8841             : 
    8842           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8843           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8844           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8845           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8846           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8847           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8848           0 : }
    8849             : 
    8850             : static void
    8851         657 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8852             : {
                               /* End of an extent page write: frees the write context (not the
                                * page buffer it points to) and finishes the sequence with the
                                * given status. */
    8853         657 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8854             : 
    8855         657 :         free(ctx);
    8856         657 :         bs_sequence_finish(seq, bserrno);
    8857         657 : }
    8858             : 
    8859             : static void
    8860         657 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8861             : {
                               /* Callback given to bs_mark_dirty() by blob_write_extent_page(). */
    8862         657 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8863             : 
                               /* If marking dirty failed, finish with that error without writing. */
    8864         657 :         if (bserrno != 0) {
    8865           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8866           0 :                 return;
    8867             :         }
                               /* Write ctx->page to the LBA of md page ctx->extent; the length is
                                * md_page_size bytes expressed in LBAs. */
    8868        1314 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8869         657 :                               bs_byte_to_lba(ctx->bs, ctx->bs->md_page_size),
    8870         657 :                               blob_persist_extent_page_cpl, ctx);
    8871         657 : }
    8872             : 
    8873             : static void
    8874         657 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8875             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8876             : {
                               /* Serialize the extent page covering cluster_num into page and
                                * write it to md page index extent.
                                * Flow: bs_mark_dirty() -> blob_write_extent_page_ready() (issues
                                * the write) -> blob_persist_extent_page_cpl() (finishes the
                                * sequence, which carries cb_fn/cb_arg).
                                * cb_fn is called with -ENOMEM if the context or the sequence
                                * cannot be allocated. The page buffer is not freed on this path. */
    8877             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8878             :         spdk_bs_sequence_t                      *seq;
    8879             :         struct spdk_bs_cpl                      cpl;
    8880             : 
    8881         657 :         ctx = calloc(1, sizeof(*ctx));
    8882         657 :         if (!ctx) {
    8883           0 :                 cb_fn(cb_arg, -ENOMEM);
    8884           0 :                 return;
    8885             :         }
    8886         657 :         ctx->bs = blob->bs;
    8887         657 :         ctx->extent = extent;
    8888         657 :         ctx->page = page;
    8889             : 
    8890         657 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8891         657 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8892         657 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8893             : 
    8894         657 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8895         657 :         if (!seq) {
    8896           0 :                 free(ctx);
    8897           0 :                 cb_fn(cb_arg, -ENOMEM);
    8898           0 :                 return;
    8899             :         }
    8900             : 
                               /* page must be non-NULL; this is only enforced by the assert, and
                                * the pointer is dereferenced immediately below. */
    8901         657 :         assert(page);
                               /* Header fields: no next page, owning blob id, sequence number 0. */
    8902         657 :         page->next = SPDK_INVALID_MD_PAGE;
    8903         657 :         page->id = blob->id;
    8904         657 :         page->sequence_num = 0;
    8905             : 
    8906         657 :         blob_serialize_extent_page(blob, cluster_num, page);
    8907             : 
                               /* The CRC is computed after serialization, over the filled page. */
    8908         657 :         page->crc = blob_md_page_calc_crc(page);
    8909             : 
                               /* The target md page must already be claimed in used_md_pages. */
    8910         657 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8911             : 
    8912         657 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8913         657 : }
    8914             : 
    8915             : static void
    8916        1030 : blob_insert_cluster_msg(void *arg)
    8917             : {
                               /* Message handler sent to bs->md_thread by
                                * blob_insert_cluster_on_md_thread(). Inserts ctx->cluster at
                                * ctx->cluster_num and then persists the change, either via a
                                * metadata sync (no extent table) or via an extent page write. */
    8918        1030 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8919             :         uint32_t *extent_page;
    8920             : 
    8921        1030 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8922        1030 :         if (ctx->rc != 0) {
                                       /* Insert failed: report ctx->rc to the requesting thread. */
    8923           5 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8924           5 :                 return;
    8925             :         }
    8926             : 
    8927        1025 :         if (ctx->blob->use_extent_table == false) {
    8928             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8929         410 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8930         410 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8931         410 :                 return;
    8932             :         }
    8933             : 
                               /* A slot value of 0 means no extent page is recorded yet for the
                                * range containing cluster_num. */
    8934         615 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8935         615 :         if (*extent_page == 0) {
    8936             :                 /* Extent page requires allocation.
    8937             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8938         126 :                 assert(ctx->extent_page != 0);
    8939         126 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
                                       /* blob_insert_new_ep_cb() records the page in the slot and syncs md. */
    8940         252 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8941         126 :                                        blob_insert_new_ep_cb, ctx);
    8942         126 :         } else {
    8943             :                 /* It is possible for original thread to allocate extent page for
    8944             :                  * different cluster in the same extent page. In such case proceed with
    8945             :                  * updating the existing extent page, but release the additional one. */
    8946         489 :                 if (ctx->extent_page != 0) {
    8947           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8948           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8949           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8950           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8951           0 :                         ctx->extent_page = 0;
    8952           0 :                 }
    8953             :                 /* Extent page already allocated.
    8954             :                  * Every cluster allocation, requires just an update of single extent page. */
    8955         978 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8956         489 :                                        blob_op_cluster_msg_cb, ctx);
    8957             :         }
    8958        1030 : }
    8959             : 
    8960             : static void
    8961        1030 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8962             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8963             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8964             : {
                               /* Package a cluster insertion into a heap-allocated context and send
                                * it to the blobstore md_thread (blob_insert_cluster_msg()). The
                                * calling thread is remembered so the completion is sent back to
                                * it. cb_fn is called with -ENOMEM if the context cannot be
                                * allocated. */
    8965             :         struct spdk_blob_cluster_op_ctx *ctx;
    8966             : 
    8967        1030 :         ctx = calloc(1, sizeof(*ctx));
    8968        1030 :         if (ctx == NULL) {
    8969           0 :                 cb_fn(cb_arg, -ENOMEM);
    8970           0 :                 return;
    8971             :         }
    8972             : 
    8973        1030 :         ctx->thread = spdk_get_thread();
    8974        1030 :         ctx->blob = blob;
    8975        1030 :         ctx->cluster_num = cluster_num;
                               /* NOTE(review): cluster is uint64_t but ctx->cluster is uint32_t, so
                                * this assignment narrows - confirm the value always fits. */
    8976        1030 :         ctx->cluster = cluster;
    8977        1030 :         ctx->extent_page = extent_page;
    8978        1030 :         ctx->page = page;
    8979        1030 :         ctx->cb_fn = cb_fn;
    8980        1030 :         ctx->cb_arg = cb_arg;
    8981             : 
    8982        1030 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8983        1030 : }
    8984             : 
    8985             : static void
    8986          75 : blob_free_cluster_msg(void *arg)
    8987             : {
                               /* Message handler sent to bs->md_thread by
                                * blob_free_cluster_on_md_thread(). Clears the blob's mapping for
                                * ctx->cluster_num and persists the change; the on-disk cluster is
                                * released either here (no extent table) or in
                                * blob_free_cluster_msg_cb() (extent table in use). */
    8988          75 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8989             :         uint32_t *extent_page;
    8990             :         uint32_t start_cluster_idx;
    8991          75 :         bool free_extent_page = true;
    8992             :         size_t i;
    8993             : 
                               /* active.clusters[] holds LBAs; convert the entry to a cluster number. */
    8994          75 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    8995             : 
    8996             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    8997          75 :         if (ctx->cluster == 0) {
    8998          10 :                 blob_op_cluster_msg_cb(ctx, 0);
    8999          10 :                 return;
    9000             :         }
    9001             : 
    9002          65 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
                               /* NOTE(review): ctx->cluster is known to be non-zero here because of
                                * the early return above, so this test is always true. */
    9003          65 :         if (ctx->cluster != 0) {
    9004          65 :                 ctx->blob->active.num_allocated_clusters--;
    9005          65 :         }
    9006             : 
    9007          65 :         if (ctx->blob->use_extent_table == false) {
    9008             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    9009          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    9010          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    9011          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    9012          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    9013          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    9014          26 :                 return;
    9015             :         }
    9016             : 
    9017          39 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    9018             : 
    9019             :         /* There shouldn't be parallel release operations on same cluster */
    9020          39 :         assert(*extent_page == ctx->extent_page);
    9021             : 
                               /* Scan the SPDK_EXTENTS_PER_EP cluster entries of the group that
                                * contains cluster_num; any non-zero entry keeps the extent page.
                                * NOTE(review): no bound check against the size of
                                * active.clusters[] is visible here - confirm the array always
                                * covers the whole group. */
    9022          39 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    9023          72 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    9024          72 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    9025          39 :                         free_extent_page = false;
    9026          39 :                         break;
    9027             :                 }
    9028          33 :         }
    9029             : 
    9030          39 :         if (free_extent_page) {
                                       /* No cluster left in this group: clear the extent page slot,
                                        * write the page, then release it in
                                        * blob_free_cluster_free_ep_cb(). */
    9031           0 :                 assert(ctx->extent_page != 0);
    9032           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    9033           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    9034           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    9035           0 :                                        blob_free_cluster_free_ep_cb, ctx);
    9036           0 :         } else {
                                       /* Extent page still in use: rewrite it with the cleared entry. */
    9037          78 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    9038          39 :                                        blob_free_cluster_update_ep_cb, ctx);
    9039             :         }
    9040          75 : }
    9041             : 
    9042             : 
    9043             : static void
    9044          75 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9045             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9046             : {
                               /* Package a cluster release into a heap-allocated context and send
                                * it to the blobstore md_thread (blob_free_cluster_msg()). ctx->cluster
                                * is left zeroed by calloc and filled in on the md thread. cb_fn is
                                * called with -ENOMEM if the context cannot be allocated. */
    9047             :         struct spdk_blob_cluster_op_ctx *ctx;
    9048             : 
    9049          75 :         ctx = calloc(1, sizeof(*ctx));
    9050          75 :         if (ctx == NULL) {
    9051           0 :                 cb_fn(cb_arg, -ENOMEM);
    9052           0 :                 return;
    9053             :         }
    9054             : 
    9055          75 :         ctx->thread = spdk_get_thread();
    9056          75 :         ctx->blob = blob;
    9057          75 :         ctx->cluster_num = cluster_num;
    9058          75 :         ctx->extent_page = extent_page;
    9059          75 :         ctx->page = page;
    9060          75 :         ctx->cb_fn = cb_fn;
    9061          75 :         ctx->cb_arg = cb_arg;
    9062             : 
    9063          75 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9064          75 : }
    9065             : 
    9066             : /* START spdk_blob_close */
    9067             : 
    9068             : static void
    9069        5211 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9070             : {
                               /* Completion of the blob_persist() issued when closing a blob.
                                * The open reference is dropped only when the persist succeeded; on
                                * error open_ref is left unchanged and the error is reported. */
    9071        5211 :         struct spdk_blob *blob = cb_arg;
    9072             : 
    9073        5211 :         if (bserrno == 0) {
    9074        5211 :                 blob->open_ref--;
                                       /* Last reference gone: unregister (if still registered) and free. */
    9075        5211 :                 if (blob->open_ref == 0) {
    9076             :                         /*
    9077             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9078             :                          *  these blobs are removed from the blob_store list
    9079             :                          *  when the deletion process starts - so don't try to
    9080             :                          *  remove them again.
    9081             :                          */
    9082        4254 :                         if (blob->active.num_pages > 0) {
    9083        2392 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9084        2392 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9085        2392 :                         }
    9086        4254 :                         blob_free(blob);
    9087        4254 :                 }
    9088        5211 :         }
    9089             : 
    9090        5211 :         bs_sequence_finish(seq, bserrno);
    9091        5211 : }
    9092             : 
    9093             : static void
    9094         140 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9095             : {
                               /* Callback given to blob_esnap_destroy_bs_dev_channels() by
                                * spdk_blob_close(). cb_arg is the close sequence started there. */
    9096         140 :         spdk_bs_sequence_t      *seq = cb_arg;
    9097             : 
    9098         140 :         if (bserrno != 0) {
                                       /* Finish the close with the error; metadata is not persisted. */
    9099           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9100             :                               blob->id, bserrno);
    9101           0 :                 bs_sequence_finish(seq, bserrno);
    9102           0 :                 return;
    9103             :         }
    9104             : 
    9105         140 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9106             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9107             : 
    9108             :         /* Sync metadata */
    9109         140 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9110         140 : }
    9111             : 
    9112             : void
    9113        5211 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9114             : {
                               /* Close one open reference to blob. Metadata is persisted first;
                                * the reference is dropped in blob_close_cpl() if that succeeds.
                                * cb_fn receives -EBADF when the blob has no open reference and
                                * -ENOMEM when no sequence can be started. */
    9115             :         struct spdk_bs_cpl      cpl;
    9116             :         spdk_bs_sequence_t      *seq;
    9117             : 
    9118        5211 :         blob_verify_md_op(blob);
    9119             : 
    9120        5211 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9121             : 
    9122        5211 :         if (blob->open_ref == 0) {
    9123           0 :                 cb_fn(cb_arg, -EBADF);
    9124           0 :                 return;
    9125             :         }
    9126             : 
    9127        5211 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9128        5211 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9129        5211 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9130             : 
    9131        5211 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9132        5211 :         if (!seq) {
    9133           0 :                 cb_fn(cb_arg, -ENOMEM);
    9134           0 :                 return;
    9135             :         }
    9136             : 
                               /* When this is the last reference to an esnap clone, destroy its
                                * bs_dev channels first; blob_close_esnap_done() then persists. */
    9137        5211 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9138         140 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9139         140 :                 return;
    9140             :         }
    9141             : 
    9142             :         /* Sync metadata */
    9143        5071 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9144        5211 : }
    9145             : 
    9146             : /* END spdk_blob_close */
    9147             : 
    9148         276 : struct spdk_io_channel *spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    9149             : {
                               /* Returns whatever spdk_get_io_channel() yields for bs, which is
                                * used as the io_device handle. */
    9150         276 :         return spdk_get_io_channel(bs);
    9151             : }
    9152             : 
    9153             : void
    9154         276 : spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    9155             : {
                               /* Tear down esnap state attached to the channel context before
                                * dropping the channel reference. */
    9156         276 :         blob_esnap_destroy_bs_channel(spdk_io_channel_get_ctx(channel));
    9157         276 :         spdk_put_io_channel(channel);
    9158         276 : }
    9159             : 
    9160             : void
    9161         140 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9162             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9163             : {
                               /* Unmap request: forwarded to blob_request_submit_op() with a NULL
                                * payload and operation type SPDK_BLOB_UNMAP. */
    9164         140 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9165             :                                SPDK_BLOB_UNMAP);
    9166         140 : }
    9167             : 
    9168             : void
    9169          60 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9170             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9171             : {
                               /* Write-zeroes request: forwarded to blob_request_submit_op() with
                                * a NULL payload and operation type SPDK_BLOB_WRITE_ZEROES. */
    9172          60 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9173             :                                SPDK_BLOB_WRITE_ZEROES);
    9174          60 : }
    9175             : 
    9176             : void
    9177       27349 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9178             :                    void *payload, uint64_t offset, uint64_t length,
    9179             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9180             : {
                               /* Single-buffer write: forwarded to blob_request_submit_op() with
                                * operation type SPDK_BLOB_WRITE. */
    9181       27349 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9182             :                                SPDK_BLOB_WRITE);
    9183       27349 : }
    9184             : 
    9185             : void
    9186       26143 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9187             :                   void *payload, uint64_t offset, uint64_t length,
    9188             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9189             : {
                               /* Single-buffer read: forwarded to blob_request_submit_op() with
                                * operation type SPDK_BLOB_READ. */
    9190       26143 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9191             :                                SPDK_BLOB_READ);
    9192       26143 : }
    9193             : 
    9194             : void
    9195         175 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9196             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9197             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9198             : {
                               /* Vectored write without extended options: forwarded to
                                * blob_request_submit_rw_iov() with the boolean argument false
                                * (presumably the "read" flag - the readv variants pass true) and
                                * a NULL io_opts. */
    9199         175 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9200         175 : }
    9201             : 
    9202             : void
    9203        1655 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9204             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9205             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9206             : {
                               /* Vectored read without extended options: forwarded to
                                * blob_request_submit_rw_iov() with the boolean argument true
                                * (presumably the "read" flag - the writev variants pass false)
                                * and a NULL io_opts. */
    9207        1655 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9208        1655 : }
    9209             : 
    9210             : void
    9211         260 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9212             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9213             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9214             : {
                               /* Vectored write that also forwards the caller's io_opts; otherwise
                                * identical to spdk_blob_io_writev(). */
    9215         520 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9216         260 :                                    io_opts);
    9217         260 : }
    9218             : 
    9219             : void
    9220        2105 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9221             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9222             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9223             : {
                               /* Vectored read that also forwards the caller's io_opts; otherwise
                                * identical to spdk_blob_io_readv(). */
    9224        4210 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9225        2105 :                                    io_opts);
    9226        2105 : }
    9227             : 
    9228             : struct spdk_bs_iter_ctx {
                               /* State of a blob iteration; allocated in spdk_bs_iter_first() and
                                * freed in bs_iter_cpl() when a blob is delivered or the iteration
                                * ends. */
    9229             :         int64_t page_num; /* current index into bs->used_blobids; -1 before the first search */
    9230             :         struct spdk_blob_store *bs;
    9231             : 
    9232             :         spdk_blob_op_with_handle_complete cb_fn; /* caller's callback, receives the opened blob */
    9233             :         void *cb_arg;
    9234             : };
    9235             : 
    9236             : static void
    9237        1460 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9238             : {
                               /* Iteration step. Used both as the completion of spdk_bs_open_blob()
                                * and, with bserrno == -1, as the "advance" entry point from
                                * spdk_bs_iter_first() and bs_iter_close_cpl(). */
    9239        1460 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9240        1460 :         struct spdk_blob_store *bs = ctx->bs;
    9241             :         spdk_blob_id id;
    9242             : 
                               /* A blob was opened successfully: hand it to the caller and stop. */
    9243        1460 :         if (bserrno == 0) {
    9244         557 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9245         557 :                 free(ctx);
    9246         557 :                 return;
    9247             :         }
    9248             : 
                               /* Any non-zero status (including a failed open) moves on to the
                                * next set bit in used_blobids after the current position. */
    9249         903 :         ctx->page_num++;
    9250         903 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
                               /* A result at or beyond the array capacity means there are no more
                                * blobs: report -ENOENT with a NULL blob. */
    9251         903 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9252         336 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9253         336 :                 free(ctx);
    9254         336 :                 return;
    9255             :         }
    9256             : 
    9257         567 :         id = bs_page_to_blobid(ctx->page_num);
    9258             : 
                               /* Open the candidate; this function is called again with the result. */
    9259         567 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9260        1460 : }
    9261             : 
    9262             : void
    9263         366 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9264             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9265             : {
    9266             :         struct spdk_bs_iter_ctx *ctx;
    9267             : 
    9268         366 :         ctx = calloc(1, sizeof(*ctx));
    9269         366 :         if (!ctx) {
    9270           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9271           0 :                 return;
    9272             :         }
    9273             : 
    9274         366 :         ctx->page_num = -1;
    9275         366 :         ctx->bs = bs;
    9276         366 :         ctx->cb_fn = cb_fn;
    9277         366 :         ctx->cb_arg = cb_arg;
    9278             : 
    9279         366 :         bs_iter_cpl(ctx, NULL, -1);
    9280         366 : }
    9281             : 
    9282             : static void
    9283         527 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9284             : {
    9285         527 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9286             : 
    9287         527 :         bs_iter_cpl(ctx, NULL, -1);
    9288         527 : }
    9289             : 
    9290             : void
    9291         527 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9292             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9293             : {
    9294             :         struct spdk_bs_iter_ctx *ctx;
    9295             : 
    9296         527 :         assert(blob != NULL);
    9297             : 
    9298         527 :         ctx = calloc(1, sizeof(*ctx));
    9299         527 :         if (!ctx) {
    9300           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9301           0 :                 return;
    9302             :         }
    9303             : 
    9304         527 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9305         527 :         ctx->bs = bs;
    9306         527 :         ctx->cb_fn = cb_fn;
    9307         527 :         ctx->cb_arg = cb_arg;
    9308             : 
    9309             :         /* Close the existing blob */
    9310         527 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9311         527 : }
    9312             : 
    9313             : static int
    9314        1178 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9315             :                uint16_t value_len, bool internal)
    9316             : {
    9317             :         struct spdk_xattr_tailq *xattrs;
    9318             :         struct spdk_xattr       *xattr;
    9319             :         size_t                  desc_size;
    9320             :         void                    *tmp;
    9321             : 
    9322        1178 :         blob_verify_md_op(blob);
    9323             : 
    9324        1178 :         if (blob->md_ro) {
    9325           5 :                 return -EPERM;
    9326             :         }
    9327             : 
    9328        1173 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9329        1173 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9330           5 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9331             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9332           5 :                 return -ENOMEM;
    9333             :         }
    9334             : 
    9335        1168 :         if (internal) {
    9336         917 :                 xattrs = &blob->xattrs_internal;
    9337         917 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9338         917 :         } else {
    9339         251 :                 xattrs = &blob->xattrs;
    9340             :         }
    9341             : 
    9342        1438 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9343         402 :                 if (!strcmp(name, xattr->name)) {
    9344         132 :                         tmp = malloc(value_len);
    9345         132 :                         if (!tmp) {
    9346           0 :                                 return -ENOMEM;
    9347             :                         }
    9348             : 
    9349         132 :                         free(xattr->value);
    9350         132 :                         xattr->value_len = value_len;
    9351         132 :                         xattr->value = tmp;
    9352         132 :                         memcpy(xattr->value, value, value_len);
    9353             : 
    9354         132 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9355             : 
    9356         132 :                         return 0;
    9357             :                 }
    9358         270 :         }
    9359             : 
    9360        1036 :         xattr = calloc(1, sizeof(*xattr));
    9361        1036 :         if (!xattr) {
    9362           0 :                 return -ENOMEM;
    9363             :         }
    9364             : 
    9365        1036 :         xattr->name = strdup(name);
    9366        1036 :         if (!xattr->name) {
    9367           0 :                 free(xattr);
    9368           0 :                 return -ENOMEM;
    9369             :         }
    9370             : 
    9371        1036 :         xattr->value_len = value_len;
    9372        1036 :         xattr->value = malloc(value_len);
    9373        1036 :         if (!xattr->value) {
    9374           0 :                 free(xattr->name);
    9375           0 :                 free(xattr);
    9376           0 :                 return -ENOMEM;
    9377             :         }
    9378        1036 :         memcpy(xattr->value, value, value_len);
    9379        1036 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9380             : 
    9381        1036 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9382             : 
    9383        1036 :         return 0;
    9384        1178 : }
    9385             : 
    /*
     * Public entry point for setting a user xattr: calls blob_set_xattr() with
     * internal == false and returns its result.
     */
    int
    spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
                        uint16_t value_len)
    {
            const bool internal = false;

            return blob_set_xattr(blob, name, value, value_len, internal);
    }
    9392             : 
    9393             : static int
    9394         511 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9395             : {
    9396             :         struct spdk_xattr_tailq *xattrs;
    9397             :         struct spdk_xattr       *xattr;
    9398             : 
    9399         511 :         blob_verify_md_op(blob);
    9400             : 
    9401         511 :         if (blob->md_ro) {
    9402           5 :                 return -EPERM;
    9403             :         }
    9404         506 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9405             : 
    9406         521 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9407         456 :                 if (!strcmp(name, xattr->name)) {
    9408         441 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9409         441 :                         free(xattr->value);
    9410         441 :                         free(xattr->name);
    9411         441 :                         free(xattr);
    9412             : 
    9413         441 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9414         306 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9415         306 :                         }
    9416         441 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9417             : 
    9418         441 :                         return 0;
    9419             :                 }
    9420          15 :         }
    9421             : 
    9422          65 :         return -ENOENT;
    9423         511 : }
    9424             : 
    /*
     * Public entry point for removing a user xattr: calls blob_remove_xattr()
     * with internal == false and returns its result.
     */
    int
    spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    {
            const bool internal = false;

            return blob_remove_xattr(blob, name, internal);
    }
    9430             : 
    9431             : static int
    9432        2852 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9433             :                      const void **value, size_t *value_len, bool internal)
    9434             : {
    9435             :         struct spdk_xattr       *xattr;
    9436             :         struct spdk_xattr_tailq *xattrs;
    9437             : 
    9438        2852 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9439             : 
    9440        3636 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9441        1728 :                 if (!strcmp(name, xattr->name)) {
    9442         944 :                         *value = xattr->value;
    9443         944 :                         *value_len = xattr->value_len;
    9444         944 :                         return 0;
    9445             :                 }
    9446         784 :         }
    9447        1908 :         return -ENOENT;
    9448        2852 : }
    9449             : 
    /*
     * Public lookup of a user xattr.  Runs blob_verify_md_op() on the blob, then
     * returns the result of blob_get_xattr_value() with internal == false.
     */
    int
    spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
                              const void **value, size_t *value_len)
    {
            const bool internal = false;

            blob_verify_md_op(blob);

            return blob_get_xattr_value(blob, name, value, value_len, internal);
    }
    9458             : 
    /*
     * Variable-length list of xattr names built by blob_get_xattr_names().
     *
     * The structure is allocated with room for `count` trailing pointers.  The
     * trailing array is declared as a C99 flexible array member rather than the
     * GNU zero-length array extension; the layout is unchanged.
     */
    struct spdk_xattr_names {
            /* Number of valid entries in names[]. */
            uint32_t        count;
            /* Name pointers; they reference the strings owned by the xattr entries. */
            const char      *names[];
    };
    9463             : 
    9464             : static int
    9465           5 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9466             : {
    9467             :         struct spdk_xattr       *xattr;
    9468           5 :         int                     count = 0;
    9469             : 
    9470          15 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9471          10 :                 count++;
    9472          10 :         }
    9473             : 
    9474           5 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9475           5 :         if (*names == NULL) {
    9476           0 :                 return -ENOMEM;
    9477             :         }
    9478             : 
    9479          15 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9480          10 :                 (*names)->names[(*names)->count++] = xattr->name;
    9481          10 :         }
    9482             : 
    9483           5 :         return 0;
    9484           5 : }
    9485             : 
    9486             : int
    9487           5 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9488             : {
    9489           5 :         blob_verify_md_op(blob);
    9490             : 
    9491           5 :         return blob_get_xattr_names(&blob->xattrs, names);
    9492             : }
    9493             : 
    9494             : uint32_t
    9495           5 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9496             : {
    9497           5 :         assert(names != NULL);
    9498             : 
    9499           5 :         return names->count;
    9500             : }
    9501             : 
    9502             : const char *
    9503          10 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9504             : {
    9505          10 :         if (index >= names->count) {
    9506           0 :                 return NULL;
    9507             :         }
    9508             : 
    9509          10 :         return names->names[index];
    9510          10 : }
    9511             : 
    /*
     * Release a name list.  Only the list itself is freed; the name strings it
     * points at are left alone.
     */
    void
    spdk_xattr_names_free(struct spdk_xattr_names *names)
    {
            free(names);
    }
    9517             : 
    9518             : struct spdk_bs_type
    9519           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9520             : {
    9521           2 :         return bs->bstype;
    9522             : }
    9523             : 
    9524             : void
    9525           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9526             : {
    9527           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9528           0 : }
    9529             : 
    9530             : bool
    9531          60 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9532             : {
    9533          60 :         assert(blob != NULL);
    9534          60 :         return (blob->data_ro || blob->md_ro);
    9535             : }
    9536             : 
    9537             : bool
    9538          65 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9539             : {
    9540             :         struct spdk_blob_list *snapshot_entry;
    9541             : 
    9542          65 :         assert(blob != NULL);
    9543             : 
    9544          65 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9545          65 :         if (snapshot_entry == NULL) {
    9546          35 :                 return false;
    9547             :         }
    9548             : 
    9549          30 :         return true;
    9550          65 : }
    9551             : 
    9552             : bool
    9553          85 : spdk_blob_is_clone(struct spdk_blob *blob)
    9554             : {
    9555          85 :         assert(blob != NULL);
    9556             : 
    9557          85 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9558          65 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9559          50 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9560          50 :                 return true;
    9561             :         }
    9562             : 
    9563          35 :         return false;
    9564          85 : }
    9565             : 
    9566             : bool
    9567       46657 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9568             : {
    9569       46657 :         assert(blob != NULL);
    9570       46657 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9571             : }
    9572             : 
    /* Public wrapper that returns the result of blob_is_esnap_clone() for the blob. */
    bool
    spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    {
            bool is_esnap_clone = blob_is_esnap_clone(blob);

            return is_esnap_clone;
    }
    9578             : 
    9579             : static void
    9580        4291 : blob_update_clear_method(struct spdk_blob *blob)
    9581             : {
    9582             :         enum blob_clear_method stored_cm;
    9583             : 
    9584        4291 :         assert(blob != NULL);
    9585             : 
    9586             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9587             :          * in metadata previously.  If something other than the default was
    9588             :          * specified, ignore stored value and used what was passed in.
    9589             :          */
    9590        4291 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9591             : 
    9592        4291 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9593        4291 :                 blob->clear_method = stored_cm;
    9594        4291 :         } else if (blob->clear_method != stored_cm) {
    9595           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9596             :                              blob->clear_method, stored_cm);
    9597           0 :         }
    9598        4291 : }
    9599             : 
    9600             : spdk_blob_id
    9601         324 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9602             : {
    9603         324 :         struct spdk_blob_list *snapshot_entry = NULL;
    9604         324 :         struct spdk_blob_list *clone_entry = NULL;
    9605             : 
    9606         619 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9607         916 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9608         621 :                         if (clone_entry->id == blob_id) {
    9609         211 :                                 return snapshot_entry->id;
    9610             :                         }
    9611         410 :                 }
    9612         295 :         }
    9613             : 
    9614         113 :         return SPDK_BLOBID_INVALID;
    9615         324 : }
    9616             : 
    9617             : int
    9618         246 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9619             :                      size_t *count)
    9620             : {
    9621             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9622             :         size_t n;
    9623             : 
    9624         246 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9625         246 :         if (snapshot_entry == NULL) {
    9626          35 :                 *count = 0;
    9627          35 :                 return 0;
    9628             :         }
    9629             : 
    9630         211 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9631          10 :                 *count = snapshot_entry->clone_count;
    9632          10 :                 return -ENOMEM;
    9633             :         }
    9634         201 :         *count = snapshot_entry->clone_count;
    9635             : 
    9636         201 :         n = 0;
    9637         427 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9638         226 :                 ids[n++] = clone_entry->id;
    9639         226 :         }
    9640             : 
    9641         201 :         return 0;
    9642         246 : }
    9643             : 
    9644             : static void
    9645           5 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9646             : {
    9647             :         int rc;
    9648             : 
    9649           5 :         if (ctx->super->size == 0) {
    9650           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9651           0 :         }
    9652             : 
    9653           5 :         if (ctx->super->io_unit_size == 0) {
    9654           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9655           0 :         }
    9656           5 :         if (ctx->super->md_page_size == 0) {
    9657           0 :                 ctx->super->md_page_size = SPDK_BS_PAGE_SIZE;
    9658           0 :         }
    9659             : 
    9660             :         /* Parse the super block */
    9661           5 :         ctx->bs->clean = 1;
    9662           5 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9663           5 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9664           5 :         ctx->bs->md_page_size = ctx->super->md_page_size;
    9665           5 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9666           5 :         bs_init_per_cluster_fields(ctx->bs);
    9667           5 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9668           5 :         if (rc < 0) {
    9669           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9670           0 :                 return;
    9671             :         }
    9672           5 :         ctx->bs->md_start = ctx->super->md_start;
    9673           5 :         ctx->bs->md_len = ctx->super->md_len;
    9674           5 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9675           5 :         if (rc < 0) {
    9676           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9677           0 :                 return;
    9678             :         }
    9679             : 
    9680          10 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9681           5 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9682           5 :         ctx->bs->super_blob = ctx->super->super_blob;
    9683           5 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9684             : 
    9685           5 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9686           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9687           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9688           0 :                 return;
    9689             :         } else {
    9690           5 :                 bs_load_read_used_pages(ctx);
    9691             :         }
    9692           5 : }
    9693             : 
    9694             : static void
    9695           5 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9696             : {
    9697           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9698             : 
    9699           5 :         if (bserrno != 0) {
    9700           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9701           0 :                 return;
    9702             :         }
    9703           5 :         bs_load_grow_continue(ctx);
    9704           5 : }
    9705             : 
    9706             : static void
    9707           5 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9708             : {
    9709           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9710             : 
    9711           5 :         if (bserrno != 0) {
    9712           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9713           0 :                 return;
    9714             :         }
    9715             : 
    9716           5 :         spdk_free(ctx->mask);
    9717             : 
    9718          10 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9719           5 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9720           5 :                               bs_load_grow_super_write_cpl, ctx);
    9721           5 : }
    9722             : 
    9723             : static void
    9724           5 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9725             : {
    9726           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9727             :         uint64_t                lba, lba_count;
    9728             :         uint64_t                dev_size;
    9729             :         uint64_t                total_clusters;
    9730             : 
    9731           5 :         if (bserrno != 0) {
    9732           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9733           0 :                 return;
    9734             :         }
    9735             : 
    9736             :         /* The type must be correct */
    9737           5 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9738             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9739           5 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9740             :                                              struct spdk_blob_md_page) * 8));
    9741           5 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9742           5 :         total_clusters = dev_size / ctx->super->cluster_size;
    9743           5 :         ctx->mask->length = total_clusters;
    9744             : 
    9745           5 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9746           5 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9747          10 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9748           5 :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9749           5 : }
    9750             : 
    9751             : static void
    9752           5 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9753             : {
    9754             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9755             :         uint64_t lba, lba_count, mask_size;
    9756             : 
    9757           5 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9758           5 :         total_clusters = dev_size / ctx->super->cluster_size;
    9759          10 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9760           5 :                                 spdk_divide_round_up(total_clusters, 8),
    9761           5 :                                 ctx->super->md_page_size);
    9762           5 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9763             :         /* No necessary to grow or no space to grow */
    9764           5 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9765           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9766           0 :                 bs_load_grow_continue(ctx);
    9767           0 :                 return;
    9768             :         }
    9769             : 
    9770           5 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9771             : 
    9772           5 :         ctx->super->size = dev_size;
    9773           5 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9774           5 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9775             : 
    9776           5 :         mask_size = used_cluster_mask_len * ctx->super->md_page_size;
    9777           5 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    9778             :                                  SPDK_MALLOC_DMA);
    9779           5 :         if (!ctx->mask) {
    9780           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9781           0 :                 return;
    9782             :         }
    9783           5 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9784           5 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9785          10 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9786           5 :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9787           5 : }
    9788             : 
    9789             : static void
    9790           5 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9791             : {
    9792           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9793             :         int rc;
    9794             : 
    9795           5 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9796           5 :         if (rc != 0) {
    9797           0 :                 bs_load_ctx_fail(ctx, rc);
    9798           0 :                 return;
    9799             :         }
    9800             : 
    9801           5 :         bs_load_try_to_grow(ctx);
    9802           5 : }
    9803             : 
    9804             : struct spdk_bs_grow_ctx {
    9805             :         struct spdk_blob_store          *bs;
    9806             :         struct spdk_bs_super_block      *super;
    9807             : 
    9808             :         struct spdk_bit_pool            *new_used_clusters;
    9809             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9810             : 
    9811             :         spdk_bs_sequence_t              *seq;
    9812             : };
    9813             : 
    9814             : static void
    9815          40 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9816             : {
    9817          40 :         if (bserrno != 0) {
    9818          10 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9819          10 :         }
    9820             : 
    9821          40 :         bs_sequence_finish(ctx->seq, bserrno);
    9822          40 :         free(ctx->new_used_clusters_mask);
    9823          40 :         spdk_free(ctx->super);
    9824          40 :         free(ctx);
    9825          40 : }
    9826             : 
    9827             : static void
    9828          10 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9829             : {
    9830          10 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9831          10 :         struct spdk_blob_store *bs = ctx->bs;
    9832             :         uint64_t total_clusters;
    9833             : 
    9834          10 :         if (bserrno != 0) {
    9835           0 :                 bs_grow_live_done(ctx, bserrno);
    9836           0 :                 return;
    9837             :         }
    9838             : 
    9839             :         /*
    9840             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9841             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9842             :          * yet execute.
    9843             :          * When cleanly unloaded, the used md pages will be written out.
    9844             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9845             :          * filling out the used_clusters with new size and writing it out.
    9846             :          */
    9847          10 :         bs->clean = 0;
    9848             : 
    9849             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9850             :          * that require to do so. */
    9851          10 :         spdk_spin_lock(&bs->used_lock);
    9852             : 
    9853          10 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9854             : 
    9855          10 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9856          10 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9857             : 
    9858          10 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9859          10 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9860             : 
    9861          10 :         spdk_bit_pool_free(&bs->used_clusters);
    9862          10 :         bs->used_clusters = ctx->new_used_clusters;
    9863             : 
    9864          10 :         bs->total_clusters = total_clusters;
    9865          20 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9866          10 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9867             : 
    9868          10 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9869          10 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9870          10 :         spdk_spin_unlock(&bs->used_lock);
    9871             : 
    9872          10 :         bs_grow_live_done(ctx, 0);
    9873          10 : }
    9874             : 
    9875             : static void
    9876          40 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9877             : {
    9878          40 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9879             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9880             :         int rc;
    9881             : 
    9882          40 :         if (bserrno != 0) {
    9883           0 :                 bs_grow_live_done(ctx, bserrno);
    9884           0 :                 return;
    9885             :         }
    9886             : 
    9887          40 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9888          40 :         if (rc != 0) {
    9889           5 :                 bs_grow_live_done(ctx, rc);
    9890           5 :                 return;
    9891             :         }
    9892             : 
    9893          35 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9894          35 :         total_clusters = dev_size / ctx->super->cluster_size;
    9895          70 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9896          35 :                                 spdk_divide_round_up(total_clusters, 8),
    9897          35 :                                 ctx->super->md_page_size);
    9898          35 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9899             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9900          35 :         if (dev_size == ctx->super->size) {
    9901          20 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9902          20 :                 bs_grow_live_done(ctx, 0);
    9903          20 :                 return;
    9904             :         }
    9905             :         /*
    9906             :          * Blobstore cannot be shrunk, so check before if:
    9907             :          * - new size of the device is smaller than size in super_block
    9908             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9909             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9910             :          */
    9911          30 :         if (dev_size < ctx->super->size ||
    9912          15 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9913          15 :             used_cluster_mask_len > max_used_cluster_mask) {
    9914           5 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9915           5 :                 bs_grow_live_done(ctx, -ENOSPC);
    9916           5 :                 return;
    9917             :         }
    9918             : 
    9919          10 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9920             : 
    9921          10 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9922          10 :         if (!ctx->new_used_clusters_mask) {
    9923           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9924           0 :                 return;
    9925             :         }
    9926          10 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9927          10 :         if (!ctx->new_used_clusters) {
    9928           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9929           0 :                 return;
    9930             :         }
    9931             : 
    9932          10 :         ctx->super->clean = 0;
    9933          10 :         ctx->super->size = dev_size;
    9934          10 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9935          10 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9936          40 : }
    9937             : 
    9938             : void
    9939          40 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9940             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9941             : {
    9942             :         struct spdk_bs_cpl      cpl;
    9943             :         struct spdk_bs_grow_ctx *ctx;
    9944             : 
    9945          40 :         assert(spdk_get_thread() == bs->md_thread);
    9946             : 
    9947          40 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9948             : 
    9949          40 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9950          40 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9951          40 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9952             : 
    9953          40 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9954          40 :         if (!ctx) {
    9955           0 :                 cb_fn(cb_arg, -ENOMEM);
    9956           0 :                 return;
    9957             :         }
    9958          40 :         ctx->bs = bs;
    9959             : 
    9960          40 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9961             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    9962          40 :         if (!ctx->super) {
    9963           0 :                 free(ctx);
    9964           0 :                 cb_fn(cb_arg, -ENOMEM);
    9965           0 :                 return;
    9966             :         }
    9967             : 
    9968          40 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9969          40 :         if (!ctx->seq) {
    9970           0 :                 spdk_free(ctx->super);
    9971           0 :                 free(ctx);
    9972           0 :                 cb_fn(cb_arg, -ENOMEM);
    9973           0 :                 return;
    9974             :         }
    9975             : 
    9976             :         /* Read the super block */
    9977          80 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9978          40 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9979          40 :                              bs_grow_live_load_super_cpl, ctx);
    9980          40 : }
    9981             : 
    9982             : void
    9983           5 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    9984             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    9985             : {
    9986             :         struct spdk_blob_store  *bs;
    9987             :         struct spdk_bs_cpl      cpl;
    9988             :         struct spdk_bs_load_ctx *ctx;
    9989           5 :         struct spdk_bs_opts     opts = {};
    9990             :         int err;
    9991             : 
    9992           5 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    9993             : 
    9994           5 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    9995           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    9996           0 :                 dev->destroy(dev);
    9997           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9998           0 :                 return;
    9999             :         }
   10000             : 
   10001           5 :         spdk_bs_opts_init(&opts, sizeof(opts));
   10002           5 :         if (o) {
   10003           5 :                 if (bs_opts_copy(o, &opts)) {
   10004           0 :                         dev->destroy(dev);
   10005           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
   10006           0 :                         return;
   10007             :                 }
   10008           5 :         }
   10009             : 
   10010           5 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
   10011           0 :                 dev->destroy(dev);
   10012           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10013           0 :                 return;
   10014             :         }
   10015             : 
   10016           5 :         err = bs_alloc(dev, &opts, &bs, &ctx);
   10017           5 :         if (err) {
   10018           0 :                 dev->destroy(dev);
   10019           0 :                 cb_fn(cb_arg, NULL, err);
   10020           0 :                 return;
   10021             :         }
   10022             : 
   10023           5 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
   10024           5 :         cpl.u.bs_handle.cb_fn = cb_fn;
   10025           5 :         cpl.u.bs_handle.cb_arg = cb_arg;
   10026           5 :         cpl.u.bs_handle.bs = bs;
   10027             : 
   10028           5 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
   10029           5 :         if (!ctx->seq) {
   10030           0 :                 spdk_free(ctx->super);
   10031           0 :                 free(ctx);
   10032           0 :                 bs_free(bs);
   10033           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
   10034           0 :                 return;
   10035             :         }
   10036             : 
   10037             :         /* Read the super block */
   10038          10 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
   10039           5 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
   10040           5 :                              bs_grow_load_super_cpl, ctx);
   10041           5 : }
   10042             : 
   10043             : int
   10044          30 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
   10045             : {
   10046          30 :         if (!blob_is_esnap_clone(blob)) {
   10047          15 :                 return -EINVAL;
   10048             :         }
   10049             : 
   10050          15 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10051          30 : }
   10052             : 
   10053             : struct spdk_io_channel *
   10054       17482 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10055             : {
   10056       17482 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10057       17482 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10058       17482 :         struct blob_esnap_channel       find = {};
   10059             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10060             : 
   10061       17482 :         find.blob_id = blob->id;
   10062       17482 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10063       17482 :         if (spdk_likely(esnap_channel != NULL)) {
   10064       17427 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10065             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10066       17427 :                 return esnap_channel->channel;
   10067             :         }
   10068             : 
   10069          55 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10070             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10071             : 
   10072          55 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10073          55 :         if (esnap_channel == NULL) {
   10074           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10075             :                                find.blob_id);
   10076           0 :                 return NULL;
   10077             :         }
   10078          55 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10079          55 :         if (esnap_channel->channel == NULL) {
   10080           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10081           0 :                 free(esnap_channel);
   10082           0 :                 return NULL;
   10083             :         }
   10084          55 :         esnap_channel->blob_id = find.blob_id;
   10085          55 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10086          55 :         if (spdk_unlikely(existing != NULL)) {
   10087             :                 /*
   10088             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10089             :                  */
   10090           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10091           0 :                 assert(false);
   10092             : 
   10093             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10094             :                 free(esnap_channel);
   10095             : 
   10096             :                 return existing->channel;
   10097             :         }
   10098             : 
   10099          55 :         return esnap_channel->channel;
   10100       17482 : }
   10101             : 
   10102             : static int
   10103       17452 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10104             : {
   10105       17452 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10106             : }
   10107             : 
   10108             : struct blob_esnap_destroy_ctx {
   10109             :         spdk_blob_op_with_handle_complete       cb_fn;
   10110             :         void                                    *cb_arg;
   10111             :         struct spdk_blob                        *blob;
   10112             :         struct spdk_bs_dev                      *back_bs_dev;
   10113             :         bool                                    abort_io;
   10114             : };
   10115             : 
   10116             : static void
   10117         170 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10118             : {
   10119         170 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10120         170 :         struct spdk_blob                *blob = ctx->blob;
   10121         170 :         struct spdk_blob_store          *bs = blob->bs;
   10122             : 
   10123         170 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10124             :                       blob->id);
   10125             : 
   10126         170 :         if (ctx->cb_fn != NULL) {
   10127         155 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10128         155 :         }
   10129         170 :         free(ctx);
   10130             : 
   10131         170 :         bs->esnap_channels_unloading--;
   10132         170 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10133           5 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10134           5 :         }
   10135         170 : }
   10136             : 
   10137             : static void
   10138         180 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10139             : {
   10140         180 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10141         180 :         struct spdk_blob                *blob = ctx->blob;
   10142         180 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10143         180 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10144         180 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10145             :         struct blob_esnap_channel       *esnap_channel;
   10146         180 :         struct blob_esnap_channel       find = {};
   10147             : 
   10148         180 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10149             : 
   10150         180 :         find.blob_id = blob->id;
   10151         180 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10152         180 :         if (esnap_channel != NULL) {
   10153          15 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10154             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10155          15 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10156             : 
   10157          15 :                 if (ctx->abort_io) {
   10158             :                         spdk_bs_user_op_t *op, *tmp;
   10159             : 
   10160          10 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10161           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10162           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10163           0 :                                         bs_user_op_abort(op, -EIO);
   10164           0 :                                 }
   10165           0 :                         }
   10166          10 :                 }
   10167             : 
   10168          15 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10169          15 :                 free(esnap_channel);
   10170          15 :         }
   10171             : 
   10172         180 :         spdk_for_each_channel_continue(i, 0);
   10173         180 : }
   10174             : 
   10175             : /*
   10176             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10177             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10178             :  */
   10179             : static void
   10180         606 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10181             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10182             : {
   10183             :         struct blob_esnap_destroy_ctx   *ctx;
   10184             : 
   10185         606 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10186         436 :                 if (cb_fn != NULL) {
   10187         436 :                         cb_fn(cb_arg, blob, 0);
   10188         436 :                 }
   10189         436 :                 return;
   10190             :         }
   10191             : 
   10192         170 :         ctx = calloc(1, sizeof(*ctx));
   10193         170 :         if (ctx == NULL) {
   10194           0 :                 if (cb_fn != NULL) {
   10195           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10196           0 :                 }
   10197           0 :                 return;
   10198             :         }
   10199         170 :         ctx->cb_fn = cb_fn;
   10200         170 :         ctx->cb_arg = cb_arg;
   10201         170 :         ctx->blob = blob;
   10202         170 :         ctx->back_bs_dev = blob->back_bs_dev;
   10203         170 :         ctx->abort_io = abort_io;
   10204             : 
   10205         170 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10206             :                       blob->id);
   10207             : 
   10208         170 :         blob->bs->esnap_channels_unloading++;
   10209         170 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10210             :                               blob_esnap_destroy_channels_done);
   10211         606 : }
   10212             : 
   10213             : /*
   10214             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10215             :  * bs_channel is destroyed.
   10216             :  */
   10217             : static void
   10218        1284 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10219             : {
   10220             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10221             : 
   10222        1284 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10223             : 
   10224        1284 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10225             :                       spdk_thread_get_name(spdk_get_thread()));
   10226        1324 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10227             :                         esnap_channel_tmp) {
   10228          40 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10229             :                               ": destroying one channel in thread %s\n",
   10230             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10231          40 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10232          40 :                 spdk_put_io_channel(esnap_channel->channel);
   10233          40 :                 free(esnap_channel);
   10234          40 :         }
   10235        1284 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10236             :                       spdk_thread_get_name(spdk_get_thread()));
   10237        1284 : }
   10238             : 
   10239             : static void
   10240          35 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10241             : {
   10242          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10243             : 
   10244          35 :         if (bserrno != 0) {
   10245             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10246           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10247             :                             bserrno);
   10248           0 :         }
   10249          35 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10250          35 :         free(ctx);
   10251          35 : }
   10252             : 
   10253             : static void
   10254          35 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10255             : {
   10256          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10257             :         int rc;
   10258             : 
   10259          35 :         if (bserrno != 0) {
   10260           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10261             :                             blob->id, bserrno);
   10262           0 :                 ctx->bserrno = bserrno;
   10263           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10264           0 :                 return;
   10265             :         }
   10266             : 
   10267          35 :         if (blob->back_bs_dev != NULL) {
   10268          35 :                 blob_unref_back_bs_dev(blob);
   10269          35 :         }
   10270             : 
   10271          35 :         if (ctx->parent_refs_cb_fn) {
   10272          25 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10273          25 :                 if (rc != 0) {
   10274           0 :                         ctx->bserrno = rc;
   10275           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10276           0 :                         return;
   10277             :                 }
   10278          25 :         }
   10279             : 
   10280          35 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10281          35 :         blob->back_bs_dev = ctx->back_bs_dev;
   10282          35 :         ctx->bserrno = 0;
   10283             : 
   10284          35 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10285          35 : }
   10286             : 
   10287             : static void
   10288          35 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10289             : {
   10290          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10291          35 :         struct spdk_blob        *blob = ctx->blob;
   10292             : 
   10293          35 :         if (bserrno != 0) {
   10294           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10295             :                             bserrno);
   10296           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10297           0 :                 free(ctx);
   10298           0 :                 return;
   10299             :         }
   10300             : 
   10301             :         /*
   10302             :          * This does not prevent future reads from the esnap device because any future IO will
   10303             :          * lazily create a new esnap IO channel.
   10304             :          */
   10305          35 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10306          35 : }
   10307             : 
   10308             : void
   10309          10 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10310             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10311             : {
   10312          10 :         if (!blob_is_esnap_clone(blob)) {
   10313           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10314           0 :                 cb_fn(cb_arg, -EINVAL);
   10315           0 :                 return;
   10316             :         }
   10317             : 
   10318          10 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10319          10 : }
   10320             : 
   10321             : struct spdk_bs_dev *
   10322           5 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10323             : {
   10324           5 :         if (!blob_is_esnap_clone(blob)) {
   10325           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10326           0 :                 return NULL;
   10327             :         }
   10328             : 
   10329           5 :         return blob->back_bs_dev;
   10330           5 : }
   10331             : 
   10332             : bool
   10333          35 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10334             : {
   10335          35 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10336           5 :                 return true;
   10337             :         }
   10338          30 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10339          15 :                 return false;
   10340             :         }
   10341             : 
   10342          15 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10343          35 : }
   10344             : 
   10345           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
   10346           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
   10347             : 
   10348             : static void
   10349           0 : blob_trace(void)
   10350             : {
   10351           0 :         struct spdk_trace_tpoint_opts opts[] = {
   10352             :                 {
   10353             :                         "BLOB_REQ_SET_START", TRACE_BLOB_REQ_SET_START,
   10354             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 1,
   10355             :                         {
   10356             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10357             :                         }
   10358             :                 },
   10359             :                 {
   10360             :                         "BLOB_REQ_SET_COMPLETE", TRACE_BLOB_REQ_SET_COMPLETE,
   10361             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 0,
   10362             :                         {
   10363             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10364             :                         }
   10365             :                 },
   10366             :         };
   10367             : 
   10368           0 :         spdk_trace_register_object(OBJECT_BLOB_CB_ARG, 'a');
   10369           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
   10370           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_BLOB_CB_ARG, 1);
   10371           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_BLOB_CB_ARG, 0);
   10372           0 : }
   10373           3 : SPDK_TRACE_REGISTER_FN(blob_trace, "blob", TRACE_GROUP_BLOB)

Generated by: LCOV version 1.15