LCOV - code coverage report
Current view: top level - lib/blob - blobstore.c (source / functions) Hit Total Coverage
Test: ut_cov_unit.info Lines: 4542 5642 80.5 %
Date: 2024-12-05 11:28:15 Functions: 339 355 95.5 %

          Line data    Source code
       1             : /*   SPDX-License-Identifier: BSD-3-Clause
       2             :  *   Copyright (C) 2017 Intel Corporation.
       3             :  *   All rights reserved.
       4             :  *   Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
       5             :  */
       6             : 
       7             : #include "spdk/stdinc.h"
       8             : 
       9             : #include "spdk/blob.h"
      10             : #include "spdk/crc32.h"
      11             : #include "spdk/env.h"
      12             : #include "spdk/queue.h"
      13             : #include "spdk/thread.h"
      14             : #include "spdk/bit_array.h"
      15             : #include "spdk/bit_pool.h"
      16             : #include "spdk/likely.h"
      17             : #include "spdk/util.h"
      18             : #include "spdk/string.h"
      19             : #include "spdk/trace.h"
      20             : 
      21             : #include "spdk_internal/assert.h"
      22             : #include "spdk_internal/trace_defs.h"
      23             : #include "spdk/log.h"
      24             : 
      25             : #include "blobstore.h"
      26             : 
      27             : #define BLOB_CRC32C_INITIAL    0xffffffffUL
      28             : 
      29             : static int bs_register_md_thread(struct spdk_blob_store *bs);
      30             : static int bs_unregister_md_thread(struct spdk_blob_store *bs);
      31             : static void blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
      32             : static void blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      33             :                 uint64_t cluster, uint32_t extent, struct spdk_blob_md_page *page,
      34             :                 spdk_blob_op_complete cb_fn, void *cb_arg);
      35             : static void blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
      36             :                 uint32_t extent_page, struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      37             : 
      38             : static int blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
      39             :                           uint16_t value_len, bool internal);
      40             : static int blob_get_xattr_value(struct spdk_blob *blob, const char *name,
      41             :                                 const void **value, size_t *value_len, bool internal);
      42             : static int blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal);
      43             : 
      44             : static void blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
      45             :                                    struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg);
      46             : static void blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg);
      47             : 
      48             : static void bs_shallow_copy_cluster_find_next(void *cb_arg);
      49             : 
      50             : /*
      51             :  * External snapshots require a channel per thread per esnap bdev.  The tree
      52             :  * is populated lazily as blob IOs are handled by the back_bs_dev. When this
      53             :  * channel is destroyed, all the channels in the tree are destroyed.
      54             :  */
      55             : 
      56             : struct blob_esnap_channel {
      57             :         RB_ENTRY(blob_esnap_channel)    node;
      58             :         spdk_blob_id                    blob_id;
      59             :         struct spdk_io_channel          *channel;
      60             : };
      61             : 
      62             : static int blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2);
      63             : static void blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
      64             :                 spdk_blob_op_with_handle_complete cb_fn, void *cb_arg);
      65             : static void blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch);
      66             : static void blob_set_back_bs_dev_frozen(void *_ctx, int bserrno);
      67       19256 : RB_GENERATE_STATIC(blob_esnap_channel_tree, blob_esnap_channel, node, blob_esnap_channel_compare)
      68             : 
      69             : static inline bool
      70       68456 : blob_is_esnap_clone(const struct spdk_blob *blob)
      71             : {
      72       68456 :         assert(blob != NULL);
      73       68456 :         return !!(blob->invalid_flags & SPDK_BLOB_EXTERNAL_SNAPSHOT);
      74             : }
      75             : 
      76             : static int
      77        2875 : blob_id_cmp(struct spdk_blob *blob1, struct spdk_blob *blob2)
      78             : {
      79        2875 :         assert(blob1 != NULL && blob2 != NULL);
      80        2875 :         return (blob1->id < blob2->id ? -1 : blob1->id > blob2->id);
      81             : }
      82             : 
      83       20559 : RB_GENERATE_STATIC(spdk_blob_tree, spdk_blob, link, blob_id_cmp);
      84             : 
      85             : static void
      86       46177 : blob_verify_md_op(struct spdk_blob *blob)
      87             : {
      88       46177 :         assert(blob != NULL);
      89       46177 :         assert(spdk_get_thread() == blob->bs->md_thread);
      90       46177 :         assert(blob->state != SPDK_BLOB_STATE_LOADING);
      91       46177 : }
      92             : 
      93             : static struct spdk_blob_list *
      94        4783 : bs_get_snapshot_entry(struct spdk_blob_store *bs, spdk_blob_id blobid)
      95             : {
      96        4783 :         struct spdk_blob_list *snapshot_entry = NULL;
      97             : 
      98        6018 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
      99        2207 :                 if (snapshot_entry->id == blobid) {
     100         972 :                         break;
     101             :                 }
     102        1235 :         }
     103             : 
     104        4783 :         return snapshot_entry;
     105             : }
     106             : 
     107             : static void
     108        3807 : bs_claim_md_page(struct spdk_blob_store *bs, uint32_t page)
     109             : {
     110        3807 :         assert(spdk_spin_held(&bs->used_lock));
     111        3807 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     112        3807 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == false);
     113             : 
     114        3807 :         spdk_bit_array_set(bs->used_md_pages, page);
     115        3807 : }
     116             : 
     117             : static void
     118        2901 : bs_release_md_page(struct spdk_blob_store *bs, uint32_t page)
     119             : {
     120        2901 :         assert(spdk_spin_held(&bs->used_lock));
     121        2901 :         assert(page < spdk_bit_array_capacity(bs->used_md_pages));
     122        2901 :         assert(spdk_bit_array_get(bs->used_md_pages, page) == true);
     123             : 
     124        2901 :         spdk_bit_array_clear(bs->used_md_pages, page);
     125        2901 : }
     126             : 
     127             : static uint32_t
     128       10283 : bs_claim_cluster(struct spdk_blob_store *bs)
     129             : {
     130             :         uint32_t cluster_num;
     131             : 
     132       10283 :         assert(spdk_spin_held(&bs->used_lock));
     133             : 
     134       10283 :         cluster_num = spdk_bit_pool_allocate_bit(bs->used_clusters);
     135       10283 :         if (cluster_num == UINT32_MAX) {
     136           0 :                 return UINT32_MAX;
     137             :         }
     138             : 
     139       10283 :         SPDK_DEBUGLOG(blob, "Claiming cluster %u\n", cluster_num);
     140       10283 :         bs->num_free_clusters--;
     141             : 
     142       10283 :         return cluster_num;
     143       10283 : }
     144             : 
     145             : static void
     146        2996 : bs_release_cluster(struct spdk_blob_store *bs, uint32_t cluster_num)
     147             : {
     148        2996 :         assert(spdk_spin_held(&bs->used_lock));
     149        2996 :         assert(cluster_num < spdk_bit_pool_capacity(bs->used_clusters));
     150        2996 :         assert(spdk_bit_pool_is_allocated(bs->used_clusters, cluster_num) == true);
     151        2996 :         assert(bs->num_free_clusters < bs->total_clusters);
     152             : 
     153        2996 :         SPDK_DEBUGLOG(blob, "Releasing cluster %u\n", cluster_num);
     154             : 
     155        2996 :         spdk_bit_pool_free_bit(bs->used_clusters, cluster_num);
     156        2996 :         bs->num_free_clusters++;
     157        2996 : }
     158             : 
     159             : static int
     160       10283 : blob_insert_cluster(struct spdk_blob *blob, uint32_t cluster_num, uint64_t cluster)
     161             : {
     162       10283 :         uint64_t *cluster_lba = &blob->active.clusters[cluster_num];
     163             : 
     164       10283 :         blob_verify_md_op(blob);
     165             : 
     166       10283 :         if (*cluster_lba != 0) {
     167           5 :                 return -EEXIST;
     168             :         }
     169             : 
     170       10278 :         *cluster_lba = bs_cluster_to_lba(blob->bs, cluster);
     171       10278 :         blob->active.num_allocated_clusters++;
     172             : 
     173       10278 :         return 0;
     174       10283 : }
     175             : 
     176             : static int
     177       10283 : bs_allocate_cluster(struct spdk_blob *blob, uint32_t cluster_num,
     178             :                     uint64_t *cluster, uint32_t *lowest_free_md_page, bool update_map)
     179             : {
     180       10283 :         uint32_t *extent_page = 0;
     181             : 
     182       10283 :         assert(spdk_spin_held(&blob->bs->used_lock));
     183             : 
     184       10283 :         *cluster = bs_claim_cluster(blob->bs);
     185       10283 :         if (*cluster == UINT32_MAX) {
     186             :                 /* No more free clusters. Cannot satisfy the request */
     187           0 :                 return -ENOSPC;
     188             :         }
     189             : 
     190       10283 :         if (blob->use_extent_table) {
     191        6227 :                 extent_page = bs_cluster_to_extent_page(blob, cluster_num);
     192        6227 :                 if (*extent_page == 0) {
     193             :                         /* Extent page shall never occupy md_page so start the search from 1 */
     194        1087 :                         if (*lowest_free_md_page == 0) {
     195        1084 :                                 *lowest_free_md_page = 1;
     196        1084 :                         }
     197             :                         /* No extent_page is allocated for the cluster */
     198        2174 :                         *lowest_free_md_page = spdk_bit_array_find_first_clear(blob->bs->used_md_pages,
     199        1087 :                                                *lowest_free_md_page);
     200        1087 :                         if (*lowest_free_md_page == UINT32_MAX) {
     201             :                                 /* No more free md pages. Cannot satisfy the request */
     202           0 :                                 bs_release_cluster(blob->bs, *cluster);
     203           0 :                                 return -ENOSPC;
     204             :                         }
     205        1087 :                         bs_claim_md_page(blob->bs, *lowest_free_md_page);
     206        1087 :                 }
     207        6227 :         }
     208             : 
     209       10283 :         SPDK_DEBUGLOG(blob, "Claiming cluster %" PRIu64 " for blob 0x%" PRIx64 "\n", *cluster,
     210             :                       blob->id);
     211             : 
     212       10283 :         if (update_map) {
     213        9253 :                 blob_insert_cluster(blob, cluster_num, *cluster);
     214        9253 :                 if (blob->use_extent_table && *extent_page == 0) {
     215         958 :                         *extent_page = *lowest_free_md_page;
     216         958 :                 }
     217        9253 :         }
     218             : 
     219       10283 :         return 0;
     220       10283 : }
     221             : 
     222             : static void
     223        6977 : blob_xattrs_init(struct spdk_blob_xattr_opts *xattrs)
     224             : {
     225        6977 :         xattrs->count = 0;
     226        6977 :         xattrs->names = NULL;
     227        6977 :         xattrs->ctx = NULL;
     228        6977 :         xattrs->get_value = NULL;
     229        6977 : }
     230             : 
     231             : void
     232        4611 : spdk_blob_opts_init(struct spdk_blob_opts *opts, size_t opts_size)
     233             : {
     234        4611 :         if (!opts) {
     235           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     236           0 :                 return;
     237             :         }
     238             : 
     239        4611 :         if (!opts_size) {
     240           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     241           0 :                 return;
     242             :         }
     243             : 
     244        4611 :         memset(opts, 0, opts_size);
     245        4611 :         opts->opts_size = opts_size;
     246             : 
     247             : #define FIELD_OK(field) \
     248             :         offsetof(struct spdk_blob_opts, field) + sizeof(opts->field) <= opts_size
     249             : 
     250             : #define SET_FIELD(field, value) \
     251             :         if (FIELD_OK(field)) { \
     252             :                 opts->field = value; \
     253             :         } \
     254             : 
     255        4611 :         SET_FIELD(num_clusters, 0);
     256        4611 :         SET_FIELD(thin_provision, false);
     257        4611 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     258             : 
     259        4611 :         if (FIELD_OK(xattrs)) {
     260        4611 :                 blob_xattrs_init(&opts->xattrs);
     261        4611 :         }
     262             : 
     263        4611 :         SET_FIELD(use_extent_table, true);
     264             : 
     265             : #undef FIELD_OK
     266             : #undef SET_FIELD
     267        4611 : }
     268             : 
     269             : void
     270        4346 : spdk_blob_open_opts_init(struct spdk_blob_open_opts *opts, size_t opts_size)
     271             : {
     272        4346 :         if (!opts) {
     273           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
     274           0 :                 return;
     275             :         }
     276             : 
     277        4346 :         if (!opts_size) {
     278           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
     279           0 :                 return;
     280             :         }
     281             : 
     282        4346 :         memset(opts, 0, opts_size);
     283        4346 :         opts->opts_size = opts_size;
     284             : 
     285             : #define FIELD_OK(field) \
     286             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(opts->field) <= opts_size
     287             : 
     288             : #define SET_FIELD(field, value) \
     289             :         if (FIELD_OK(field)) { \
     290             :                 opts->field = value; \
     291             :         } \
     292             : 
     293        4346 :         SET_FIELD(clear_method, BLOB_CLEAR_WITH_DEFAULT);
     294             : 
     295             : #undef FIELD_OK
     296             : #undef SET_FILED
     297        4346 : }
     298             : 
     299             : static struct spdk_blob *
     300        6707 : blob_alloc(struct spdk_blob_store *bs, spdk_blob_id id)
     301             : {
     302             :         struct spdk_blob *blob;
     303             : 
     304        6707 :         blob = calloc(1, sizeof(*blob));
     305        6707 :         if (!blob) {
     306           0 :                 return NULL;
     307             :         }
     308             : 
     309        6707 :         blob->id = id;
     310        6707 :         blob->bs = bs;
     311             : 
     312        6707 :         blob->parent_id = SPDK_BLOBID_INVALID;
     313             : 
     314        6707 :         blob->state = SPDK_BLOB_STATE_DIRTY;
     315        6707 :         blob->extent_rle_found = false;
     316        6707 :         blob->extent_table_found = false;
     317        6707 :         blob->active.num_pages = 1;
     318        6707 :         blob->active.pages = calloc(1, sizeof(*blob->active.pages));
     319        6707 :         if (!blob->active.pages) {
     320           0 :                 free(blob);
     321           0 :                 return NULL;
     322             :         }
     323             : 
     324        6707 :         blob->active.pages[0] = bs_blobid_to_page(id);
     325             : 
     326        6707 :         TAILQ_INIT(&blob->xattrs);
     327        6707 :         TAILQ_INIT(&blob->xattrs_internal);
     328        6707 :         TAILQ_INIT(&blob->pending_persists);
     329        6707 :         TAILQ_INIT(&blob->persists_to_complete);
     330             : 
     331        6707 :         return blob;
     332        6707 : }
     333             : 
     334             : static void
     335       13414 : xattrs_free(struct spdk_xattr_tailq *xattrs)
     336             : {
     337             :         struct spdk_xattr       *xattr, *xattr_tmp;
     338             : 
     339       15601 :         TAILQ_FOREACH_SAFE(xattr, xattrs, link, xattr_tmp) {
     340        2187 :                 TAILQ_REMOVE(xattrs, xattr, link);
     341        2187 :                 free(xattr->name);
     342        2187 :                 free(xattr->value);
     343        2187 :                 free(xattr);
     344        2187 :         }
     345       13414 : }
     346             : 
     347             : static void
     348        1398 : blob_unref_back_bs_dev(struct spdk_blob *blob)
     349             : {
     350        1398 :         blob->back_bs_dev->destroy(blob->back_bs_dev);
     351        1398 :         blob->back_bs_dev = NULL;
     352        1398 : }
     353             : 
     354             : static void
     355        6707 : blob_free(struct spdk_blob *blob)
     356             : {
     357        6707 :         assert(blob != NULL);
     358        6707 :         assert(TAILQ_EMPTY(&blob->pending_persists));
     359        6707 :         assert(TAILQ_EMPTY(&blob->persists_to_complete));
     360             : 
     361        6707 :         free(blob->active.extent_pages);
     362        6707 :         free(blob->clean.extent_pages);
     363        6707 :         free(blob->active.clusters);
     364        6707 :         free(blob->clean.clusters);
     365        6707 :         free(blob->active.pages);
     366        6707 :         free(blob->clean.pages);
     367             : 
     368        6707 :         xattrs_free(&blob->xattrs);
     369        6707 :         xattrs_free(&blob->xattrs_internal);
     370             : 
     371        6707 :         if (blob->back_bs_dev) {
     372        1363 :                 blob_unref_back_bs_dev(blob);
     373        1363 :         }
     374             : 
     375        6707 :         free(blob);
     376        6707 : }
     377             : 
     378             : static void
     379         406 : blob_back_bs_destroy_esnap_done(void *ctx, struct spdk_blob *blob, int bserrno)
     380             : {
     381         406 :         struct spdk_bs_dev      *bs_dev = ctx;
     382             : 
     383         406 :         if (bserrno != 0) {
     384             :                 /*
     385             :                  * This is probably due to a memory allocation failure when creating the
     386             :                  * blob_esnap_destroy_ctx before iterating threads.
     387             :                  */
     388           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": Unable to destroy bs dev channels: error %d\n",
     389             :                             blob->id, bserrno);
     390           0 :                 assert(false);
     391             :         }
     392             : 
     393         406 :         if (bs_dev == NULL) {
     394             :                 /*
     395             :                  * This check exists to make scanbuild happy.
     396             :                  *
     397             :                  * blob->back_bs_dev for an esnap is NULL during the first iteration of blobs while
     398             :                  * the blobstore is being loaded. It could also be NULL if there was an error
     399             :                  * opening the esnap device. In each of these cases, no channels could have been
     400             :                  * created because back_bs_dev->create_channel() would have led to a NULL pointer
     401             :                  * deref.
     402             :                  */
     403           0 :                 assert(false);
     404             :                 return;
     405             :         }
     406             : 
     407         406 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": calling destroy on back_bs_dev\n", blob->id);
     408         406 :         bs_dev->destroy(bs_dev);
     409         406 : }
     410             : 
     411             : static void
     412         406 : blob_back_bs_destroy(struct spdk_blob *blob)
     413             : {
     414         406 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": preparing to destroy back_bs_dev\n",
     415             :                       blob->id);
     416             : 
     417         812 :         blob_esnap_destroy_bs_dev_channels(blob, false, blob_back_bs_destroy_esnap_done,
     418         406 :                                            blob->back_bs_dev);
     419         406 :         blob->back_bs_dev = NULL;
     420         406 : }
     421             : 
     422             : struct blob_parent {
     423             :         union {
     424             :                 struct {
     425             :                         spdk_blob_id id;
     426             :                         struct spdk_blob *blob;
     427             :                 } snapshot;
     428             : 
     429             :                 struct {
     430             :                         void *id;
     431             :                         uint32_t id_len;
     432             :                         struct spdk_bs_dev *back_bs_dev;
     433             :                 } esnap;
     434             :         } u;
     435             : };
     436             : 
     437             : typedef int (*set_parent_refs_cb)(struct spdk_blob *blob, struct blob_parent *parent);
     438             : 
     439             : struct set_bs_dev_ctx {
     440             :         struct spdk_blob        *blob;
     441             :         struct spdk_bs_dev      *back_bs_dev;
     442             : 
     443             :         /*
     444             :          * This callback is used during a set parent operation to change the references
     445             :          * to the parent of the blob.
     446             :          */
     447             :         set_parent_refs_cb      parent_refs_cb_fn;
     448             :         struct blob_parent      *parent_refs_cb_arg;
     449             : 
     450             :         spdk_blob_op_complete   cb_fn;
     451             :         void                    *cb_arg;
     452             :         int                     bserrno;
     453             : };
     454             : 
     455             : static void
     456          35 : blob_set_back_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
     457             :                      set_parent_refs_cb parent_refs_cb_fn, struct blob_parent *parent_refs_cb_arg,
     458             :                      spdk_blob_op_complete cb_fn, void *cb_arg)
     459             : {
     460             :         struct set_bs_dev_ctx   *ctx;
     461             : 
     462          35 :         ctx = calloc(1, sizeof(*ctx));
     463          35 :         if (ctx == NULL) {
     464           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": out of memory while setting back_bs_dev\n",
     465             :                             blob->id);
     466           0 :                 cb_fn(cb_arg, -ENOMEM);
     467           0 :                 return;
     468             :         }
     469             : 
     470          35 :         ctx->parent_refs_cb_fn = parent_refs_cb_fn;
     471          35 :         ctx->parent_refs_cb_arg = parent_refs_cb_arg;
     472          35 :         ctx->cb_fn = cb_fn;
     473          35 :         ctx->cb_arg = cb_arg;
     474          35 :         ctx->back_bs_dev = back_bs_dev;
     475          35 :         ctx->blob = blob;
     476             : 
     477          35 :         blob_freeze_io(blob, blob_set_back_bs_dev_frozen, ctx);
     478          35 : }
     479             : 
     480             : struct freeze_io_ctx {
     481             :         struct spdk_bs_cpl cpl;
     482             :         struct spdk_blob *blob;
     483             : };
     484             : 
     485             : static void
     486         663 : blob_io_sync(struct spdk_io_channel_iter *i)
     487             : {
     488         663 :         spdk_for_each_channel_continue(i, 0);
     489         663 : }
     490             : 
     491             : static void
     492         648 : blob_execute_queued_io(struct spdk_io_channel_iter *i)
     493             : {
     494         648 :         struct spdk_io_channel *_ch = spdk_io_channel_iter_get_channel(i);
     495         648 :         struct spdk_bs_channel *ch = spdk_io_channel_get_ctx(_ch);
     496         648 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     497             :         struct spdk_bs_request_set      *set;
     498             :         struct spdk_bs_user_op_args     *args;
     499             :         spdk_bs_user_op_t *op, *tmp;
     500             : 
     501         653 :         TAILQ_FOREACH_SAFE(op, &ch->queued_io, link, tmp) {
     502           5 :                 set = (struct spdk_bs_request_set *)op;
     503           5 :                 args = &set->u.user_op;
     504             : 
     505           5 :                 if (args->blob == ctx->blob) {
     506           5 :                         TAILQ_REMOVE(&ch->queued_io, op, link);
     507           5 :                         bs_user_op_execute(op);
     508           5 :                 }
     509           5 :         }
     510             : 
     511         648 :         spdk_for_each_channel_continue(i, 0);
     512         648 : }
     513             : 
     514             : static void
     515        1271 : blob_io_cpl(struct spdk_io_channel_iter *i, int status)
     516             : {
     517        1271 :         struct freeze_io_ctx *ctx = spdk_io_channel_iter_get_ctx(i);
     518             : 
     519        1271 :         ctx->cpl.u.blob_basic.cb_fn(ctx->cpl.u.blob_basic.cb_arg, 0);
     520             : 
     521        1271 :         free(ctx);
     522        1271 : }
     523             : 
     524             : static void
     525         643 : blob_freeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     526             : {
     527             :         struct freeze_io_ctx *ctx;
     528             : 
     529         643 :         blob_verify_md_op(blob);
     530             : 
     531         643 :         ctx = calloc(1, sizeof(*ctx));
     532         643 :         if (!ctx) {
     533           0 :                 cb_fn(cb_arg, -ENOMEM);
     534           0 :                 return;
     535             :         }
     536             : 
     537         643 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     538         643 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     539         643 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     540         643 :         ctx->blob = blob;
     541             : 
     542             :         /* Freeze I/O on blob */
     543         643 :         blob->frozen_refcnt++;
     544             : 
     545         643 :         spdk_for_each_channel(blob->bs, blob_io_sync, ctx, blob_io_cpl);
     546         643 : }
     547             : 
     548             : static void
     549         628 : blob_unfreeze_io(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
     550             : {
     551             :         struct freeze_io_ctx *ctx;
     552             : 
     553         628 :         blob_verify_md_op(blob);
     554             : 
     555         628 :         ctx = calloc(1, sizeof(*ctx));
     556         628 :         if (!ctx) {
     557           0 :                 cb_fn(cb_arg, -ENOMEM);
     558           0 :                 return;
     559             :         }
     560             : 
     561         628 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
     562         628 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
     563         628 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
     564         628 :         ctx->blob = blob;
     565             : 
     566         628 :         assert(blob->frozen_refcnt > 0);
     567             : 
     568         628 :         blob->frozen_refcnt--;
     569             : 
     570         628 :         spdk_for_each_channel(blob->bs, blob_execute_queued_io, ctx, blob_io_cpl);
     571         628 : }
     572             : 
     573             : static int
     574       10498 : blob_mark_clean(struct spdk_blob *blob)
     575             : {
     576       10498 :         uint32_t *extent_pages = NULL;
     577       10498 :         uint64_t *clusters = NULL;
     578       10498 :         uint32_t *pages = NULL;
     579             : 
     580       10498 :         assert(blob != NULL);
     581             : 
     582       10498 :         if (blob->active.num_extent_pages) {
     583        4258 :                 assert(blob->active.extent_pages);
     584        4258 :                 extent_pages = calloc(blob->active.num_extent_pages, sizeof(*blob->active.extent_pages));
     585        4258 :                 if (!extent_pages) {
     586           0 :                         return -ENOMEM;
     587             :                 }
     588        8516 :                 memcpy(extent_pages, blob->active.extent_pages,
     589        4258 :                        blob->active.num_extent_pages * sizeof(*extent_pages));
     590        4258 :         }
     591             : 
     592       10498 :         if (blob->active.num_clusters) {
     593        7349 :                 assert(blob->active.clusters);
     594        7349 :                 clusters = calloc(blob->active.num_clusters, sizeof(*blob->active.clusters));
     595        7349 :                 if (!clusters) {
     596           0 :                         free(extent_pages);
     597           0 :                         return -ENOMEM;
     598             :                 }
     599        7349 :                 memcpy(clusters, blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
     600        7349 :         }
     601             : 
     602       10498 :         if (blob->active.num_pages) {
     603        8641 :                 assert(blob->active.pages);
     604        8641 :                 pages = calloc(blob->active.num_pages, sizeof(*blob->active.pages));
     605        8641 :                 if (!pages) {
     606           0 :                         free(extent_pages);
     607           0 :                         free(clusters);
     608           0 :                         return -ENOMEM;
     609             :                 }
     610        8641 :                 memcpy(pages, blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
     611        8641 :         }
     612             : 
     613       10498 :         free(blob->clean.extent_pages);
     614       10498 :         free(blob->clean.clusters);
     615       10498 :         free(blob->clean.pages);
     616             : 
     617       10498 :         blob->clean.num_extent_pages = blob->active.num_extent_pages;
     618       10498 :         blob->clean.extent_pages = blob->active.extent_pages;
     619       10498 :         blob->clean.num_clusters = blob->active.num_clusters;
     620       10498 :         blob->clean.clusters = blob->active.clusters;
     621       10498 :         blob->clean.num_allocated_clusters = blob->active.num_allocated_clusters;
     622       10498 :         blob->clean.num_pages = blob->active.num_pages;
     623       10498 :         blob->clean.pages = blob->active.pages;
     624             : 
     625       10498 :         blob->active.extent_pages = extent_pages;
     626       10498 :         blob->active.clusters = clusters;
     627       10498 :         blob->active.pages = pages;
     628             : 
     629             :         /* If the metadata was dirtied again while the metadata was being written to disk,
     630             :          *  we do not want to revert the DIRTY state back to CLEAN here.
     631             :          */
     632       10498 :         if (blob->state == SPDK_BLOB_STATE_LOADING) {
     633        4259 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
     634        4259 :         }
     635             : 
     636       10498 :         return 0;
     637       10498 : }
     638             : 
     639             : static int
     640        1592 : blob_deserialize_xattr(struct spdk_blob *blob,
     641             :                        struct spdk_blob_md_descriptor_xattr *desc_xattr, bool internal)
     642             : {
     643             :         struct spdk_xattr                       *xattr;
     644             : 
     645        3184 :         if (desc_xattr->length != sizeof(desc_xattr->name_length) +
     646        1592 :             sizeof(desc_xattr->value_length) +
     647        3184 :             desc_xattr->name_length + desc_xattr->value_length) {
     648           0 :                 return -EINVAL;
     649             :         }
     650             : 
     651        1592 :         xattr = calloc(1, sizeof(*xattr));
     652        1592 :         if (xattr == NULL) {
     653           0 :                 return -ENOMEM;
     654             :         }
     655             : 
     656        1592 :         xattr->name = malloc(desc_xattr->name_length + 1);
     657        1592 :         if (xattr->name == NULL) {
     658           0 :                 free(xattr);
     659           0 :                 return -ENOMEM;
     660             :         }
     661             : 
     662        1592 :         xattr->value = malloc(desc_xattr->value_length);
     663        1592 :         if (xattr->value == NULL) {
     664           0 :                 free(xattr->name);
     665           0 :                 free(xattr);
     666           0 :                 return -ENOMEM;
     667             :         }
     668             : 
     669        1592 :         memcpy(xattr->name, desc_xattr->name, desc_xattr->name_length);
     670        1592 :         xattr->name[desc_xattr->name_length] = '\0';
     671        1592 :         xattr->value_len = desc_xattr->value_length;
     672        3184 :         memcpy(xattr->value,
     673        1592 :                (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length),
     674        1592 :                desc_xattr->value_length);
     675             : 
     676        1592 :         TAILQ_INSERT_TAIL(internal ? &blob->xattrs_internal : &blob->xattrs, xattr, link);
     677             : 
     678        1592 :         return 0;
     679        1592 : }
     680             : 
     681             : 
     682             : static int
     683        5980 : blob_parse_page(const struct spdk_blob_md_page *page, struct spdk_blob *blob)
     684             : {
     685             :         struct spdk_blob_md_descriptor *desc;
     686        5980 :         size_t  cur_desc = 0;
     687             :         void *tmp;
     688             : 
     689        5980 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
     690       17389 :         while (cur_desc < sizeof(page->descriptors)) {
     691       17389 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
     692        5920 :                         if (desc->length == 0) {
     693             :                                 /* If padding and length are 0, this terminates the page */
     694        5920 :                                 break;
     695             :                         }
     696       11469 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
     697             :                         struct spdk_blob_md_descriptor_flags    *desc_flags;
     698             : 
     699        4301 :                         desc_flags = (struct spdk_blob_md_descriptor_flags *)desc;
     700             : 
     701        4301 :                         if (desc_flags->length != sizeof(*desc_flags) - sizeof(*desc)) {
     702           0 :                                 return -EINVAL;
     703             :                         }
     704             : 
     705        4301 :                         if ((desc_flags->invalid_flags | SPDK_BLOB_INVALID_FLAGS_MASK) !=
     706             :                             SPDK_BLOB_INVALID_FLAGS_MASK) {
     707          10 :                                 return -EINVAL;
     708             :                         }
     709             : 
     710        4291 :                         if ((desc_flags->data_ro_flags | SPDK_BLOB_DATA_RO_FLAGS_MASK) !=
     711             :                             SPDK_BLOB_DATA_RO_FLAGS_MASK) {
     712          15 :                                 blob->data_ro = true;
     713          15 :                                 blob->md_ro = true;
     714          15 :                         }
     715             : 
     716        4291 :                         if ((desc_flags->md_ro_flags | SPDK_BLOB_MD_RO_FLAGS_MASK) !=
     717             :                             SPDK_BLOB_MD_RO_FLAGS_MASK) {
     718          15 :                                 blob->md_ro = true;
     719          15 :                         }
     720             : 
     721        4291 :                         if ((desc_flags->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
     722         712 :                                 blob->data_ro = true;
     723         712 :                                 blob->md_ro = true;
     724         712 :                         }
     725             : 
     726        4291 :                         blob->invalid_flags = desc_flags->invalid_flags;
     727        4291 :                         blob->data_ro_flags = desc_flags->data_ro_flags;
     728        4291 :                         blob->md_ro_flags = desc_flags->md_ro_flags;
     729             : 
     730       11459 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
     731             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
     732             :                         unsigned int                            i, j;
     733        1396 :                         unsigned int                            cluster_count = blob->active.num_clusters;
     734             : 
     735        1396 :                         if (blob->extent_table_found) {
     736             :                                 /* Extent Table already present in the md,
     737             :                                  * both descriptors should never be at the same time. */
     738           0 :                                 return -EINVAL;
     739             :                         }
     740        1396 :                         blob->extent_rle_found = true;
     741             : 
     742        1396 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
     743             : 
     744        1396 :                         if (desc_extent_rle->length == 0 ||
     745        1396 :                             (desc_extent_rle->length % sizeof(desc_extent_rle->extents[0]) != 0)) {
     746           0 :                                 return -EINVAL;
     747             :                         }
     748             : 
     749        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     750       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     751       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     752       13384 :                                                 if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters,
     753        6692 :                                                                                 desc_extent_rle->extents[i].cluster_idx + j)) {
     754           0 :                                                         return -EINVAL;
     755             :                                                 }
     756        6692 :                                         }
     757       19668 :                                         cluster_count++;
     758       19668 :                                 }
     759        1572 :                         }
     760             : 
     761        1396 :                         if (cluster_count == 0) {
     762           0 :                                 return -EINVAL;
     763             :                         }
     764        1396 :                         tmp = realloc(blob->active.clusters, cluster_count * sizeof(*blob->active.clusters));
     765        1396 :                         if (tmp == NULL) {
     766           0 :                                 return -ENOMEM;
     767             :                         }
     768        1396 :                         blob->active.clusters = tmp;
     769        1396 :                         blob->active.cluster_array_size = cluster_count;
     770             : 
     771        2968 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
     772       21240 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
     773       19668 :                                         if (desc_extent_rle->extents[i].cluster_idx != 0) {
     774       13384 :                                                 blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     775        6692 :                                                                 desc_extent_rle->extents[i].cluster_idx + j);
     776        6692 :                                                 blob->active.num_allocated_clusters++;
     777       19668 :                                         } else if (spdk_blob_is_thin_provisioned(blob)) {
     778       12976 :                                                 blob->active.clusters[blob->active.num_clusters++] = 0;
     779       12976 :                                         } else {
     780           0 :                                                 return -EINVAL;
     781             :                                         }
     782       19668 :                                 }
     783        1572 :                         }
     784        7168 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
     785             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
     786        2624 :                         uint32_t num_extent_pages = blob->active.num_extent_pages;
     787             :                         uint32_t i, j;
     788             :                         size_t extent_pages_length;
     789             : 
     790        2624 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
     791        2624 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
     792             : 
     793        2624 :                         if (blob->extent_rle_found) {
     794             :                                 /* This means that Extent RLE is present in MD,
     795             :                                  * both should never be at the same time. */
     796           0 :                                 return -EINVAL;
     797        2624 :                         } else if (blob->extent_table_found &&
     798           0 :                                    desc_extent_table->num_clusters != blob->remaining_clusters_in_et) {
     799             :                                 /* Number of clusters in this ET does not match number
     800             :                                  * from previously read EXTENT_TABLE. */
     801           0 :                                 return -EINVAL;
     802             :                         }
     803             : 
     804        2624 :                         if (desc_extent_table->length == 0 ||
     805        2624 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
     806           0 :                                 return -EINVAL;
     807             :                         }
     808             : 
     809        2624 :                         blob->extent_table_found = true;
     810             : 
     811        4825 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     812        2201 :                                 num_extent_pages += desc_extent_table->extent_page[i].num_pages;
     813        2201 :                         }
     814             : 
     815        2624 :                         if (num_extent_pages > 0) {
     816        2177 :                                 tmp = realloc(blob->active.extent_pages, num_extent_pages * sizeof(uint32_t));
     817        2177 :                                 if (tmp == NULL) {
     818           0 :                                         return -ENOMEM;
     819             :                                 }
     820        2177 :                                 blob->active.extent_pages = tmp;
     821        2177 :                         }
     822        2624 :                         blob->active.extent_pages_array_size = num_extent_pages;
     823             : 
     824        2624 :                         blob->remaining_clusters_in_et = desc_extent_table->num_clusters;
     825             : 
     826             :                         /* Extent table entries contain md page numbers for extent pages.
     827             :                          * Zeroes represent unallocated extent pages, those are run-length-encoded.
     828             :                          */
     829        4825 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
     830        2201 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
     831        1565 :                                         assert(desc_extent_table->extent_page[i].num_pages == 1);
     832        1565 :                                         blob->active.extent_pages[blob->active.num_extent_pages++] =
     833        1565 :                                                 desc_extent_table->extent_page[i].page_idx;
     834        2201 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     835        1272 :                                         for (j = 0; j < desc_extent_table->extent_page[i].num_pages; j++) {
     836         636 :                                                 blob->active.extent_pages[blob->active.num_extent_pages++] = 0;
     837         636 :                                         }
     838         636 :                                 } else {
     839           0 :                                         return -EINVAL;
     840             :                                 }
     841        2201 :                         }
     842        5772 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
     843             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
     844             :                         unsigned int                                    i;
     845        1556 :                         unsigned int                                    cluster_count = 0;
     846             :                         size_t                                          cluster_idx_length;
     847             : 
     848        1556 :                         if (blob->extent_rle_found) {
     849             :                                 /* This means that Extent RLE is present in MD,
     850             :                                  * both should never be at the same time. */
     851           0 :                                 return -EINVAL;
     852             :                         }
     853             : 
     854        1556 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
     855        1556 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
     856             : 
     857        1556 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
     858        1556 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
     859           0 :                                 return -EINVAL;
     860             :                         }
     861             : 
     862       24472 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     863       22916 :                                 if (desc_extent->cluster_idx[i] != 0) {
     864       10415 :                                         if (!spdk_bit_pool_is_allocated(blob->bs->used_clusters, desc_extent->cluster_idx[i])) {
     865           0 :                                                 return -EINVAL;
     866             :                                         }
     867       10415 :                                 }
     868       22916 :                                 cluster_count++;
     869       22916 :                         }
     870             : 
     871        1556 :                         if (cluster_count == 0) {
     872           0 :                                 return -EINVAL;
     873             :                         }
     874             : 
     875             :                         /* When reading extent pages sequentially starting cluster idx should match
     876             :                          * current size of a blob.
     877             :                          * If changed to batch reading, this check shall be removed. */
     878        1556 :                         if (desc_extent->start_cluster_idx != blob->active.num_clusters) {
     879           0 :                                 return -EINVAL;
     880             :                         }
     881             : 
     882        3112 :                         tmp = realloc(blob->active.clusters,
     883        1556 :                                       (cluster_count + blob->active.num_clusters) * sizeof(*blob->active.clusters));
     884        1556 :                         if (tmp == NULL) {
     885           0 :                                 return -ENOMEM;
     886             :                         }
     887        1556 :                         blob->active.clusters = tmp;
     888        1556 :                         blob->active.cluster_array_size = (cluster_count + blob->active.num_clusters);
     889             : 
     890       24472 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
     891       22916 :                                 if (desc_extent->cluster_idx[i] != 0) {
     892       20830 :                                         blob->active.clusters[blob->active.num_clusters++] = bs_cluster_to_lba(blob->bs,
     893       10415 :                                                         desc_extent->cluster_idx[i]);
     894       10415 :                                         blob->active.num_allocated_clusters++;
     895       22916 :                                 } else if (spdk_blob_is_thin_provisioned(blob)) {
     896       12501 :                                         blob->active.clusters[blob->active.num_clusters++] = 0;
     897       12501 :                                 } else {
     898           0 :                                         return -EINVAL;
     899             :                                 }
     900       22916 :                         }
     901        1556 :                         assert(desc_extent->start_cluster_idx + cluster_count == blob->active.num_clusters);
     902        1556 :                         assert(blob->remaining_clusters_in_et >= cluster_count);
     903        1556 :                         blob->remaining_clusters_in_et -= cluster_count;
     904        3148 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
     905             :                         int rc;
     906             : 
     907         952 :                         rc = blob_deserialize_xattr(blob,
     908         476 :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, false);
     909         476 :                         if (rc != 0) {
     910           0 :                                 return rc;
     911             :                         }
     912        1592 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
     913             :                         int rc;
     914             : 
     915        2232 :                         rc = blob_deserialize_xattr(blob,
     916        1116 :                                                     (struct spdk_blob_md_descriptor_xattr *) desc, true);
     917        1116 :                         if (rc != 0) {
     918           0 :                                 return rc;
     919             :                         }
     920        1116 :                 } else {
     921             :                         /* Unrecognized descriptor type.  Do not fail - just continue to the
     922             :                          *  next descriptor.  If this descriptor is associated with some feature
     923             :                          *  defined in a newer version of blobstore, that version of blobstore
     924             :                          *  should create and set an associated feature flag to specify if this
     925             :                          *  blob can be loaded or not.
     926             :                          */
     927             :                 }
     928             : 
     929             :                 /* Advance to the next descriptor */
     930       11459 :                 cur_desc += sizeof(*desc) + desc->length;
     931       11459 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
     932          50 :                         break;
     933             :                 }
     934       11409 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
     935             :         }
     936             : 
     937        5970 :         return 0;
     938        5980 : }
     939             : 
     940             : static bool bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page);
     941             : 
     942             : static int
     943        1556 : blob_parse_extent_page(struct spdk_blob_md_page *extent_page, struct spdk_blob *blob)
     944             : {
     945        1556 :         assert(blob != NULL);
     946        1556 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     947             : 
     948        1556 :         if (bs_load_cur_extent_page_valid(extent_page) == false) {
     949           0 :                 return -ENOENT;
     950             :         }
     951             : 
     952        1556 :         return blob_parse_page(extent_page, blob);
     953        1556 : }
     954             : 
     955             : static int
     956        4306 : blob_parse(const struct spdk_blob_md_page *pages, uint32_t page_count,
     957             :            struct spdk_blob *blob)
     958             : {
     959             :         const struct spdk_blob_md_page *page;
     960             :         uint32_t i;
     961             :         int rc;
     962             :         void *tmp;
     963             : 
     964        4306 :         assert(page_count > 0);
     965        4306 :         assert(pages[0].sequence_num == 0);
     966        4306 :         assert(blob != NULL);
     967        4306 :         assert(blob->state == SPDK_BLOB_STATE_LOADING);
     968        4306 :         assert(blob->active.clusters == NULL);
     969             : 
     970             :         /* The blobid provided doesn't match what's in the MD, this can
     971             :          * happen for example if a bogus blobid is passed in through open.
     972             :          */
     973        4306 :         if (blob->id != pages[0].id) {
     974           5 :                 SPDK_ERRLOG("Blobid (0x%" PRIx64 ") doesn't match what's in metadata "
     975             :                             "(0x%" PRIx64 ")\n", blob->id, pages[0].id);
     976           5 :                 return -ENOENT;
     977             :         }
     978             : 
     979        4301 :         tmp = realloc(blob->active.pages, page_count * sizeof(*blob->active.pages));
     980        4301 :         if (!tmp) {
     981           0 :                 return -ENOMEM;
     982             :         }
     983        4301 :         blob->active.pages = tmp;
     984             : 
     985        4301 :         blob->active.pages[0] = pages[0].id;
     986             : 
     987        4424 :         for (i = 1; i < page_count; i++) {
     988         123 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, pages[i - 1].next));
     989         123 :                 blob->active.pages[i] = pages[i - 1].next;
     990         123 :         }
     991        4301 :         blob->active.num_pages = page_count;
     992             : 
     993        8715 :         for (i = 0; i < page_count; i++) {
     994        4424 :                 page = &pages[i];
     995             : 
     996        4424 :                 assert(page->id == blob->id);
     997        4424 :                 assert(page->sequence_num == i);
     998             : 
     999        4424 :                 rc = blob_parse_page(page, blob);
    1000        4424 :                 if (rc != 0) {
    1001          10 :                         return rc;
    1002             :                 }
    1003        4414 :         }
    1004             : 
    1005        4291 :         return 0;
    1006        4306 : }
    1007             : 
    1008             : static int
    1009        5532 : blob_serialize_add_page(const struct spdk_blob *blob,
    1010             :                         struct spdk_blob_md_page **pages,
    1011             :                         uint32_t *page_count,
    1012             :                         struct spdk_blob_md_page **last_page)
    1013             : {
    1014             :         struct spdk_blob_md_page *page, *tmp_pages;
    1015             : 
    1016        5532 :         assert(pages != NULL);
    1017        5532 :         assert(page_count != NULL);
    1018             : 
    1019        5532 :         *last_page = NULL;
    1020        5532 :         if (*page_count == 0) {
    1021        5423 :                 assert(*pages == NULL);
    1022        5423 :                 *pages = spdk_malloc(blob->bs->md_page_size, 0,
    1023             :                                      NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1024        5423 :                 if (*pages == NULL) {
    1025           0 :                         return -ENOMEM;
    1026             :                 }
    1027        5423 :                 *page_count = 1;
    1028        5423 :         } else {
    1029         109 :                 assert(*pages != NULL);
    1030         109 :                 tmp_pages = spdk_realloc(*pages, blob->bs->md_page_size * (*page_count + 1), 0);
    1031         109 :                 if (tmp_pages == NULL) {
    1032           0 :                         return -ENOMEM;
    1033             :                 }
    1034         109 :                 (*page_count)++;
    1035         109 :                 *pages = tmp_pages;
    1036             :         }
    1037             : 
    1038        5532 :         page = &(*pages)[*page_count - 1];
    1039        5532 :         memset(page, 0, sizeof(*page));
    1040        5532 :         page->id = blob->id;
    1041        5532 :         page->sequence_num = *page_count - 1;
    1042        5532 :         page->next = SPDK_INVALID_MD_PAGE;
    1043        5532 :         *last_page = page;
    1044             : 
    1045        5532 :         return 0;
    1046        5532 : }
    1047             : 
    1048             : /* Serialize the in-memory xattr into an on-disk xattr descriptor written at buf.
    1049             :  * required_sz is set on both success and failure to the descriptor struct size plus
    1050             :  * the name length and the value length. Returns -1 if buf_sz is smaller than that, else 0.
    1051             :  */
    1052             : static int
    1053        2160 : blob_serialize_xattr(const struct spdk_xattr *xattr,
    1054             :                      uint8_t *buf, size_t buf_sz,
    1055             :                      size_t *required_sz, bool internal)
    1056             : {
    1057             :         struct spdk_blob_md_descriptor_xattr    *desc;
    1058             : 
    1059        4320 :         *required_sz = sizeof(struct spdk_blob_md_descriptor_xattr) +
    1060        4320 :                        strlen(xattr->name) +
    1061        2160 :                        xattr->value_len;
    1062             : 
    1063        2160 :         if (buf_sz < *required_sz) {
    1064          60 :                 return -1;
    1065             :         }
    1066             : 
    1067        2100 :         desc = (struct spdk_blob_md_descriptor_xattr *)buf;
    1068             :         /* internal selects the descriptor type; length counts the name_length and value_length fields plus both payloads. */
    1069        2100 :         desc->type = internal ? SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL : SPDK_MD_DESCRIPTOR_TYPE_XATTR;
    1070        2100 :         desc->length = sizeof(desc->name_length) +
    1071        2100 :                        sizeof(desc->value_length) +
    1072        4200 :                        strlen(xattr->name) +
    1073        2100 :                        xattr->value_len;
    1074        2100 :         desc->name_length = strlen(xattr->name);
    1075        2100 :         desc->value_length = xattr->value_len;
    1076             :         /* The name is copied without a terminating NUL and the value bytes follow it directly. */
    1077        2100 :         memcpy(desc->name, xattr->name, desc->name_length);
    1078        4200 :         memcpy((void *)((uintptr_t)desc->name + desc->name_length),
    1079        2100 :                xattr->value,
    1080        2100 :                desc->value_length);
    1081             : 
    1082        2100 :         return 0;
    1083        2160 : }
    1084             : /* Emit one extent table descriptor at *buf for extent pages starting at start_ep. *next_ep receives the index to resume from; *buf and *remaining_sz are advanced past what was written. */
    1085             : static void
    1086        2516 : blob_serialize_extent_table_entry(const struct spdk_blob *blob,
    1087             :                                   uint64_t start_ep, uint64_t *next_ep,
    1088             :                                   uint8_t **buf, size_t *remaining_sz)
    1089             : {
    1090             :         struct spdk_blob_md_descriptor_extent_table *desc;
    1091             :         size_t cur_sz;
    1092             :         uint64_t i, et_idx;
    1093             :         uint32_t extent_page, ep_len;
    1094             : 
    1095             :         /* The buffer must at least hold the descriptor header and the num_clusters field; otherwise nothing is written and *next_ep stays at start_ep */
    1096        2516 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc->num_clusters);
    1097        2516 :         if (*remaining_sz < cur_sz) {
    1098          30 :                 *next_ep = start_ep;
    1099          30 :                 return;
    1100             :         }
    1101             : 
    1102        2486 :         desc = (struct spdk_blob_md_descriptor_extent_table *)*buf;
    1103        2486 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE;
    1104             : 
    1105        2486 :         desc->num_clusters = blob->active.num_clusters;
    1106             :         /* ep_len is the length of the current run of unallocated (zero) extent pages folded into the next entry. */
    1107        2486 :         ep_len = 1;
    1108        2486 :         et_idx = 0;
    1109        6343 :         for (i = start_ep; i < blob->active.num_extent_pages; i++) {
    1110        3857 :                 if (*remaining_sz < cur_sz  + sizeof(desc->extent_page[0])) {
    1111             :                         /* If we ran out of buffer space, return */
    1112           0 :                         break;
    1113             :                 }
    1114             : 
    1115        3857 :                 extent_page = blob->active.extent_pages[i];
    1116             :                 /* Extend the run while this extent page and the next one are both unallocated */
    1117        5474 :                 if (extent_page == 0 &&
    1118        2283 :                     (i + 1 < blob->active.num_extent_pages && blob->active.extent_pages[i + 1] == 0)) {
    1119        1617 :                         ep_len++;
    1120        1617 :                         continue;
    1121             :                 }
    1122        2240 :                 desc->extent_page[et_idx].page_idx = extent_page;
    1123        2240 :                 desc->extent_page[et_idx].num_pages = ep_len;
    1124        2240 :                 et_idx++;
    1125             : 
    1126        2240 :                 ep_len = 1;
    1127        2240 :                 cur_sz += sizeof(desc->extent_page[et_idx]);
    1128        2240 :         }
    1129        2486 :         *next_ep = i;
    1130             :         /* length covers num_clusters and the emitted entries only; the generic descriptor header is added separately below. */
    1131        2486 :         desc->length = sizeof(desc->num_clusters) + sizeof(desc->extent_page[0]) * et_idx;
    1132        2486 :         *remaining_sz -= sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1133        2486 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc->length;
    1134        2516 : }
    1135             : /* Serialize the extent table of blob, adding a new md page through blob_serialize_add_page whenever an entry stops before the last extent page. Returns 0, or the negative rc of blob_serialize_add_page. */
    1136             : static int
    1137        2489 : blob_serialize_extent_table(const struct spdk_blob *blob,
    1138             :                             struct spdk_blob_md_page **pages,
    1139             :                             struct spdk_blob_md_page *cur_page,
    1140             :                             uint32_t *page_count, uint8_t **buf,
    1141             :                             size_t *remaining_sz)
    1142             : {
    1143             :         uint64_t                                last_extent_page;
    1144             :         int                                     rc;
    1145             : 
    1146        2489 :         last_extent_page = 0;
    1147             :         /* At least single extent table entry has to be always persisted.
    1148             :          * Such case occurs with num_extent_pages == 0. */
    1149        2516 :         while (last_extent_page <= blob->active.num_extent_pages) {
    1150        5032 :                 blob_serialize_extent_table_entry(blob, last_extent_page, &last_extent_page, buf,
    1151        2516 :                                                   remaining_sz);
    1152             : 
    1153        2516 :                 if (last_extent_page == blob->active.num_extent_pages) {
    1154        2489 :                         break;
    1155             :                 }
    1156             :                 /* The entry stopped early: continue in a freshly added page. */
    1157          27 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1158          27 :                 if (rc < 0) {
    1159           0 :                         return rc;
    1160             :                 }
    1161             :                 /* Restart at the beginning of the descriptor area of the new page. */
    1162          27 :                 *buf = (uint8_t *)cur_page->descriptors;
    1163          27 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1164             :         }
    1165             : 
    1166        2489 :         return 0;
    1167        2489 : }
    1168             : /* Emit one run-length-encoded extent descriptor at *buf for clusters starting at start_cluster. *next_cluster receives the cluster to resume from (num_clusters when everything fit); *buf and *buf_sz are advanced past what was written. */
    1169             : static void
    1170        1751 : blob_serialize_extent_rle(const struct spdk_blob *blob,
    1171             :                           uint64_t start_cluster, uint64_t *next_cluster,
    1172             :                           uint8_t **buf, size_t *buf_sz)
    1173             : {
    1174             :         struct spdk_blob_md_descriptor_extent_rle *desc_extent_rle;
    1175             :         size_t cur_sz;
    1176             :         uint64_t i, extent_idx;
    1177             :         uint64_t lba, lba_per_cluster, lba_count;
    1178             : 
    1179             :         /* The buffer must have room for at least one extent; otherwise nothing is written and *next_cluster stays at start_cluster */
    1180        1751 :         cur_sz = sizeof(struct spdk_blob_md_descriptor) + sizeof(desc_extent_rle->extents[0]);
    1181        1751 :         if (*buf_sz < cur_sz) {
    1182          18 :                 *next_cluster = start_cluster;
    1183          18 :                 return;
    1184             :         }
    1185             : 
    1186        1733 :         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)*buf;
    1187        1733 :         desc_extent_rle->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE;
    1188             : 
    1189        1733 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1190             :         /* Assert for scan-build false positive */
    1191        1733 :         assert(lba_per_cluster > 0);
    1192             :         /* lba and lba_count describe the run that is currently being accumulated. */
    1193        1733 :         lba = blob->active.clusters[start_cluster];
    1194        1733 :         lba_count = lba_per_cluster;
    1195        1733 :         extent_idx = 0;
    1196      810464 :         for (i = start_cluster + 1; i < blob->active.num_clusters; i++) {
    1197      808735 :                 if ((lba + lba_count) == blob->active.clusters[i] && lba != 0) {
    1198             :                         /* Run-length encode sequential non-zero LBA */
    1199        7276 :                         lba_count += lba_per_cluster;
    1200        7276 :                         continue;
    1201      801459 :                 } else if (lba == 0 && blob->active.clusters[i] == 0) {
    1202             :                         /* Run-length encode unallocated clusters */
    1203      800266 :                         lba_count += lba_per_cluster;
    1204      800266 :                         continue;
    1205             :                 }
    1206        1193 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1207        1193 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1208        1193 :                 extent_idx++;
    1209             :                 /* Account for the next extent before starting it, so the space check below covers it. */
    1210        1193 :                 cur_sz += sizeof(desc_extent_rle->extents[extent_idx]);
    1211             : 
    1212        1193 :                 if (*buf_sz < cur_sz) {
    1213             :                         /* If we ran out of buffer space, return */
    1214           4 :                         *next_cluster = i;
    1215           4 :                         break;
    1216             :                 }
    1217             : 
    1218        1189 :                 lba = blob->active.clusters[i];
    1219        1189 :                 lba_count = lba_per_cluster;
    1220        1189 :         }
    1221             :         /* Flush the pending run unless the loop stopped for lack of buffer space. */
    1222        1733 :         if (*buf_sz >= cur_sz) {
    1223        1729 :                 desc_extent_rle->extents[extent_idx].cluster_idx = lba / lba_per_cluster;
    1224        1729 :                 desc_extent_rle->extents[extent_idx].length = lba_count / lba_per_cluster;
    1225        1729 :                 extent_idx++;
    1226             : 
    1227        1729 :                 *next_cluster = blob->active.num_clusters;
    1228        1729 :         }
    1229             :         /* length covers the emitted extents only; the generic descriptor header is added separately below. */
    1230        1733 :         desc_extent_rle->length = sizeof(desc_extent_rle->extents[0]) * extent_idx;
    1231        1733 :         *buf_sz -= sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1232        1733 :         *buf += sizeof(struct spdk_blob_md_descriptor) + desc_extent_rle->length;
    1233        1751 : }
    1234             : /* Serialize all clusters of blob as RLE extent descriptors, adding a new md page through blob_serialize_add_page whenever a descriptor stops before num_clusters. Returns 0, or the negative rc of blob_serialize_add_page. */
    1235             : static int
    1236        1943 : blob_serialize_extents_rle(const struct spdk_blob *blob,
    1237             :                            struct spdk_blob_md_page **pages,
    1238             :                            struct spdk_blob_md_page *cur_page,
    1239             :                            uint32_t *page_count, uint8_t **buf,
    1240             :                            size_t *remaining_sz)
    1241             : {
    1242             :         uint64_t                                last_cluster;
    1243             :         int                                     rc;
    1244             :         /* With num_clusters == 0 the loop body never runs and no extent descriptor is written. */
    1245        1943 :         last_cluster = 0;
    1246        1965 :         while (last_cluster < blob->active.num_clusters) {
    1247        1751 :                 blob_serialize_extent_rle(blob, last_cluster, &last_cluster, buf, remaining_sz);
    1248             : 
    1249        1751 :                 if (last_cluster == blob->active.num_clusters) {
    1250        1729 :                         break;
    1251             :                 }
    1252             :                 /* The descriptor stopped early: continue in a freshly added page. */
    1253          22 :                 rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1254          22 :                 if (rc < 0) {
    1255           0 :                         return rc;
    1256             :                 }
    1257             :                 /* Restart at the beginning of the descriptor area of the new page. */
    1258          22 :                 *buf = (uint8_t *)cur_page->descriptors;
    1259          22 :                 *remaining_sz = sizeof(cur_page->descriptors);
    1260             :         }
    1261             : 
    1262        1943 :         return 0;
    1263        1943 : }
    1264             : /* Fill page->descriptors with one extent page descriptor for the SPDK_EXTENTS_PER_EP-aligned group of clusters that contains cluster, stopping at num_clusters or after SPDK_EXTENTS_PER_EP entries. */
    1265             : static void
    1266        1648 : blob_serialize_extent_page(const struct spdk_blob *blob,
    1267             :                            uint64_t cluster, struct spdk_blob_md_page *page)
    1268             : {
    1269             :         struct spdk_blob_md_descriptor_extent_page *desc_extent;
    1270             :         uint64_t i, extent_idx;
    1271             :         uint64_t lba, lba_per_cluster;
    1272        1648 :         uint64_t start_cluster_idx = (cluster / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    1273             : 
    1274        1648 :         desc_extent = (struct spdk_blob_md_descriptor_extent_page *) page->descriptors;
    1275        1648 :         desc_extent->type = SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE;
    1276             : 
    1277        1648 :         lba_per_cluster = bs_cluster_to_lba(blob->bs, 1);
    1278             :         /* Each active.clusters[] value is divided by lba_per_cluster to get the cluster index stored in the descriptor. */
    1279        1648 :         desc_extent->start_cluster_idx = start_cluster_idx;
    1280        1648 :         extent_idx = 0;
    1281       63582 :         for (i = start_cluster_idx; i < blob->active.num_clusters; i++) {
    1282       62033 :                 lba = blob->active.clusters[i];
    1283       62033 :                 desc_extent->cluster_idx[extent_idx++] = lba / lba_per_cluster;
    1284       62033 :                 if (extent_idx >= SPDK_EXTENTS_PER_EP) {
    1285          99 :                         break;
    1286             :                 }
    1287       61934 :         }
    1288        1648 :         desc_extent->length = sizeof(desc_extent->start_cluster_idx) +
    1289        1648 :                               sizeof(desc_extent->cluster_idx[0]) * extent_idx;
    1290        1648 : }
    1291             : /* Write the flags descriptor of blob at buf and subtract its size from *buf_sz. buf is passed by value, so the caller has to advance its own pointer. */
    1292             : static void
    1293        4432 : blob_serialize_flags(const struct spdk_blob *blob,
    1294             :                      uint8_t *buf, size_t *buf_sz)
    1295             : {
    1296             :         struct spdk_blob_md_descriptor_flags *desc;
    1297             : 
    1298             :         /*
    1299             :          * Flags get serialized first, so we should always have room for the flags
    1300             :          *  descriptor.
    1301             :          */
    1302        4432 :         assert(*buf_sz >= sizeof(*desc));
    1303             : 
    1304        4432 :         desc = (struct spdk_blob_md_descriptor_flags *)buf;
    1305        4432 :         desc->type = SPDK_MD_DESCRIPTOR_TYPE_FLAGS;
    1306        4432 :         desc->length = sizeof(*desc) - sizeof(struct spdk_blob_md_descriptor);
    1307        4432 :         desc->invalid_flags = blob->invalid_flags;
    1308        4432 :         desc->data_ro_flags = blob->data_ro_flags;
    1309        4432 :         desc->md_ro_flags = blob->md_ro_flags;
    1310             :         /* Unlike length above, the space accounting uses the full descriptor size including its header. */
    1311        4432 :         *buf_sz -= sizeof(*desc);
    1312        4432 : }
    1313             : /* Serialize every xattr of the list at *buf, adding a new md page when one does not fit in the remaining space. On failure the page array is freed, *pages is set to NULL, *page_count to 0 and the negative rc is returned. */
    1314             : static int
    1315        8864 : blob_serialize_xattrs(const struct spdk_blob *blob,
    1316             :                       const struct spdk_xattr_tailq *xattrs, bool internal,
    1317             :                       struct spdk_blob_md_page **pages,
    1318             :                       struct spdk_blob_md_page *cur_page,
    1319             :                       uint32_t *page_count, uint8_t **buf,
    1320             :                       size_t *remaining_sz)
    1321             : {
    1322             :         const struct spdk_xattr *xattr;
    1323             :         int     rc;
    1324             :         /* cur_page is a by-value parameter: a page added here is not reflected in the pointer held by the caller. */
    1325       10964 :         TAILQ_FOREACH(xattr, xattrs, link) {
    1326        2100 :                 size_t required_sz = 0;
    1327             : 
    1328        4200 :                 rc = blob_serialize_xattr(xattr,
    1329        2100 :                                           *buf, *remaining_sz,
    1330        2100 :                                           &required_sz, internal);
    1331        2100 :                 if (rc < 0) {
    1332             :                         /* Need to add a new page to the chain */
    1333          60 :                         rc = blob_serialize_add_page(blob, pages, page_count,
    1334             :                                                      &cur_page);
    1335          60 :                         if (rc < 0) {
    1336           0 :                                 spdk_free(*pages);
    1337           0 :                                 *pages = NULL;
    1338           0 :                                 *page_count = 0;
    1339           0 :                                 return rc;
    1340             :                         }
    1341             : 
    1342          60 :                         *buf = (uint8_t *)cur_page->descriptors;
    1343          60 :                         *remaining_sz = sizeof(cur_page->descriptors);
    1344             : 
    1345             :                         /* Try again */
    1346          60 :                         required_sz = 0;
    1347         120 :                         rc = blob_serialize_xattr(xattr,
    1348          60 :                                                   *buf, *remaining_sz,
    1349          60 :                                                   &required_sz, internal);
    1350             :                         /* The xattr does not fit even into an empty descriptor area: give up. */
    1351          60 :                         if (rc < 0) {
    1352           0 :                                 spdk_free(*pages);
    1353           0 :                                 *pages = NULL;
    1354           0 :                                 *page_count = 0;
    1355           0 :                                 return rc;
    1356             :                         }
    1357          60 :                 }
    1358             :                 /* required_sz was filled in by the successful blob_serialize_xattr call. */
    1359        2100 :                 *remaining_sz -= required_sz;
    1360        2100 :                 *buf += required_sz;
    1361        2100 :         }
    1362             : 
    1363        8864 :         return 0;
    1364        8864 : }
    1365             : /* Build the md page array for a dirty blob: flags, xattrs, internal xattrs, then either the extent table or the RLE extents depending on use_extent_table. *pages and *page_count are reset first and filled in as pages are added. Returns 0 or a negative errno. */
    1366             : static int
    1367        4432 : blob_serialize(const struct spdk_blob *blob, struct spdk_blob_md_page **pages,
    1368             :                uint32_t *page_count)
    1369             : {
    1370             :         struct spdk_blob_md_page                *cur_page;
    1371             :         int                                     rc;
    1372             :         uint8_t                                 *buf;
    1373             :         size_t                                  remaining_sz;
    1374             : 
    1375        4432 :         assert(pages != NULL);
    1376        4432 :         assert(page_count != NULL);
    1377        4432 :         assert(blob != NULL);
    1378        4432 :         assert(blob->state == SPDK_BLOB_STATE_DIRTY);
    1379             : 
    1380        4432 :         *pages = NULL;
    1381        4432 :         *page_count = 0;
    1382             : 
    1383             :         /* A blob always has at least 1 page, even if it has no descriptors */
    1384        4432 :         rc = blob_serialize_add_page(blob, pages, page_count, &cur_page);
    1385        4432 :         if (rc < 0) {
    1386           0 :                 return rc;
    1387             :         }
    1388             : 
    1389        4432 :         buf = (uint8_t *)cur_page->descriptors;
    1390        4432 :         remaining_sz = sizeof(cur_page->descriptors);
    1391             : 
    1392             :         /* Serialize flags; the helper only updates remaining_sz, so buf is advanced here */
    1393        4432 :         blob_serialize_flags(blob, buf, &remaining_sz);
    1394        4432 :         buf += sizeof(struct spdk_blob_md_descriptor_flags);
    1395             : 
    1396             :         /* Serialize xattrs */
    1397        8864 :         rc = blob_serialize_xattrs(blob, &blob->xattrs, false,
    1398        4432 :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1399        4432 :         if (rc < 0) {
    1400           0 :                 return rc;
    1401             :         }
    1402             : 
    1403             :         /* Serialize internal xattrs */
    1404        8864 :         rc = blob_serialize_xattrs(blob, &blob->xattrs_internal, true,
    1405        4432 :                                    pages, cur_page, page_count, &buf, &remaining_sz);
    1406        4432 :         if (rc < 0) {
    1407           0 :                 return rc;
    1408             :         }
    1409             : 
    1410        4432 :         if (blob->use_extent_table) {
    1411             :                 /* Serialize extent table */
    1412        2489 :                 rc = blob_serialize_extent_table(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1413        2489 :         } else {
    1414             :                 /* Serialize extents */
    1415        1943 :                 rc = blob_serialize_extents_rle(blob, pages, cur_page, page_count, &buf, &remaining_sz);
    1416             :         }
    1417             :         /* NOTE(review): unlike blob_serialize_xattrs, the two extent serializers return rc without freeing *pages - confirm the caller releases them on failure. */
    1418        4432 :         return rc;
    1419        4432 : }
    1420             : /* Context passed as cb_arg through the blob load completion callbacks in this file. */
    1421             : struct spdk_blob_load_ctx {
    1422             :         struct spdk_blob                *blob;
    1423             : 
    1424             :         struct spdk_blob_md_page        *pages; /* released with spdk_free() in blob_load_final */
    1425             :         uint32_t                        num_pages; /* set to 1 when the single extent page buffer is allocated */
    1426             :         uint32_t                        next_extent_page; /* reset to 0 when the extent page buffer is allocated */
    1427             :         spdk_bs_sequence_t              *seq; /* handed back to cb_fn on completion */
    1428             : 
    1429             :         spdk_bs_sequence_cpl            cb_fn; /* invoked once from blob_load_final with the final bserrno */
    1430             :         void                            *cb_arg;
    1431             : };
    1432             : /* Return the CRC32C of the first SPDK_BS_PAGE_SIZE - 4 bytes of page, seeded with BLOB_CRC32C_INITIAL and XORed with it at the end. */
    1433             : static uint32_t
    1434       25744 : blob_md_page_calc_crc(void *page)
    1435             : {
    1436             :         uint32_t                crc;
    1437             :         /* NOTE(review): the excluded trailing 4 bytes are presumably the stored crc field of the page - confirm against the page layout. */
    1438       25744 :         crc = BLOB_CRC32C_INITIAL;
    1439       25744 :         crc = spdk_crc32c_update(page, SPDK_BS_PAGE_SIZE - 4, crc);
    1440       25744 :         crc ^= BLOB_CRC32C_INITIAL;
    1441             : 
    1442       25744 :         return crc;
    1443             : 
    1444             : }
    1445             : /* Finish a blob load: mark the blob clean when bserrno is 0, invoke the completion callback with bserrno, then release ctx->pages and ctx itself. */
    1446             : static void
    1447        4341 : blob_load_final(struct spdk_blob_load_ctx *ctx, int bserrno)
    1448             : {
    1449        4341 :         struct spdk_blob                *blob = ctx->blob;
    1450             : 
    1451        4341 :         if (bserrno == 0) {
    1452        4259 :                 blob_mark_clean(blob);
    1453        4259 :         }
    1454             :         /* The callback runs before the context is released. */
    1455        4341 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, bserrno);
    1456             : 
    1457             :         /* Free the page buffer (spdk_free) and the context (free) */
    1458        4341 :         spdk_free(ctx->pages);
    1459        4341 :         free(ctx);
    1460        4341 : }
    1461             : /* Completion of the snapshot open issued by blob_load_backing_dev: on success wrap snapshot in a blob bs_dev and use it as back_bs_dev (-ENOMEM if that fails), log any error, then finish the load. */
    1462             : static void
    1463         575 : blob_load_snapshot_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    1464             : {
    1465         575 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1466         575 :         struct spdk_blob                *blob = ctx->blob;
    1467             : 
    1468         575 :         if (bserrno == 0) {
    1469         567 :                 blob->back_bs_dev = bs_create_blob_bs_dev(snapshot);
    1470         567 :                 if (blob->back_bs_dev == NULL) {
    1471           0 :                         bserrno = -ENOMEM;
    1472           0 :                 }
    1473         567 :         }
    1474         575 :         if (bserrno != 0) {
    1475           8 :                 SPDK_ERRLOG("Snapshot fail\n");
    1476           8 :         }
    1477             :         /* Both the open failure and the bs_dev creation failure are reported through blob_load_final. */
    1478         575 :         blob_load_final(ctx, bserrno);
    1479         575 : }
    1480             : 
    1481             : static void blob_update_clear_method(struct spdk_blob *blob);
    1482             : /* Create the external snapshot device of an esnap clone through bs->esnap_bs_dev_create and install it as blob->back_bs_dev. Returns 0, -ENOTSUP when no create callback is set, -EINVAL for a missing esnap ID xattr or an incompatible block size, or the rc of the callback. */
    1483             : static int
    1484         150 : blob_load_esnap(struct spdk_blob *blob, void *blob_ctx)
    1485             : {
    1486         150 :         struct spdk_blob_store *bs = blob->bs;
    1487         150 :         struct spdk_bs_dev *bs_dev = NULL;
    1488         150 :         const void *esnap_id = NULL;
    1489         150 :         size_t id_len = 0;
    1490             :         int rc;
    1491             : 
    1492         150 :         if (bs->esnap_bs_dev_create == NULL) {
    1493          10 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " is an esnap clone but the blobstore was opened "
    1494             :                                "without support for esnap clones\n", blob->id);
    1495          10 :                 return -ENOTSUP;
    1496             :         }
    1497         140 :         assert(blob->back_bs_dev == NULL);
    1498             :         /* The esnap ID is read from the internal xattr named by BLOB_EXTERNAL_SNAPSHOT_ID. */
    1499         140 :         rc = blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, &esnap_id, &id_len, true);
    1500         140 :         if (rc != 0) {
    1501           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " is an esnap clone but has no esnap ID\n", blob->id);
    1502           0 :                 return -EINVAL;
    1503             :         }
    1504         140 :         assert(id_len > 0 && id_len < UINT32_MAX);
    1505             : 
    1506         140 :         SPDK_INFOLOG(blob, "Creating external snapshot device\n");
    1507             : 
    1508         140 :         rc = bs->esnap_bs_dev_create(bs->esnap_ctx, blob_ctx, blob, esnap_id, (uint32_t)id_len,
    1509             :                                      &bs_dev);
    1510         140 :         if (rc != 0) {
    1511           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": failed to load back_bs_dev "
    1512             :                               "with error %d\n", blob->id, rc);
    1513           0 :                 return rc;
    1514             :         }
    1515             : 
    1516             :         /*
    1517             :          * Note: bs_dev might be NULL if the consumer chose to not open the external snapshot.
    1518             :          * This especially might happen during spdk_bs_load() iteration.
    1519             :          */
    1520         140 :         if (bs_dev != NULL) {
    1521         140 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": loaded back_bs_dev\n", blob->id);
    1522         140 :                 if ((bs->io_unit_size % bs_dev->blocklen) != 0) {
    1523           5 :                         SPDK_NOTICELOG("blob 0x%" PRIx64 " external snapshot device block size %u "
    1524             :                                        "is not compatible with blobstore block size %u\n",
    1525             :                                        blob->id, bs_dev->blocklen, bs->io_unit_size);
    1526           5 :                         bs_dev->destroy(bs_dev);
    1527           5 :                         return -EINVAL;
    1528             :                 }
    1529         135 :         }
    1530             :         /* back_bs_dev may end up NULL here; parent_id is set to SPDK_BLOBID_EXTERNAL_SNAPSHOT either way. */
    1531         135 :         blob->back_bs_dev = bs_dev;
    1532         135 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    1533             : 
    1534         135 :         return 0;
    1535         150 : }
    1536             : /* Select the backing device of a loaded blob: esnap device for esnap clones, the parent snapshot (opened asynchronously) or a zeroes device for thin provisioned blobs, NULL otherwise. Always ends in blob_load_final, either directly or from blob_load_snapshot_cpl. */
    1537             : static void
    1538        4282 : blob_load_backing_dev(spdk_bs_sequence_t *seq, void *cb_arg)
    1539             : {
    1540        4282 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1541        4282 :         struct spdk_blob                *blob = ctx->blob;
    1542             :         const void                      *value;
    1543             :         size_t                          len;
    1544             :         int                             rc;
    1545             : 
    1546        4282 :         if (blob_is_esnap_clone(blob)) {
    1547         150 :                 rc = blob_load_esnap(blob, seq->cpl.u.blob_handle.esnap_ctx);
    1548         150 :                 blob_load_final(ctx, rc);
    1549         150 :                 return;
    1550             :         }
    1551             : 
    1552        4132 :         if (spdk_blob_is_thin_provisioned(blob)) {
    1553        1301 :                 rc = blob_get_xattr_value(blob, BLOB_SNAPSHOT, &value, &len, true);
    1554        1301 :                 if (rc == 0) {
    1555         575 :                         if (len != sizeof(spdk_blob_id)) {
    1556           0 :                                 blob_load_final(ctx, -EINVAL);
    1557           0 :                                 return;
    1558             :                         }
    1559             :                         /* open snapshot blob and continue in the callback function */
    1560         575 :                         blob->parent_id = *(spdk_blob_id *)value;
    1561        1150 :                         spdk_bs_open_blob(blob->bs, blob->parent_id,
    1562         575 :                                           blob_load_snapshot_cpl, ctx);
    1563         575 :                         return;
    1564             :                 } else {
    1565             :                         /* add zeroes_dev for thin provisioned blob; NOTE(review): the return value is not checked for NULL here - confirm it cannot fail */
    1566         726 :                         blob->back_bs_dev = bs_create_zeroes_dev();
    1567             :                 }
    1568         726 :         } else {
    1569             :                 /* standard blob */
    1570        2831 :                 blob->back_bs_dev = NULL;
    1571             :         }
    1572        3557 :         blob_load_final(ctx, 0);
    1573        4282 : }
    1574             : 
    1575             : static void
    1576        4189 : blob_load_cpl_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1577             : {
    1578        4189 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1579        4189 :         struct spdk_blob                *blob = ctx->blob;
    1580             :         struct spdk_blob_md_page        *page;
    1581             :         uint64_t                        i;
    1582             :         uint32_t                        crc;
    1583             :         uint64_t                        lba;
    1584             :         void                            *tmp;
    1585             :         uint64_t                        sz;
    1586             : 
    1587        4189 :         if (bserrno) {
    1588           9 :                 SPDK_ERRLOG("Extent page read failed: %d\n", bserrno);
    1589           9 :                 blob_load_final(ctx, bserrno);
    1590           9 :                 return;
    1591             :         }
    1592             : 
    1593        4180 :         if (ctx->pages == NULL) {
    1594             :                 /* First iteration of this function, allocate buffer for single EXTENT_PAGE */
    1595        2624 :                 ctx->pages = spdk_zmalloc(blob->bs->md_page_size, 0,
    1596             :                                           NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    1597        2624 :                 if (!ctx->pages) {
    1598           0 :                         blob_load_final(ctx, -ENOMEM);
    1599           0 :                         return;
    1600             :                 }
    1601        2624 :                 ctx->num_pages = 1;
    1602        2624 :                 ctx->next_extent_page = 0;
    1603        2624 :         } else {
    1604        1556 :                 page = &ctx->pages[0];
    1605        1556 :                 crc = blob_md_page_calc_crc(page);
    1606        1556 :                 if (crc != page->crc) {
    1607           0 :                         blob_load_final(ctx, -EINVAL);
    1608           0 :                         return;
    1609             :                 }
    1610             : 
    1611        1556 :                 if (page->next != SPDK_INVALID_MD_PAGE) {
    1612           0 :                         blob_load_final(ctx, -EINVAL);
    1613           0 :                         return;
    1614             :                 }
    1615             : 
    1616        1556 :                 bserrno = blob_parse_extent_page(page, blob);
    1617        1556 :                 if (bserrno) {
    1618           0 :                         blob_load_final(ctx, bserrno);
    1619           0 :                         return;
    1620             :                 }
    1621             :         }
    1622             : 
    1623        4816 :         for (i = ctx->next_extent_page; i < blob->active.num_extent_pages; i++) {
    1624        2201 :                 if (blob->active.extent_pages[i] != 0) {
    1625             :                         /* Extent page was allocated, read and parse it. */
    1626        1565 :                         lba = bs_md_page_to_lba(blob->bs, blob->active.extent_pages[i]);
    1627        1565 :                         ctx->next_extent_page = i + 1;
    1628             : 
    1629        3130 :                         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1630        1565 :                                              bs_byte_to_lba(blob->bs, blob->bs->md_page_size),
    1631        1565 :                                              blob_load_cpl_extents_cpl, ctx);
    1632        1565 :                         return;
    1633             :                 } else {
    1634             :                         /* Thin provisioned blobs can point to unallocated extent pages.
    1635             :                          * In this case blob size should be increased by up to the amount left in remaining_clusters_in_et. */
    1636             : 
    1637         636 :                         sz = spdk_min(blob->remaining_clusters_in_et, SPDK_EXTENTS_PER_EP);
    1638         636 :                         blob->active.num_clusters += sz;
    1639         636 :                         blob->remaining_clusters_in_et -= sz;
    1640             : 
    1641         636 :                         assert(spdk_blob_is_thin_provisioned(blob));
    1642         636 :                         assert(i + 1 < blob->active.num_extent_pages || blob->remaining_clusters_in_et == 0);
    1643             : 
    1644         636 :                         tmp = realloc(blob->active.clusters, blob->active.num_clusters * sizeof(*blob->active.clusters));
    1645         636 :                         if (tmp == NULL) {
    1646           0 :                                 blob_load_final(ctx, -ENOMEM);
    1647           0 :                                 return;
    1648             :                         }
    1649        1272 :                         memset(tmp + sizeof(*blob->active.clusters) * blob->active.cluster_array_size, 0,
    1650         636 :                                sizeof(*blob->active.clusters) * (blob->active.num_clusters - blob->active.cluster_array_size));
    1651         636 :                         blob->active.clusters = tmp;
    1652         636 :                         blob->active.cluster_array_size = blob->active.num_clusters;
    1653             :                 }
    1654         636 :         }
    1655             : 
    1656        2615 :         blob_load_backing_dev(seq, ctx);
    1657        4189 : }
    1658             : 
    1659             : static void
    1660        4464 : blob_load_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1661             : {
    1662        4464 :         struct spdk_blob_load_ctx       *ctx = cb_arg;
    1663        4464 :         struct spdk_blob                *blob = ctx->blob;
    1664             :         struct spdk_blob_md_page        *page;
    1665             :         int                             rc;
    1666             :         uint32_t                        crc;
    1667             :         uint32_t                        current_page;
    1668             : 
    1669        4464 :         if (ctx->num_pages == 1) {
    1670        4341 :                 current_page = bs_blobid_to_page(blob->id);
    1671        4341 :         } else {
    1672         123 :                 assert(ctx->num_pages != 0);
    1673         123 :                 page = &ctx->pages[ctx->num_pages - 2];
    1674         123 :                 current_page = page->next;
    1675             :         }
    1676             : 
    1677        4464 :         if (bserrno) {
    1678          25 :                 SPDK_ERRLOG("Metadata page %d read failed for blobid 0x%" PRIx64 ": %d\n",
    1679             :                             current_page, blob->id, bserrno);
    1680          25 :                 blob_load_final(ctx, bserrno);
    1681          25 :                 return;
    1682             :         }
    1683             : 
    1684        4439 :         page = &ctx->pages[ctx->num_pages - 1];
    1685        4439 :         crc = blob_md_page_calc_crc(page);
    1686        4439 :         if (crc != page->crc) {
    1687          10 :                 SPDK_ERRLOG("Metadata page %d crc mismatch for blobid 0x%" PRIx64 "\n",
    1688             :                             current_page, blob->id);
    1689          10 :                 blob_load_final(ctx, -EINVAL);
    1690          10 :                 return;
    1691             :         }
    1692             : 
    1693        4429 :         if (page->next != SPDK_INVALID_MD_PAGE) {
    1694             :                 struct spdk_blob_md_page *tmp_pages;
    1695         123 :                 uint32_t next_page = page->next;
    1696         123 :                 uint64_t next_lba = bs_md_page_to_lba(blob->bs, next_page);
    1697             : 
    1698             :                 /* Read the next page */
    1699         123 :                 tmp_pages = spdk_realloc(ctx->pages, (sizeof(*page) * (ctx->num_pages + 1)), 0);
    1700         123 :                 if (tmp_pages == NULL) {
    1701           0 :                         blob_load_final(ctx, -ENOMEM);
    1702           0 :                         return;
    1703             :                 }
    1704         123 :                 ctx->num_pages++;
    1705         123 :                 ctx->pages = tmp_pages;
    1706             : 
    1707         246 :                 bs_sequence_read_dev(seq, &ctx->pages[ctx->num_pages - 1],
    1708         123 :                                      next_lba,
    1709         123 :                                      bs_byte_to_lba(blob->bs, sizeof(*page)),
    1710         123 :                                      blob_load_cpl, ctx);
    1711         123 :                 return;
    1712             :         }
    1713             : 
    1714             :         /* Parse the pages */
    1715        4306 :         rc = blob_parse(ctx->pages, ctx->num_pages, blob);
    1716        4306 :         if (rc) {
    1717          15 :                 blob_load_final(ctx, rc);
    1718          15 :                 return;
    1719             :         }
    1720             : 
    1721        4291 :         if (blob->extent_table_found == true) {
    1722             :                 /* If EXTENT_TABLE was found, that means support for it should be enabled. */
    1723        2624 :                 assert(blob->extent_rle_found == false);
    1724        2624 :                 blob->use_extent_table = true;
    1725        2624 :         } else {
    1726             :                 /* If EXTENT_RLE or no extent_* descriptor was found disable support
    1727             :                  * for extent table. No extent_* descriptors means that blob has length of 0
    1728             :                  * and no extent_rle descriptors were persisted for it.
    1729             :                  * EXTENT_TABLE if used, is always present in metadata regardless of length. */
    1730        1667 :                 blob->use_extent_table = false;
    1731             :         }
    1732             : 
    1733             :         /* Check the clear_method stored in metadata vs what may have been passed
    1734             :          * via spdk_bs_open_blob_ext() and update accordingly.
    1735             :          */
    1736        4291 :         blob_update_clear_method(blob);
    1737             : 
    1738        4291 :         spdk_free(ctx->pages);
    1739        4291 :         ctx->pages = NULL;
    1740             : 
    1741        4291 :         if (blob->extent_table_found) {
    1742        2624 :                 blob_load_cpl_extents_cpl(seq, ctx, 0);
    1743        2624 :         } else {
    1744        1667 :                 blob_load_backing_dev(seq, ctx);
    1745             :         }
    1746        4464 : }
    1747             : 
    1748             : /* Load a blob from disk given a blobid */
    1749             : static void
    1750        4341 : blob_load(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    1751             :           spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    1752             : {
    1753             :         struct spdk_blob_load_ctx *ctx;
    1754             :         struct spdk_blob_store *bs;
    1755             :         uint32_t page_num;
    1756             :         uint64_t lba;
    1757             : 
    1758        4341 :         blob_verify_md_op(blob);
    1759             : 
    1760        4341 :         bs = blob->bs;
    1761             : 
    1762        4341 :         ctx = calloc(1, sizeof(*ctx));
    1763        4341 :         if (!ctx) {
    1764           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1765           0 :                 return;
    1766             :         }
    1767             : 
    1768        4341 :         ctx->blob = blob;
    1769        4341 :         ctx->pages = spdk_realloc(ctx->pages, bs->md_page_size, 0);
    1770        4341 :         if (!ctx->pages) {
    1771           0 :                 free(ctx);
    1772           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    1773           0 :                 return;
    1774             :         }
    1775        4341 :         ctx->num_pages = 1;
    1776        4341 :         ctx->cb_fn = cb_fn;
    1777        4341 :         ctx->cb_arg = cb_arg;
    1778        4341 :         ctx->seq = seq;
    1779             : 
    1780        4341 :         page_num = bs_blobid_to_page(blob->id);
    1781        4341 :         lba = bs_md_page_to_lba(blob->bs, page_num);
    1782             : 
    1783        4341 :         blob->state = SPDK_BLOB_STATE_LOADING;
    1784             : 
    1785        8682 :         bs_sequence_read_dev(seq, &ctx->pages[0], lba,
    1786        4341 :                              bs_byte_to_lba(bs, bs->md_page_size),
    1787        4341 :                              blob_load_cpl, ctx);
    1788        4341 : }
    1789             : 
    1790             : struct spdk_blob_persist_ctx {
    1791             :         struct spdk_blob                *blob;
    1792             : 
    1793             :         struct spdk_blob_md_page        *pages;
    1794             :         uint32_t                        next_extent_page;
    1795             :         struct spdk_blob_md_page        *extent_page;
    1796             : 
    1797             :         spdk_bs_sequence_t              *seq;
    1798             :         spdk_bs_sequence_cpl            cb_fn;
    1799             :         void                            *cb_arg;
    1800             :         TAILQ_ENTRY(spdk_blob_persist_ctx) link;
    1801             : };
    1802             : 
    1803             : static void
    1804        1584 : bs_batch_clear_dev(struct spdk_blob *blob, spdk_bs_batch_t *batch, uint64_t lba,
    1805             :                    uint64_t lba_count)
    1806             : {
    1807        1584 :         switch (blob->clear_method) {
    1808             :         case BLOB_CLEAR_WITH_DEFAULT:
    1809             :         case BLOB_CLEAR_WITH_UNMAP:
    1810        1584 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    1811        1584 :                 break;
    1812             :         case BLOB_CLEAR_WITH_WRITE_ZEROES:
    1813           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1814           0 :                 break;
    1815           0 :         case BLOB_CLEAR_WITH_NONE:
    1816             :         default:
    1817           0 :                 break;
    1818             :         }
    1819        1584 : }
    1820             : 
    1821             : static int
    1822        1452 : bs_super_validate(struct spdk_bs_super_block *super, struct spdk_blob_store *bs)
    1823             : {
    1824             :         uint32_t        crc;
    1825             :         static const char zeros[SPDK_BLOBSTORE_TYPE_LENGTH];
    1826             : 
    1827        1452 :         if (super->version > SPDK_BS_VERSION ||
    1828        1447 :             super->version < SPDK_BS_INITIAL_VERSION) {
    1829          10 :                 return -EILSEQ;
    1830             :         }
    1831             : 
    1832        2884 :         if (memcmp(super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    1833        1442 :                    sizeof(super->signature)) != 0) {
    1834           0 :                 return -EILSEQ;
    1835             :         }
    1836             : 
    1837        1442 :         crc = blob_md_page_calc_crc(super);
    1838        1442 :         if (crc != super->crc) {
    1839           5 :                 return -EILSEQ;
    1840             :         }
    1841             : 
    1842        1437 :         if (memcmp(&bs->bstype, &super->bstype, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1843        1420 :                 SPDK_DEBUGLOG(blob, "Bstype matched - loading blobstore\n");
    1844        1437 :         } else if (memcmp(&bs->bstype, zeros, SPDK_BLOBSTORE_TYPE_LENGTH) == 0) {
    1845           7 :                 SPDK_DEBUGLOG(blob, "Bstype wildcard used - loading blobstore regardless bstype\n");
    1846           7 :         } else {
    1847          10 :                 SPDK_DEBUGLOG(blob, "Unexpected bstype\n");
    1848          10 :                 SPDK_LOGDUMP(blob, "Expected:", bs->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1849          10 :                 SPDK_LOGDUMP(blob, "Found:", super->bstype.bstype, SPDK_BLOBSTORE_TYPE_LENGTH);
    1850          10 :                 return -ENXIO;
    1851             :         }
    1852             : 
    1853        1427 :         if (super->size > bs->dev->blockcnt * bs->dev->blocklen) {
    1854          10 :                 SPDK_NOTICELOG("Size mismatch, dev size: %" PRIu64 ", blobstore size: %" PRIu64 "\n",
    1855             :                                bs->dev->blockcnt * bs->dev->blocklen, super->size);
    1856          10 :                 return -EILSEQ;
    1857             :         }
    1858             : 
    1859        1417 :         return 0;
    1860        1452 : }
    1861             : 
    1862             : static void bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    1863             :                           spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    1864             : 
    1865             : static void
    1866        6304 : blob_persist_complete_cb(void *arg)
    1867             : {
    1868        6304 :         struct spdk_blob_persist_ctx *ctx = arg;
    1869             : 
    1870             :         /* Call user callback */
    1871        6304 :         ctx->cb_fn(ctx->seq, ctx->cb_arg, 0);
    1872             : 
    1873             :         /* Free the memory */
    1874        6304 :         spdk_free(ctx->pages);
    1875        6304 :         free(ctx);
    1876        6304 : }
    1877             : 
    1878             : static void blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno);
    1879             : 
    1880             : static void
    1881        6304 : blob_persist_complete(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx, int bserrno)
    1882             : {
    1883             :         struct spdk_blob_persist_ctx    *next_persist, *tmp;
    1884        6304 :         struct spdk_blob                *blob = ctx->blob;
    1885             : 
    1886        6304 :         if (bserrno == 0) {
    1887        6239 :                 blob_mark_clean(blob);
    1888        6239 :         }
    1889             : 
    1890        6304 :         assert(ctx == TAILQ_FIRST(&blob->persists_to_complete));
    1891             : 
    1892             :         /* Complete all persists that were pending when the current persist started */
    1893       12608 :         TAILQ_FOREACH_SAFE(next_persist, &blob->persists_to_complete, link, tmp) {
    1894        6304 :                 TAILQ_REMOVE(&blob->persists_to_complete, next_persist, link);
    1895        6304 :                 spdk_thread_send_msg(spdk_get_thread(), blob_persist_complete_cb, next_persist);
    1896        6304 :         }
    1897             : 
    1898        6304 :         if (TAILQ_EMPTY(&blob->pending_persists)) {
    1899        6276 :                 return;
    1900             :         }
    1901             : 
    1902             :         /* Queue up all pending persists for completion and start blob persist with first one */
    1903          28 :         TAILQ_SWAP(&blob->persists_to_complete, &blob->pending_persists, spdk_blob_persist_ctx, link);
    1904          28 :         next_persist = TAILQ_FIRST(&blob->persists_to_complete);
    1905             : 
    1906          28 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    1907          28 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, next_persist);
    1908        6304 : }
    1909             : 
    1910             : static void
    1911        6239 : blob_persist_clear_extents_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1912             : {
    1913        6239 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1914        6239 :         struct spdk_blob                *blob = ctx->blob;
    1915        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1916             :         size_t                          i;
    1917             : 
    1918        6239 :         if (bserrno != 0) {
    1919           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1920           0 :                 return;
    1921             :         }
    1922             : 
    1923        6239 :         spdk_spin_lock(&bs->used_lock);
    1924             : 
    1925             :         /* Release all extent_pages that were truncated */
    1926        8837 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1927             :                 /* Nothing to release if it was not allocated */
    1928        2598 :                 if (blob->active.extent_pages[i] != 0) {
    1929         936 :                         bs_release_md_page(bs, blob->active.extent_pages[i]);
    1930         936 :                 }
    1931        2598 :         }
    1932             : 
    1933        6239 :         spdk_spin_unlock(&bs->used_lock);
    1934             : 
    1935        6239 :         if (blob->active.num_extent_pages == 0) {
    1936        4134 :                 free(blob->active.extent_pages);
    1937        4134 :                 blob->active.extent_pages = NULL;
    1938        4134 :                 blob->active.extent_pages_array_size = 0;
    1939        6239 :         } else if (blob->active.num_extent_pages != blob->active.extent_pages_array_size) {
    1940             : #ifndef __clang_analyzer__
    1941             :                 void *tmp;
    1942             : 
    1943             :                 /* scan-build really can't figure reallocs, workaround it */
    1944           3 :                 tmp = realloc(blob->active.extent_pages, sizeof(uint32_t) * blob->active.num_extent_pages);
    1945           3 :                 assert(tmp != NULL);
    1946           3 :                 blob->active.extent_pages = tmp;
    1947             : #endif
    1948           3 :                 blob->active.extent_pages_array_size = blob->active.num_extent_pages;
    1949           3 :         }
    1950             : 
    1951        6239 :         blob_persist_complete(seq, ctx, bserrno);
    1952        6239 : }
    1953             : 
    1954             : static void
    1955        6239 : blob_persist_clear_extents(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    1956             : {
    1957        6239 :         struct spdk_blob                *blob = ctx->blob;
    1958        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1959             :         size_t                          i;
    1960             :         uint64_t                        lba;
    1961             :         uint64_t                        lba_count;
    1962             :         spdk_bs_batch_t                 *batch;
    1963             : 
    1964        6239 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_extents_cpl, ctx);
    1965        6239 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    1966             : 
    1967             :         /* Clear all extent_pages that were truncated */
    1968        8837 :         for (i = blob->active.num_extent_pages; i < blob->active.extent_pages_array_size; i++) {
    1969             :                 /* Nothing to clear if it was not allocated */
    1970        2598 :                 if (blob->active.extent_pages[i] != 0) {
    1971         936 :                         lba = bs_md_page_to_lba(bs, blob->active.extent_pages[i]);
    1972         936 :                         bs_batch_write_zeroes_dev(batch, lba, lba_count);
    1973         936 :                 }
    1974        2598 :         }
    1975             : 
    1976        6239 :         bs_batch_close(batch);
    1977        6239 : }
    1978             : 
    1979             : static void
    1980        6239 : blob_persist_clear_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    1981             : {
    1982        6239 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    1983        6239 :         struct spdk_blob                *blob = ctx->blob;
    1984        6239 :         struct spdk_blob_store          *bs = blob->bs;
    1985             :         size_t                          i;
    1986             : 
    1987        6239 :         if (bserrno != 0) {
    1988           0 :                 blob_persist_complete(seq, ctx, bserrno);
    1989           0 :                 return;
    1990             :         }
    1991             : 
    1992        6239 :         spdk_spin_lock(&bs->used_lock);
    1993             :         /* Release all clusters that were truncated */
    1994     1342490 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    1995     1336251 :                 uint32_t cluster_num = bs_lba_to_cluster(bs, blob->active.clusters[i]);
    1996             : 
    1997             :                 /* Nothing to release if it was not allocated */
    1998     1336251 :                 if (blob->active.clusters[i] != 0) {
    1999        2926 :                         bs_release_cluster(bs, cluster_num);
    2000        2926 :                 }
    2001     1336251 :         }
    2002        6239 :         spdk_spin_unlock(&bs->used_lock);
    2003             : 
    2004        6239 :         if (blob->active.num_clusters == 0) {
    2005        2423 :                 free(blob->active.clusters);
    2006        2423 :                 blob->active.clusters = NULL;
    2007        2423 :                 blob->active.cluster_array_size = 0;
    2008        6239 :         } else if (blob->active.num_clusters != blob->active.cluster_array_size) {
    2009             : #ifndef __clang_analyzer__
    2010             :                 void *tmp;
    2011             : 
    2012             :                 /* scan-build really can't figure reallocs, workaround it */
    2013          22 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * blob->active.num_clusters);
    2014          22 :                 assert(tmp != NULL);
    2015          22 :                 blob->active.clusters = tmp;
    2016             : 
    2017             : #endif
    2018          22 :                 blob->active.cluster_array_size = blob->active.num_clusters;
    2019          22 :         }
    2020             : 
    2021             :         /* Move on to clearing extent pages */
    2022        6239 :         blob_persist_clear_extents(seq, ctx);
    2023        6239 : }
    2024             : 
    2025             : static void
    2026        6239 : blob_persist_clear_clusters(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2027             : {
    2028        6239 :         struct spdk_blob                *blob = ctx->blob;
    2029        6239 :         struct spdk_blob_store          *bs = blob->bs;
    2030             :         spdk_bs_batch_t                 *batch;
    2031             :         size_t                          i;
    2032             :         uint64_t                        lba;
    2033             :         uint64_t                        lba_count;
    2034             : 
    2035             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2036             :          * at the end, but no changes ever occur in the middle of the list.
    2037             :          */
    2038             : 
    2039        6239 :         batch = bs_sequence_to_batch(seq, blob_persist_clear_clusters_cpl, ctx);
    2040             : 
    2041             :         /* Clear all clusters that were truncated */
    2042        6239 :         lba = 0;
    2043        6239 :         lba_count = 0;
    2044     1342490 :         for (i = blob->active.num_clusters; i < blob->active.cluster_array_size; i++) {
    2045     1336251 :                 uint64_t next_lba = blob->active.clusters[i];
    2046     1336251 :                 uint64_t next_lba_count = bs_cluster_to_lba(bs, 1);
    2047             : 
    2048     1336251 :                 if (next_lba > 0 && (lba + lba_count) == next_lba) {
    2049             :                         /* This cluster is contiguous with the previous one. */
    2050        1347 :                         lba_count += next_lba_count;
    2051        1347 :                         continue;
    2052     1334904 :                 } else if (next_lba == 0) {
    2053     1333325 :                         continue;
    2054             :                 }
    2055             : 
    2056             :                 /* This cluster is not contiguous with the previous one. */
    2057             : 
    2058             :                 /* If a run of LBAs previously existing, clear them now */
    2059        1579 :                 if (lba_count > 0) {
    2060          45 :                         bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2061          45 :                 }
    2062             : 
    2063             :                 /* Start building the next batch */
    2064        1579 :                 lba = next_lba;
    2065        1579 :                 if (next_lba > 0) {
    2066        1579 :                         lba_count = next_lba_count;
    2067        1579 :                 } else {
    2068           0 :                         lba_count = 0;
    2069             :                 }
    2070        1579 :         }
    2071             : 
    2072             :         /* If we ended with a contiguous set of LBAs, clear them now */
    2073        6239 :         if (lba_count > 0) {
    2074        1534 :                 bs_batch_clear_dev(ctx->blob, batch, lba, lba_count);
    2075        1534 :         }
    2076             : 
    2077        6239 :         bs_batch_close(batch);
    2078        6239 : }
    2079             : 
    2080             : static void
    2081        6244 : blob_persist_zero_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2082             : {
    2083        6244 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2084        6244 :         struct spdk_blob                *blob = ctx->blob;
    2085        6244 :         struct spdk_blob_store          *bs = blob->bs;
    2086             :         size_t                          i;
    2087             : 
    2088        6244 :         if (bserrno != 0) {
    2089           5 :                 blob_persist_complete(seq, ctx, bserrno);
    2090           5 :                 return;
    2091             :         }
    2092             : 
    2093        6239 :         spdk_spin_lock(&bs->used_lock);
    2094             : 
    2095             :         /* This loop starts at 1 because the first page is special and handled
    2096             :          * below. The pages (except the first) are never written in place,
    2097             :          * so any pages in the clean list must be zeroed.
    2098             :          */
    2099        6324 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2100          85 :                 bs_release_md_page(bs, blob->clean.pages[i]);
    2101          85 :         }
    2102             : 
    2103        6239 :         if (blob->active.num_pages == 0) {
    2104             :                 uint32_t page_num;
    2105             : 
    2106        1857 :                 page_num = bs_blobid_to_page(blob->id);
    2107        1857 :                 bs_release_md_page(bs, page_num);
    2108        1857 :         }
    2109             : 
    2110        6239 :         spdk_spin_unlock(&bs->used_lock);
    2111             : 
    2112             :         /* Move on to clearing clusters */
    2113        6239 :         blob_persist_clear_clusters(seq, ctx);
    2114        6244 : }
    2115             : /* Sequence callback: write zeroes over each clean-list metadata page after the first, plus the first page on delete. */
    2116             : static void
    2117        6294 : blob_persist_zero_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2118             : {
    2119        6294 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2120        6294 :         struct spdk_blob                *blob = ctx->blob;
    2121        6294 :         struct spdk_blob_store          *bs = blob->bs;
    2122             :         uint64_t                        lba;
    2123             :         uint64_t                        lba_count;
    2124             :         spdk_bs_batch_t                 *batch;
    2125             :         size_t                          i;
    2126             : 
    2127        6294 :         if (bserrno != 0) {
    2128          50 :                 blob_persist_complete(seq, ctx, bserrno);
    2129          50 :                 return;
    2130             :         }
    2131             : 
    2132        6244 :         batch = bs_sequence_to_batch(seq, blob_persist_zero_pages_cpl, ctx);
    2133             :         /* One lba_count serves every write below: each covers a single metadata page. */
    2134        6244 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    2135             : 
    2136             :         /* This loop starts at 1 because the first page is special and handled
    2137             :          * below. The pages (except the first) are never written in place,
    2138             :          * so any pages in the clean list must be zeroed.
    2139             :          */
    2140        6329 :         for (i = 1; i < blob->clean.num_pages; i++) {
    2141          85 :                 lba = bs_md_page_to_lba(bs, blob->clean.pages[i]);
    2142             : 
    2143          85 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2144          85 :         }
    2145             : 
    2146             :         /* The first page will only be zeroed if this is a delete. */
    2147        6244 :         if (blob->active.num_pages == 0) {
    2148             :                 uint32_t page_num;
    2149             : 
    2150             :                 /* The first page in the metadata goes where the blobid indicates */
    2151        1862 :                 page_num = bs_blobid_to_page(blob->id);
    2152        1862 :                 lba = bs_md_page_to_lba(bs, page_num);
    2153             : 
    2154        1862 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    2155        1862 :         }
    2156             :         /* NOTE(review): presumably blob_persist_zero_pages_cpl runs once the queued writes finish - confirm. */
    2157        6244 :         bs_batch_close(batch);
    2158        6294 : }
    2159             : /* Sequence callback: write ctx->pages[0] to the page selected by the blobid, then continue in blob_persist_zero_pages(). */
    2160             : static void
    2161        4432 : blob_persist_write_page_root(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2162             : {
    2163        4432 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2164        4432 :         struct spdk_blob                *blob = ctx->blob;
    2165        4432 :         struct spdk_blob_store          *bs = blob->bs;
    2166             :         uint64_t                        lba;
    2167             :         uint32_t                        lba_count;
    2168             :         struct spdk_blob_md_page        *page;
    2169             : 
    2170        4432 :         if (bserrno != 0) {
    2171           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2172           0 :                 return;
    2173             :         }
    2174             :         /* With no active pages there is no root page to write. */
    2175        4432 :         if (blob->active.num_pages == 0) {
    2176             :                 /* Move on to the next step */
    2177           0 :                 blob_persist_zero_pages(seq, ctx, 0);
    2178           0 :                 return;
    2179             :         }
    2180             : 
    2181        4432 :         lba_count = bs_byte_to_lba(bs, bs->md_page_size);
    2182             : 
    2183        4432 :         page = &ctx->pages[0];
    2184             :         /* The first page in the metadata goes where the blobid indicates */
    2185        4432 :         lba = bs_md_page_to_lba(bs, bs_blobid_to_page(blob->id));
    2186             :         /* blob_persist_zero_pages is passed as the completion callback for this write. */
    2187        8864 :         bs_sequence_write_dev(seq, page, lba, lba_count,
    2188        4432 :                               blob_persist_zero_pages, ctx);
    2189        4432 : }
    2190             : /* Batch-write ctx->pages[1..active.num_pages-1]; blob_persist_write_page_root is the batch callback. */
    2191             : static void
    2192        4432 : blob_persist_write_page_chain(spdk_bs_sequence_t *seq, struct spdk_blob_persist_ctx *ctx)
    2193             : {
    2194        4432 :         struct spdk_blob                *blob = ctx->blob;
    2195        4432 :         struct spdk_blob_store          *bs = blob->bs;
    2196             :         uint64_t                        lba;
    2197             :         uint32_t                        lba_count;
    2198             :         struct spdk_blob_md_page        *page;
    2199             :         spdk_bs_batch_t                 *batch;
    2200             :         size_t                          i;
    2201             : 
    2202             :         /* Clusters don't move around in blobs. The list shrinks or grows
    2203             :          * at the end, but no changes ever occur in the middle of the list.
    2204             :          */
    2205             :         /* NOTE(review): sized with sizeof(*page) while sibling persist steps use bs->md_page_size - confirm they always match. */
    2206        4432 :         lba_count = bs_byte_to_lba(bs, sizeof(*page));
    2207             : 
    2208        4432 :         batch = bs_sequence_to_batch(seq, blob_persist_write_page_root, ctx);
    2209             : 
    2210             :         /* This starts at 1. The root page is not written until
    2211             :          * all of the others are finished
    2212             :          */
    2213        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2214         109 :                 page = &ctx->pages[i];
    2215         109 :                 assert(page->sequence_num == i);
    2216             :                 /* Page i of the serialized metadata goes to the md page recorded in active.pages[i]. */
    2217         109 :                 lba = bs_md_page_to_lba(bs, blob->active.pages[i]);
    2218             : 
    2219         109 :                 bs_batch_write_dev(batch, page, lba, lba_count);
    2220         109 :         }
    2221             : 
    2222        4432 :         bs_batch_close(batch);
    2223        4432 : }
    2224             : /* Set the blob's active cluster count to sz, growing the in-memory arrays and, for non-thin blobs, claiming clusters. Returns 0, -ENOSPC or -ENOMEM. */
    2225             : static int
    2226        4465 : blob_resize(struct spdk_blob *blob, uint64_t sz)
    2227             : {
    2228             :         uint64_t        i;
    2229             :         uint64_t        *tmp;
    2230             :         uint64_t        cluster;
    2231             :         uint32_t        lfmd; /*  lowest free md page */
    2232             :         uint64_t        num_clusters;
    2233             :         uint32_t        *ep_tmp;
    2234        4465 :         uint64_t        new_num_ep = 0, current_num_ep = 0;
    2235             :         struct spdk_blob_store *bs;
    2236             :         int             rc;
    2237             : 
    2238        4465 :         bs = blob->bs;
    2239             : 
    2240        4465 :         blob_verify_md_op(blob);
    2241             :         /* Nothing to do when the size is unchanged. */
    2242        4465 :         if (blob->active.num_clusters == sz) {
    2243         566 :                 return 0;
    2244             :         }
    2245             : 
    2246        3899 :         if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2247             :                 /* If this blob was resized to be larger, then smaller, then
    2248             :                  * larger without syncing, then the cluster array already
    2249             :                  * contains spare assigned clusters we can use.
    2250             :                  */
    2251           0 :                 num_clusters = spdk_min(blob->active.cluster_array_size,
    2252             :                                         sz);
    2253           0 :         } else {
    2254        3899 :                 num_clusters = blob->active.num_clusters;
    2255             :         }
    2256             : 
    2257        3899 :         if (blob->use_extent_table) {
    2258             :                 /* Round up since every cluster beyond current Extent Table size,
    2259             :                  * requires new extent page. */
    2260        2359 :                 new_num_ep = spdk_divide_round_up(sz, SPDK_EXTENTS_PER_EP);
    2261        2359 :                 current_num_ep = spdk_divide_round_up(num_clusters, SPDK_EXTENTS_PER_EP);
    2262        2359 :         }
    2263             : 
    2264        3899 :         assert(!spdk_spin_held(&bs->used_lock));
    2265             : 
    2266             :         /* Check first that we have enough clusters and md pages before we start claiming them.
    2267             :          * bs->used_lock is held to ensure that clusters we think are free are still free when we go
    2268             :          * to claim them later in this function. lfmd is advanced past every free md page found, so
    2269             :          * each new extent page is matched with a distinct page rather than re-finding the same one. */
    2270        3899 :         if (sz > num_clusters && spdk_blob_is_thin_provisioned(blob) == false) {
    2271        1624 :                 spdk_spin_lock(&bs->used_lock);
    2272        1624 :                 if ((sz - num_clusters) > bs->num_free_clusters) {
    2273          10 :                         rc = -ENOSPC;
    2274          10 :                         goto out;
    2275             :                 }
    2276        1614 :                 lfmd = 0;
    2277        2572 :                 for (i = current_num_ep; i < new_num_ep ; i++, lfmd++) {
    2278         958 :                         lfmd = spdk_bit_array_find_first_clear(blob->bs->used_md_pages, lfmd);
    2279         958 :                         if (lfmd == UINT32_MAX) {
    2280             :                                 /* No more free md pages. Cannot satisfy the request */
    2281           0 :                                 rc = -ENOSPC;
    2282           0 :                                 goto out;
    2283             :                         }
    2284         958 :                 }
    2285        1614 :         }
    2286             : 
    2287        3889 :         if (sz > num_clusters) {
    2288             :                 /* Expand the cluster array if necessary.
    2289             :                  * We only shrink the array when persisting.
    2290             :                  */
    2291        2130 :                 tmp = realloc(blob->active.clusters, sizeof(*blob->active.clusters) * sz);
    2292        2130 :                 if (sz > 0 && tmp == NULL) {
    2293           0 :                         rc = -ENOMEM;
    2294           0 :                         goto out;
    2295             :                 }
    2296        4260 :                 memset(tmp + blob->active.cluster_array_size, 0,
    2297        2130 :                        sizeof(*blob->active.clusters) * (sz - blob->active.cluster_array_size));
    2298        2130 :                 blob->active.clusters = tmp;
    2299        2130 :                 blob->active.cluster_array_size = sz;
    2300             : 
    2301             :                 /* Expand the extents table, only if enough clusters were added */
    2302        2130 :                 if (new_num_ep > current_num_ep && blob->use_extent_table) {
    2303        1255 :                         ep_tmp = realloc(blob->active.extent_pages, sizeof(*blob->active.extent_pages) * new_num_ep);
    2304        1255 :                         if (new_num_ep > 0 && ep_tmp == NULL) {
    2305           0 :                                 rc = -ENOMEM;
    2306           0 :                                 goto out;
    2307             :                         }
    2308        2510 :                         memset(ep_tmp + blob->active.extent_pages_array_size, 0,
    2309        1255 :                                sizeof(*blob->active.extent_pages) * (new_num_ep - blob->active.extent_pages_array_size));
    2310        1255 :                         blob->active.extent_pages = ep_tmp;
    2311        1255 :                         blob->active.extent_pages_array_size = new_num_ep;
    2312        1255 :                 }
    2313        2130 :         }
    2314             : 
    2315        3889 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    2316             :         /* Only non-thin blobs get clusters assigned here. */
    2317        3889 :         if (spdk_blob_is_thin_provisioned(blob) == false) {
    2318        3028 :                 cluster = 0;
    2319        3028 :                 lfmd = 0;
    2320       12281 :                 for (i = num_clusters; i < sz; i++) {
    2321        9253 :                         bs_allocate_cluster(blob, i, &cluster, &lfmd, true);
    2322             :                         /* Do not increment lfmd here.  lfmd will get updated
    2323             :                          * to the md_page allocated (if any) when a new extent
    2324             :                          * page is needed.  Just pass that value again,
    2325             :                          * bs_allocate_cluster will just start at that index
    2326             :                          * to find the next free md_page when needed.
    2327             :                          */
    2328        9253 :                 }
    2329        3028 :         }
    2330             : 
    2331             :         /* If we are shrinking the blob, we must adjust num_allocated_clusters */
    2332     1340190 :         for (i = sz; i < num_clusters; i++) {
    2333     1336301 :                 if (blob->active.clusters[i] != 0) {
    2334        2926 :                         blob->active.num_allocated_clusters--;
    2335        2926 :                 }
    2336     1336301 :         }
    2337             : 
    2338        3889 :         blob->active.num_clusters = sz;
    2339        3889 :         blob->active.num_extent_pages = new_num_ep;
    2340             : 
    2341        3889 :         rc = 0;
    2342             : out:
    2343        3899 :         if (spdk_spin_held(&bs->used_lock)) {
    2344        1624 :                 spdk_spin_unlock(&bs->used_lock);
    2345        1624 :         }
    2346             : 
    2347        3899 :         return rc;
    2348        4465 : }
    2349             : /* Serialize the blob into ctx->pages, claim an md page for every page after the first, link and checksum them, then start writing. */
    2350             : static void
    2351        4432 : blob_persist_generate_new_md(struct spdk_blob_persist_ctx *ctx)
    2352             : {
    2353        4432 :         spdk_bs_sequence_t *seq = ctx->seq;
    2354        4432 :         struct spdk_blob *blob = ctx->blob;
    2355        4432 :         struct spdk_blob_store *bs = blob->bs;
    2356             :         uint64_t i;
    2357             :         uint32_t page_num;
    2358             :         void *tmp;
    2359             :         int rc;
    2360             : 
    2361             :         /* Generate the new metadata */
    2362        4432 :         rc = blob_serialize(blob, &ctx->pages, &blob->active.num_pages);
    2363        4432 :         if (rc < 0) {
    2364           0 :                 blob_persist_complete(seq, ctx, rc);
    2365           0 :                 return;
    2366             :         }
    2367             : 
    2368        4432 :         assert(blob->active.num_pages >= 1);
    2369             : 
    2370             :         /* Resize the cache of page indices */
    2371        4432 :         tmp = realloc(blob->active.pages, blob->active.num_pages * sizeof(*blob->active.pages));
    2372        4432 :         if (!tmp) {
    2373           0 :                 blob_persist_complete(seq, ctx, -ENOMEM);
    2374           0 :                 return;
    2375             :         }
    2376        4432 :         blob->active.pages = tmp;
    2377             : 
    2378             :         /* Assign this metadata to pages. This requires two passes - one to verify that there are
    2379             :          * enough pages and a second to actually claim them. The used_lock is held across
    2380             :          * both passes to ensure things don't change in the middle.
    2381             :          */
    2382        4432 :         spdk_spin_lock(&bs->used_lock);
    2383        4432 :         page_num = 0;
    2384             :         /* Note that this loop starts at one. The first page location is fixed by the blobid. */
    2385        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2386         109 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2387         109 :                 if (page_num == UINT32_MAX) {
    2388           0 :                         spdk_spin_unlock(&bs->used_lock);
    2389           0 :                         blob_persist_complete(seq, ctx, -ENOMEM);
    2390           0 :                         return;
    2391             :                 }
    2392         109 :                 page_num++;
    2393         109 :         }
    2394             :         /* Second pass: claim the pages found above and link each page to its successor. */
    2395        4432 :         page_num = 0;
    2396        4432 :         blob->active.pages[0] = bs_blobid_to_page(blob->id);
    2397        4541 :         for (i = 1; i < blob->active.num_pages; i++) {
    2398         109 :                 page_num = spdk_bit_array_find_first_clear(bs->used_md_pages, page_num);
    2399         109 :                 ctx->pages[i - 1].next = page_num;
    2400             :                 /* Now that previous metadata page is complete, calculate the crc for it. */
    2401         109 :                 ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]);
    2402         109 :                 blob->active.pages[i] = page_num;
    2403         109 :                 bs_claim_md_page(bs, page_num);
    2404         109 :                 SPDK_DEBUGLOG(blob, "Claiming page %u for blob 0x%" PRIx64 "\n", page_num,
    2405             :                               blob->id);
    2406         109 :                 page_num++;
    2407         109 :         }
    2408        4432 :         spdk_spin_unlock(&bs->used_lock);
    2409        4432 :         ctx->pages[i - 1].crc = blob_md_page_calc_crc(&ctx->pages[i - 1]); /* i == num_pages here: last page */
    2410             :         /* Start writing the metadata from last page to first */
    2411        4432 :         blob->state = SPDK_BLOB_STATE_CLEAN;
    2412        4432 :         blob_persist_write_page_chain(seq, ctx);
    2413        4432 : }
    2414             : /* Sequence callback: write one allocated extent page per call, starting at ctx->next_extent_page; when none remain, generate new md. */
    2415             : static void
    2416        3108 : blob_persist_write_extent_pages(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2417             : {
    2418        3108 :         struct spdk_blob_persist_ctx    *ctx = cb_arg;
    2419        3108 :         struct spdk_blob                *blob = ctx->blob;
    2420             :         size_t                          i;
    2421             :         uint32_t                        extent_page_id;
    2422        3108 :         uint32_t                        page_count = 0;
    2423             :         int                             rc;
    2424             :         /* This function is its own write callback, so a buffer left from the previous call is freed first. */
    2425        3108 :         if (ctx->extent_page != NULL) {
    2426         991 :                 spdk_free(ctx->extent_page);
    2427         991 :                 ctx->extent_page = NULL;
    2428         991 :         }
    2429             : 
    2430        3108 :         if (bserrno != 0) {
    2431           0 :                 blob_persist_complete(seq, ctx, bserrno);
    2432           0 :                 return;
    2433             :         }
    2434             : 
    2435             :         /* Only write out Extent Pages when blob was resized. */
    2436        6492 :         for (i = ctx->next_extent_page; i < blob->active.extent_pages_array_size; i++) {
    2437        4375 :                 extent_page_id = blob->active.extent_pages[i];
    2438        4375 :                 if (extent_page_id == 0) {
    2439             :                         /* No Extent Page to persist */
    2440        3384 :                         assert(spdk_blob_is_thin_provisioned(blob));
    2441        3384 :                         continue;
    2442             :                 }
    2443         991 :                 assert(spdk_bit_array_get(blob->bs->used_md_pages, extent_page_id));
    2444         991 :                 ctx->next_extent_page = i + 1;
    2445         991 :                 rc = blob_serialize_add_page(ctx->blob, &ctx->extent_page, &page_count, &ctx->extent_page);
    2446         991 :                 if (rc < 0) {
    2447           0 :                         blob_persist_complete(seq, ctx, rc);
    2448           0 :                         return;
    2449             :                 }
    2450             : 
    2451         991 :                 blob->state = SPDK_BLOB_STATE_DIRTY;
    2452         991 :                 blob_serialize_extent_page(blob, i * SPDK_EXTENTS_PER_EP, ctx->extent_page);
    2453             : 
    2454         991 :                 ctx->extent_page->crc = blob_md_page_calc_crc(ctx->extent_page);
    2455             :                 /* Issue the write and return; the loop resumes from next_extent_page in the callback. */
    2456        1982 :                 bs_sequence_write_dev(seq, ctx->extent_page, bs_md_page_to_lba(blob->bs, extent_page_id),
    2457         991 :                                       bs_byte_to_lba(blob->bs, blob->bs->md_page_size),
    2458         991 :                                       blob_persist_write_extent_pages, ctx);
    2459         991 :                 return;
    2460             :         }
    2461             :         /* Reached only when no remaining slot holds an extent page to write. */
    2462        2117 :         blob_persist_generate_new_md(ctx);
    2463        3108 : }
    2464             : /* Sequence callback that begins a persist: handles the delete signal, a size change (extent pages first) or an unchanged size. */
    2465             : static void
    2466        6304 : blob_persist_start(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2467             : {
    2468        6304 :         struct spdk_blob_persist_ctx *ctx = cb_arg;
    2469        6304 :         struct spdk_blob *blob = ctx->blob;
    2470             : 
    2471        6304 :         if (bserrno != 0) {
    2472          10 :                 blob_persist_complete(seq, ctx, bserrno);
    2473          10 :                 return;
    2474             :         }
    2475             : 
    2476        6294 :         if (blob->active.num_pages == 0) {
    2477             :                 /* This is the signal that the blob should be deleted.
    2478             :                  * Immediately jump to the clean up routine. */
    2479        1862 :                 assert(blob->clean.num_pages > 0);
    2480        1862 :                 blob->state = SPDK_BLOB_STATE_CLEAN;
    2481        1862 :                 blob_persist_zero_pages(seq, ctx, 0);
    2482        1862 :                 return;
    2483             : 
    2484             :         }
    2485             :         /* Choose the first extent page index to write from how the cluster count changed. */
    2486        4432 :         if (blob->clean.num_clusters < blob->active.num_clusters) {
    2487             :                 /* Blob was resized up */
    2488        2095 :                 assert(blob->clean.num_extent_pages <= blob->active.num_extent_pages);
    2489        2095 :                 ctx->next_extent_page = spdk_max(1, blob->clean.num_extent_pages) - 1;
    2490        4432 :         } else if (blob->active.num_clusters < blob->active.cluster_array_size) {
    2491             :                 /* Blob was resized down */
    2492          22 :                 assert(blob->clean.num_extent_pages >= blob->active.num_extent_pages);
    2493          22 :                 ctx->next_extent_page = spdk_max(1, blob->active.num_extent_pages) - 1;
    2494          22 :         } else {
    2495             :                 /* No change in size occurred */
    2496        2315 :                 blob_persist_generate_new_md(ctx);
    2497        2315 :                 return;
    2498             :         }
    2499             : 
    2500        2117 :         blob_persist_write_extent_pages(seq, ctx, 0);
    2501        6304 : }
    2502             : /* State carried through the super block read and write-back started by bs_mark_dirty(). */
    2503             : struct spdk_bs_mark_dirty {
    2504             :         struct spdk_blob_store          *bs;
    2505             :         struct spdk_bs_super_block      *super; /* DMA buffer allocated in bs_mark_dirty() */
    2506             :         spdk_bs_sequence_cpl            cb_fn; /* caller's completion callback */
    2507             :         void                            *cb_arg; /* argument passed back to cb_fn */
    2508             : };
    2509             : /* Final step of bs_mark_dirty(): clear bs->clean on success, invoke the caller's callback, then free the context. */
    2510             : static void
    2511         197 : bs_mark_dirty_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2512             : {
    2513         197 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2514             : 
    2515         197 :         if (bserrno == 0) {
    2516         187 :                 ctx->bs->clean = 0;
    2517         187 :         }
    2518             :         /* The caller's callback runs before the context and super block buffer are released. */
    2519         197 :         ctx->cb_fn(seq, ctx->cb_arg, bserrno);
    2520             : 
    2521         197 :         spdk_free(ctx->super);
    2522         197 :         free(ctx);
    2523         197 : }
    2524             : 
    2525             : static void bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2526             :                            struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg);
    2527             : 
    2528             : /* Sequence callback after the super block read: validate it, clear its clean flag and write it back. */
    2529             : static void
    2530         197 : bs_mark_dirty_write(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2531             : {
    2532         197 :         struct spdk_bs_mark_dirty *ctx = cb_arg;
    2533             :         int rc;
    2534             : 
    2535         197 :         if (bserrno != 0) {
    2536           5 :                 bs_mark_dirty_write_cpl(seq, ctx, bserrno);
    2537           5 :                 return;
    2538             :         }
    2539             : 
    2540         192 :         rc = bs_super_validate(ctx->super, ctx->bs);
    2541         192 :         if (rc != 0) {
    2542           0 :                 bs_mark_dirty_write_cpl(seq, ctx, rc);
    2543           0 :                 return;
    2544             :         }
    2545             :         /* A zero size in the super block is filled in from the device's block count and block length. */
    2546         192 :         ctx->super->clean = 0;
    2547         192 :         if (ctx->super->size == 0) {
    2548           5 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    2549           5 :         }
    2550             : 
    2551         192 :         bs_write_super(seq, ctx->bs, ctx->super, bs_mark_dirty_write_cpl, ctx);
    2552         197 : }
    2553             : /* If bs->clean is set, rewrite the super block as dirty before calling cb_fn; otherwise call cb_fn immediately. */
    2554             : static void
    2555        6961 : bs_mark_dirty(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    2556             :               spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2557             : {
    2558             :         struct spdk_bs_mark_dirty *ctx;
    2559             : 
    2560             :         /* Blobstore is already marked dirty */
    2561        6961 :         if (bs->clean == 0) {
    2562        6764 :                 cb_fn(seq, cb_arg, 0);
    2563        6764 :                 return;
    2564             :         }
    2565             : 
    2566         197 :         ctx = calloc(1, sizeof(*ctx));
    2567         197 :         if (!ctx) {
    2568           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2569           0 :                 return;
    2570             :         }
    2571         197 :         ctx->bs = bs;
    2572         197 :         ctx->cb_fn = cb_fn;
    2573         197 :         ctx->cb_arg = cb_arg;
    2574             : 
    2575         197 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    2576             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2577         197 :         if (!ctx->super) {
    2578           0 :                 free(ctx);
    2579           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2580           0 :                 return;
    2581             :         }
    2582             :         /* Read the super block from page 0; bs_mark_dirty_write() continues once the read completes. */
    2583         394 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    2584         197 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    2585         197 :                              bs_mark_dirty_write, ctx);
    2586        6961 : }
    2587             : 
    2588             : /* Write a blob to disk. Calls cb_fn at once if the blob is clean and no persist is in flight; queues behind an in-flight persist. */
    2589             : static void
    2590       11346 : blob_persist(spdk_bs_sequence_t *seq, struct spdk_blob *blob,
    2591             :              spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    2592             : {
    2593             :         struct spdk_blob_persist_ctx *ctx;
    2594             : 
    2595       11346 :         blob_verify_md_op(blob);
    2596             : 
    2597       11346 :         if (blob->state == SPDK_BLOB_STATE_CLEAN && TAILQ_EMPTY(&blob->persists_to_complete)) {
    2598        5042 :                 cb_fn(seq, cb_arg, 0);
    2599        5042 :                 return;
    2600             :         }
    2601             : 
    2602        6304 :         ctx = calloc(1, sizeof(*ctx));
    2603        6304 :         if (!ctx) {
    2604           0 :                 cb_fn(seq, cb_arg, -ENOMEM);
    2605           0 :                 return;
    2606             :         }
    2607        6304 :         ctx->blob = blob;
    2608        6304 :         ctx->seq = seq;
    2609        6304 :         ctx->cb_fn = cb_fn;
    2610        6304 :         ctx->cb_arg = cb_arg;
    2611             : 
    2612             :         /* Multiple blob persists can affect one another, via blob->state or
    2613             :          * blob mutable data changes. To prevent it, queue up the persists. */
    2614        6304 :         if (!TAILQ_EMPTY(&blob->persists_to_complete)) {
    2615          28 :                 TAILQ_INSERT_TAIL(&blob->pending_persists, ctx, link);
    2616          28 :                 return;
    2617             :         }
    2618        6276 :         TAILQ_INSERT_HEAD(&blob->persists_to_complete, ctx, link);
    2619             :         /* The persist proper starts in blob_persist_start once bs_mark_dirty calls back. */
    2620        6276 :         bs_mark_dirty(seq, blob->bs, blob_persist_start, ctx);
    2621       11346 : }
    2622             : /* Context for the allocate-and-copy-cluster path; freed in blob_allocate_and_copy_cluster_cpl(). */
    2623             : struct spdk_blob_copy_cluster_ctx {
    2624             :         struct spdk_blob *blob;
    2625             :         uint8_t *buf; /* released with spdk_free() on completion */
    2626             :         uint64_t io_unit;
    2627             :         uint64_t new_cluster;
    2628             :         uint32_t new_extent_page; /* 0 is treated as "nothing to release" by blob_insert_cluster_revert() */
    2629             :         spdk_bs_sequence_t *seq;
    2630             :         struct spdk_blob_md_page *new_cluster_page;
    2631             : };
    2632             : /* Context for the free-cluster path; blob_free_cluster_cpl() finishes ctx->seq and frees this structure. */
    2633             : struct spdk_blob_free_cluster_ctx {
    2634             :         struct spdk_blob *blob;
    2635             :         uint64_t page;
    2636             :         struct spdk_blob_md_page *md_page;
    2637             :         uint64_t cluster_num;
    2638             :         uint32_t extent_page;
    2639             :         spdk_bs_sequence_t *seq;
    2640             : };
    2641             : /* Execute (or, on error, abort) every user op queued on the channel's need_cluster_alloc list, then free the context. */
    2642             : static void
    2643        1025 : blob_allocate_and_copy_cluster_cpl(void *cb_arg, int bserrno)
    2644             : {
    2645        1025 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2646        1025 :         struct spdk_bs_request_set *set = (struct spdk_bs_request_set *)ctx->seq;
    2647             :         TAILQ_HEAD(, spdk_bs_request_set) requests;
    2648             :         spdk_bs_user_op_t *op;
    2649             : 
    2650        1025 :         TAILQ_INIT(&requests);
    2651        1025 :         TAILQ_SWAP(&set->channel->need_cluster_alloc, &requests, spdk_bs_request_set, link);
    2652             :         /* The list was swapped into a local head, so only ops queued up to this point are drained. */
    2653        2050 :         while (!TAILQ_EMPTY(&requests)) {
    2654        1025 :                 op = TAILQ_FIRST(&requests);
    2655        1025 :                 TAILQ_REMOVE(&requests, op, link);
    2656        1025 :                 if (bserrno == 0) {
    2657        1025 :                         bs_user_op_execute(op);
    2658        1025 :                 } else {
    2659           0 :                         bs_user_op_abort(op, bserrno);
    2660             :                 }
    2661             :         }
    2662             : 
    2663        1025 :         spdk_free(ctx->buf);
    2664        1025 :         free(ctx);
    2665        1025 : }
    2666             : /* Final step of freeing a cluster: finishes the sequence with bserrno and frees the context. */
    2667             : static void
    2668          75 : blob_free_cluster_cpl(void *cb_arg, int bserrno)
    2669             : {
    2670          75 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    2671          75 :         spdk_bs_sequence_t *seq = ctx->seq;
    2672             : 
    2673          75 :         bs_sequence_finish(seq, bserrno);
    2674             : 
    2675          75 :         free(ctx);
    2676          75 : }
    2677             : /* Undoes the allocation recorded in ctx: releases new_cluster and, if non-zero, new_extent_page while holding used_lock. */
    2678             : static void
    2679           5 : blob_insert_cluster_revert(struct spdk_blob_copy_cluster_ctx *ctx)
    2680             : {
    2681           5 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    2682           5 :         bs_release_cluster(ctx->blob->bs, ctx->new_cluster);
    2683           5 :         if (ctx->new_extent_page != 0) {
    2684           3 :                 bs_release_md_page(ctx->blob->bs, ctx->new_extent_page);
    2685           3 :         }
    2686           5 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    2687           5 : }
    2688             : /* Completion of the cluster clear: warns if the clear failed, releases the allocation, and finishes the sequence with the clear's status. */
    2689             : static void
    2690           5 : blob_insert_cluster_clear_cpl(void *cb_arg, int bserrno)
    2691             : {
    2692           5 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2693             : 
    2694           5 :         if (bserrno) {
    2695           0 :                 SPDK_WARNLOG("Failed to clear cluster: %d\n", bserrno);
    2696           0 :         }
    2697             : 
    2698           5 :         blob_insert_cluster_revert(ctx); /* The cluster is released even when the clear failed. */
    2699           5 :         bs_sequence_finish(ctx->seq, bserrno);
    2700           5 : }
    2701             : /* Clears the one-cluster LBA range at ctx->new_cluster; blob_insert_cluster_clear_cpl runs afterwards (or at once with -ENOMEM if no batch can be opened). */
    2702             : static void
    2703           5 : blob_insert_cluster_clear(struct spdk_blob_copy_cluster_ctx *ctx)
    2704             : {
    2705             :         struct spdk_bs_cpl cpl;
    2706             :         spdk_bs_batch_t *batch;
    2707           5 :         struct spdk_io_channel *ch = spdk_io_channel_from_ctx(ctx->seq->channel);
    2708             : 
    2709             :         /*
    2710             :          * We allocated a cluster and we copied data to it. But now, we realized that we don't need
    2711             :          * this cluster and we want to release it. We must ensure that we clear the data on this
    2712             :          * cluster.
    2713             :          * The cluster may later be re-allocated by a thick-provisioned blob for example. When
    2714             :          * reading from this thick-provisioned blob before writing data, we should read zeroes.
    2715             :          */
    2716             : 
    2717           5 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2718           5 :         cpl.u.blob_basic.cb_fn = blob_insert_cluster_clear_cpl;
    2719           5 :         cpl.u.blob_basic.cb_arg = ctx;
    2720             : 
    2721           5 :         batch = bs_batch_open(ch, &cpl, ctx->blob);
    2722           5 :         if (!batch) {
    2723           0 :                 blob_insert_cluster_clear_cpl(ctx, -ENOMEM); /* Still releases the cluster and finishes the sequence, with -ENOMEM. */
    2724           0 :                 return;
    2725             :         }
    2726             : 
    2727          10 :         bs_batch_clear_dev(ctx->blob, batch, bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2728           5 :                            bs_cluster_to_lba(ctx->blob->bs, 1)); /* LBA count of a single cluster. */
    2729           5 :         bs_batch_close(batch);
    2730           5 : }
    2731             : /* Completion of the metadata insert: -EEXIST clears and releases our cluster; any other error releases it without clearing and fails the sequence. */
    2732             : static void
    2733        1025 : blob_insert_cluster_cpl(void *cb_arg, int bserrno)
    2734             : {
    2735        1025 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2736             : 
    2737        1025 :         if (bserrno) {
    2738           5 :                 if (bserrno == -EEXIST) {
    2739             :                         /* The metadata insert failed because another thread
    2740             :                          * allocated the cluster first. Clear and free our cluster
    2741             :                          * but continue without error. */
    2742           5 :                         blob_insert_cluster_clear(ctx);
    2743           5 :                         return; /* The sequence is finished later by blob_insert_cluster_clear_cpl. */
    2744             :                 }
    2745             : 
    2746           0 :                 blob_insert_cluster_revert(ctx);
    2747           0 :         }
    2748             : 
    2749        1020 :         bs_sequence_finish(ctx->seq, bserrno);
    2750        1025 : }
    2751             : /* Runs after the cluster data was written (or device-copied) to new_cluster; on success asks the md thread to insert the cluster into the blob. */
    2752             : static void
    2753         515 : blob_write_copy_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2754             : {
    2755         515 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2756             :         uint32_t cluster_number;
    2757             : 
    2758         515 :         if (bserrno) {
    2759             :                 /* The write failed, so jump to the final completion handler */
    2760           0 :                 bs_sequence_finish(seq, bserrno); /* NOTE(review): new_cluster/new_extent_page do not appear to be released on this path (no blob_insert_cluster_revert) - confirm whether intended. */
    2761           0 :                 return;
    2762             :         }
    2763             : 
    2764         515 :         cluster_number = bs_io_unit_to_cluster(ctx->blob->bs, ctx->io_unit);
    2765             : 
    2766        1030 :         blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2767         515 :                                          ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2768         515 : }
    2769             : /* Runs after the backing-device read into ctx->buf; on success writes the whole buffer to new_cluster and continues in blob_write_copy_cpl. */
    2770             : static void
    2771         385 : blob_write_copy(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    2772             : {
    2773         385 :         struct spdk_blob_copy_cluster_ctx *ctx = cb_arg;
    2774             : 
    2775         385 :         if (bserrno != 0) {
    2776             :                 /* The read failed, so jump to the final completion handler */
    2777           0 :                 bs_sequence_finish(seq, bserrno); /* NOTE(review): new_cluster/new_extent_page do not appear to be released on this path - confirm whether intended. */
    2778           0 :                 return;
    2779             :         }
    2780             : 
    2781             :         /* Write whole cluster */
    2782         770 :         bs_sequence_write_dev(seq, ctx->buf,
    2783         385 :                               bs_cluster_to_lba(ctx->blob->bs, ctx->new_cluster),
    2784         385 :                               bs_cluster_to_lba(ctx->blob->bs, 1),
    2785         385 :                               blob_write_copy_cpl, ctx);
    2786         385 : }
    2787             : /* True when the blob is not an esnap clone, the blobstore device has a copy callback, and the backing device's translate_lba() succeeds (base_lba is its output argument). */
    2788             : static bool
    2789        1005 : blob_can_copy(struct spdk_blob *blob, uint64_t cluster_start_io_unit, uint64_t *base_lba)
    2790             : {
    2791        1005 :         uint64_t lba = bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit);
    2792             : 
    2793        1359 :         return (!blob_is_esnap_clone(blob) && blob->bs->dev->copy != NULL) && /* Short-circuits, so translate_lba() is only called when both hold. */
    2794         354 :                blob->back_bs_dev->translate_lba(blob->back_bs_dev, lba, base_lba);
    2795             : }
    2796             : /* Starts a device copy of one cluster's worth of LBAs from src_lba to new_cluster; blob_write_copy_cpl continues. The op parameter is unused. */
    2797             : static void
    2798         130 : blob_copy(struct spdk_blob_copy_cluster_ctx *ctx, spdk_bs_user_op_t *op, uint64_t src_lba)
    2799             : {
    2800         130 :         struct spdk_blob *blob = ctx->blob;
    2801         130 :         uint64_t lba_count = bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz); /* Cluster size converted to LBAs via the backing device. */
    2802             : 
    2803         260 :         bs_sequence_copy_dev(ctx->seq,
    2804         130 :                              bs_cluster_to_lba(blob->bs, ctx->new_cluster),
    2805         130 :                              src_lba,
    2806         130 :                              lba_count,
    2807         130 :                              blob_write_copy_cpl, ctx);
    2808         130 : }
    2809             : /* Allocates a cluster for the cluster containing io_unit, fills it from the backing device when required, and queues op on the channel until done; op is aborted on setup failure. */
    2810             : static void
    2811        1025 : bs_allocate_and_copy_cluster(struct spdk_blob *blob,
    2812             :                              struct spdk_io_channel *_ch,
    2813             :                              uint64_t io_unit, spdk_bs_user_op_t *op)
    2814             : {
    2815             :         struct spdk_bs_cpl cpl;
    2816             :         struct spdk_bs_channel *ch;
    2817             :         struct spdk_blob_copy_cluster_ctx *ctx;
    2818             :         uint64_t cluster_start_io_unit;
    2819             :         uint32_t cluster_number;
    2820             :         bool is_zeroes;
    2821             :         bool can_copy;
    2822             :         bool is_valid_range;
    2823             :         uint64_t copy_src_lba; /* Only read when can_copy is true. */
    2824             :         int rc;
    2825             : 
    2826        1025 :         ch = spdk_io_channel_get_ctx(_ch);
    2827             : 
    2828        1025 :         if (!TAILQ_EMPTY(&ch->need_cluster_alloc)) {
    2829             :                 /* There are already operations pending. Queue this user op
    2830             :                  * and return because it will be re-executed when the outstanding
    2831             :                  * cluster allocation completes. */
    2832           0 :                 TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2833           0 :                 return;
    2834             :         }
    2835             : 
    2836             :         /* Round the io_unit offset down to the first io_unit in the cluster */
    2837        1025 :         cluster_start_io_unit = bs_io_unit_to_cluster_start(blob, io_unit);
    2838             : 
    2839             :         /* Calculate which index in the metadata cluster array the corresponding
    2840             :          * cluster is supposed to be at. */
    2841        1025 :         cluster_number = bs_io_unit_to_cluster_number(blob, io_unit);
    2842             : 
    2843        1025 :         ctx = calloc(1, sizeof(*ctx));
    2844        1025 :         if (!ctx) {
    2845           0 :                 bs_user_op_abort(op, -ENOMEM);
    2846           0 :                 return;
    2847             :         }
    2848             : 
    2849        1025 :         assert(blob->bs->cluster_sz % blob->back_bs_dev->blocklen == 0);
    2850             : 
    2851        1025 :         ctx->blob = blob;
    2852        1025 :         ctx->io_unit = cluster_start_io_unit;
    2853        1025 :         ctx->new_cluster_page = ch->new_cluster_page; /* Borrowed from the channel, not allocated here. */
    2854        1025 :         memset(ctx->new_cluster_page, 0, blob->bs->md_page_size);
    2855             : 
    2856             :         /* Check if the cluster that we intend to do CoW for is valid for
    2857             :          * the backing dev. For zeroes backing dev, it'll be always valid.
    2858             :          * For other backing dev e.g. a snapshot, it could be invalid if
    2859             :          * the blob has been resized after snapshot was taken. */
    2860        2050 :         is_valid_range = blob->back_bs_dev->is_range_valid(blob->back_bs_dev,
    2861        1025 :                          bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2862        1025 :                          bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2863             : 
    2864        1025 :         can_copy = is_valid_range && blob_can_copy(blob, cluster_start_io_unit, &copy_src_lba);
    2865             : 
    2866        1025 :         is_zeroes = is_valid_range && blob->back_bs_dev->is_zeroes(blob->back_bs_dev,
    2867        1005 :                         bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2868        1005 :                         bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz));
    2869        1025 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes && !can_copy) { /* A buffer is only needed for the read-then-write path taken below. */
    2870         385 :                 ctx->buf = spdk_malloc(blob->bs->cluster_sz, blob->back_bs_dev->blocklen,
    2871             :                                        NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    2872         385 :                 if (!ctx->buf) {
    2873           0 :                         SPDK_ERRLOG("DMA allocation for cluster of size = %" PRIu32 " failed.\n",
    2874             :                                     blob->bs->cluster_sz);
    2875           0 :                         free(ctx);
    2876           0 :                         bs_user_op_abort(op, -ENOMEM);
    2877           0 :                         return;
    2878             :                 }
    2879         385 :         }
    2880             : 
    2881        1025 :         spdk_spin_lock(&blob->bs->used_lock);
    2882        1025 :         rc = bs_allocate_cluster(blob, cluster_number, &ctx->new_cluster, &ctx->new_extent_page,
    2883             :                                  false);
    2884        1025 :         spdk_spin_unlock(&blob->bs->used_lock);
    2885        1025 :         if (rc != 0) {
    2886           0 :                 spdk_free(ctx->buf);
    2887           0 :                 free(ctx);
    2888           0 :                 bs_user_op_abort(op, rc);
    2889           0 :                 return;
    2890             :         }
    2891             : 
    2892        1025 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    2893        1025 :         cpl.u.blob_basic.cb_fn = blob_allocate_and_copy_cluster_cpl;
    2894        1025 :         cpl.u.blob_basic.cb_arg = ctx;
    2895             : 
    2896        1025 :         ctx->seq = bs_sequence_start_blob(_ch, &cpl, blob);
    2897        1025 :         if (!ctx->seq) {
    2898           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    2899           0 :                 bs_release_cluster(blob->bs, ctx->new_cluster); /* NOTE(review): ctx->new_extent_page is not released here, unlike blob_insert_cluster_revert() - confirm whether an md page can leak on this path. */
    2900           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    2901           0 :                 spdk_free(ctx->buf);
    2902           0 :                 free(ctx);
    2903           0 :                 bs_user_op_abort(op, -ENOMEM);
    2904           0 :                 return;
    2905             :         }
    2906             : 
    2907             :         /* Queue the user op to block other incoming operations */
    2908        1025 :         TAILQ_INSERT_TAIL(&ch->need_cluster_alloc, op, link);
    2909             : 
    2910        1025 :         if (blob->parent_id != SPDK_BLOBID_INVALID && !is_zeroes) { /* Parent data must be carried into the new cluster first. */
    2911         515 :                 if (can_copy) {
    2912         130 :                         blob_copy(ctx, op, copy_src_lba);
    2913         130 :                 } else {
    2914             :                         /* Read cluster from backing device */
    2915         770 :                         bs_sequence_read_bs_dev(ctx->seq, blob->back_bs_dev, ctx->buf,
    2916         385 :                                                 bs_dev_io_unit_to_lba(blob, blob->back_bs_dev, cluster_start_io_unit),
    2917         385 :                                                 bs_dev_byte_to_lba(blob->back_bs_dev, blob->bs->cluster_sz),
    2918         385 :                                                 blob_write_copy, ctx);
    2919             :                 }
    2920             : 
    2921         515 :         } else { /* Nothing to copy: go straight to the metadata insert. */
    2922        1020 :                 blob_insert_cluster_on_md_thread(ctx->blob, cluster_number, ctx->new_cluster,
    2923         510 :                                                  ctx->new_extent_page, ctx->new_cluster_page, blob_insert_cluster_cpl, ctx);
    2924             :         }
    2925        1025 : }
    2926             : /* Fills *lba and *lba_count for (io_unit, length). Returns true if the io_unit is allocated in the blob, false if the values were converted for the backing device. */
    2927             : static inline bool
    2928       56755 : blob_calculate_lba_and_lba_count(struct spdk_blob *blob, uint64_t io_unit, uint64_t length,
    2929             :                                  uint64_t *lba, uint64_t *lba_count)
    2930             : {
    2931       56755 :         *lba_count = length; /* Kept as-is on the allocated path. */
    2932             : 
    2933       56755 :         if (!bs_io_unit_is_allocated(blob, io_unit)) {
    2934        5196 :                 assert(blob->back_bs_dev != NULL);
    2935        5196 :                 *lba = bs_io_unit_to_back_dev_lba(blob, io_unit);
    2936        5196 :                 *lba_count = bs_io_unit_to_back_dev_lba(blob, *lba_count); /* The same conversion is applied to the length. */
    2937        5196 :                 return false;
    2938             :         } else {
    2939       51559 :                 *lba = bs_blob_io_unit_to_lba(blob, io_unit);
    2940       51559 :                 return true;
    2941             :         }
    2942       56755 : }
    2943             : /* State of an I/O that is submitted piece by piece, each piece ending at a cluster boundary. */
    2944             : struct op_split_ctx {
    2945             :         struct spdk_blob *blob; /* Blob every piece is issued against. */
    2946             :         struct spdk_io_channel *channel; /* Channel every piece is issued on. */
    2947             :         uint64_t io_unit_offset; /* Offset of the next piece to submit. */
    2948             :         uint64_t io_units_remaining; /* io_units not yet submitted. */
    2949             :         void *curr_payload; /* Payload position for the next piece; advanced only for READ and WRITE. */
    2950             :         enum spdk_blob_op_type op_type; /* Operation applied to every piece. */
    2951             :         spdk_bs_sequence_t *seq; /* Finished when all pieces are done or one fails. */
    2952             :         bool in_submit_ctx; /* True while a piece is being submitted from the submit loop. */
    2953             :         bool completed_in_submit_ctx; /* Set when a piece completed during its own submission and more remain. */
    2954             :         bool done; /* Set when the final completion ran during submission; the submit loop then frees the ctx. */
    2955             : };
    2956             : /* Submits the next piece of a split operation and also serves as each piece's completion callback; finishes the sequence on error or when nothing remains. */
    2957             : static void
    2958         966 : blob_request_submit_op_split_next(void *cb_arg, int bserrno)
    2959             : {
    2960         966 :         struct op_split_ctx     *ctx = cb_arg;
    2961         966 :         struct spdk_blob        *blob = ctx->blob;
    2962         966 :         struct spdk_io_channel  *ch = ctx->channel;
    2963         966 :         enum spdk_blob_op_type  op_type = ctx->op_type;
    2964             :         uint8_t                 *buf;
    2965             :         uint64_t                offset;
    2966             :         uint64_t                length;
    2967             :         uint64_t                op_length;
    2968             : 
    2969         966 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    2970         222 :                 bs_sequence_finish(ctx->seq, bserrno);
    2971         222 :                 if (ctx->in_submit_ctx) {
    2972             :                         /* Defer freeing of the ctx object, since it will be
    2973             :                          * accessed when this unwinds back to the submission
    2974             :                          * context.
    2975             :                          */
    2976          50 :                         ctx->done = true;
    2977          50 :                 } else {
    2978         172 :                         free(ctx);
    2979             :                 }
    2980         222 :                 return;
    2981             :         }
    2982             : 
    2983         744 :         if (ctx->in_submit_ctx) {
    2984             :                 /* If this split operation completed in the context
    2985             :                  * of its submission, mark the flag and return immediately
    2986             :                  * to avoid recursion.
    2987             :                  */
    2988          85 :                 ctx->completed_in_submit_ctx = true;
    2989          85 :                 return;
    2990             :         }
    2991             : 
    2992         659 :         while (true) {
    2993         744 :                 ctx->completed_in_submit_ctx = false;
    2994             : 
    2995         744 :                 offset = ctx->io_unit_offset;
    2996         744 :                 length = ctx->io_units_remaining;
    2997         744 :                 buf = ctx->curr_payload;
    2998         744 :                 op_length = spdk_min(length, bs_num_io_units_to_cluster_boundary(blob,
    2999             :                                      offset)); /* A piece never extends past the cluster boundary. */
    3000             : 
    3001             :                 /* Update length and payload for next operation */
    3002         744 :                 ctx->io_units_remaining -= op_length;
    3003         744 :                 ctx->io_unit_offset += op_length;
    3004         744 :                 if (op_type == SPDK_BLOB_WRITE || op_type == SPDK_BLOB_READ) {
    3005         659 :                         ctx->curr_payload += op_length * blob->bs->io_unit_size; /* Only READ and WRITE advance the payload pointer. */
    3006         659 :                 }
    3007             : 
    3008         744 :                 assert(!ctx->in_submit_ctx);
    3009         744 :                 ctx->in_submit_ctx = true; /* Lets a nested call of this callback detect that it runs inside the submission. */
    3010             : 
    3011         744 :                 switch (op_type) {
    3012             :                 case SPDK_BLOB_READ:
    3013        1044 :                         spdk_blob_io_read(blob, ch, buf, offset, op_length,
    3014         522 :                                           blob_request_submit_op_split_next, ctx);
    3015         522 :                         break;
    3016             :                 case SPDK_BLOB_WRITE:
    3017         274 :                         spdk_blob_io_write(blob, ch, buf, offset, op_length,
    3018         137 :                                            blob_request_submit_op_split_next, ctx);
    3019         137 :                         break;
    3020             :                 case SPDK_BLOB_UNMAP:
    3021          90 :                         spdk_blob_io_unmap(blob, ch, offset, op_length,
    3022          45 :                                            blob_request_submit_op_split_next, ctx);
    3023          45 :                         break;
    3024             :                 case SPDK_BLOB_WRITE_ZEROES:
    3025          80 :                         spdk_blob_io_write_zeroes(blob, ch, offset, op_length,
    3026          40 :                                                   blob_request_submit_op_split_next, ctx);
    3027          40 :                         break;
    3028             :                 case SPDK_BLOB_READV:
    3029             :                 case SPDK_BLOB_WRITEV:
    3030           0 :                         SPDK_ERRLOG("readv/write not valid\n"); /* Vectored op types are rejected here with -EINVAL. */
    3031           0 :                         bs_sequence_finish(ctx->seq, -EINVAL);
    3032           0 :                         free(ctx);
    3033           0 :                         return;
    3034             :                 }
    3035             : 
    3036             : #ifndef __clang_analyzer__
    3037             :                 /* scan-build reports a false positive around accessing the ctx here. It
    3038             :                  * forms a path that recursively calls this function, but then says
    3039             :                  * "assuming ctx->in_submit_ctx is false", when that isn't possible.
    3040             :                  * This path does free(ctx), returns to here, and reports a use-after-free
    3041             :                  * bug.  Wrapping this bit of code so that scan-build doesn't see it
    3042             :                  * works around the scan-build bug.
    3043             :                  */
    3044         744 :                 assert(ctx->in_submit_ctx);
    3045         744 :                 ctx->in_submit_ctx = false;
    3046             : 
    3047             :                 /* If the operation completed immediately, loop back and submit the
    3048             :                  * next operation.  Otherwise we can return and the next split
    3049             :                  * operation will get submitted when this current operation is
    3050             :                  * later completed asynchronously.
    3051             :                  */
    3052         744 :                 if (ctx->completed_in_submit_ctx) {
    3053          85 :                         continue;
    3054         659 :                 } else if (ctx->done) {
    3055          50 :                         free(ctx); /* The final completion ran during submission and deferred the free to here. */
    3056          50 :                 }
    3057             : #endif
    3058         659 :                 break;
    3059             :         }
    3060         966 : }
    3061             : /* Starts a split operation: allocates the context and a sequence that reports to cb_fn, then submits the first piece. Calls cb_fn with -ENOMEM if either allocation fails. */
    3062             : static void
    3063         222 : blob_request_submit_op_split(struct spdk_io_channel *ch, struct spdk_blob *blob,
    3064             :                              void *payload, uint64_t offset, uint64_t length,
    3065             :                              spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3066             : {
    3067             :         struct op_split_ctx *ctx;
    3068             :         spdk_bs_sequence_t *seq;
    3069             :         struct spdk_bs_cpl cpl;
    3070             : 
    3071         222 :         assert(blob != NULL);
    3072             : 
    3073         222 :         ctx = calloc(1, sizeof(struct op_split_ctx)); /* Zeroed, so all three state flags start out false. */
    3074         222 :         if (ctx == NULL) {
    3075           0 :                 cb_fn(cb_arg, -ENOMEM);
    3076           0 :                 return;
    3077             :         }
    3078             : 
    3079         222 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3080         222 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3081         222 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3082             : 
    3083         222 :         seq = bs_sequence_start_blob(ch, &cpl, blob);
    3084         222 :         if (!seq) {
    3085           0 :                 free(ctx);
    3086           0 :                 cb_fn(cb_arg, -ENOMEM);
    3087           0 :                 return;
    3088             :         }
    3089             : 
    3090         222 :         ctx->blob = blob;
    3091         222 :         ctx->channel = ch;
    3092         222 :         ctx->curr_payload = payload;
    3093         222 :         ctx->io_unit_offset = offset;
    3094         222 :         ctx->io_units_remaining = length;
    3095         222 :         ctx->op_type = op_type;
    3096         222 :         ctx->seq = seq;
    3097             : 
    3098         222 :         blob_request_submit_op_split_next(ctx, 0); /* A zero status makes it submit the first piece (or finish at once when length is 0). */
    3099         222 : }
    3100             : /* Presumably the completion of the unmap issued before a cluster is freed (confirm against the caller): on error finishes the sequence and frees ctx, otherwise frees the cluster on the md thread. */
    3101             : static void
    3102          75 : spdk_free_cluster_unmap_complete(void *cb_arg, int bserrno)
    3103             : {
    3104          75 :         struct spdk_blob_free_cluster_ctx *ctx = cb_arg;
    3105             : 
    3106          75 :         if (bserrno) {
    3107           0 :                 bs_sequence_finish(ctx->seq, bserrno);
    3108           0 :                 free(ctx);
    3109           0 :                 return;
    3110             :         }
    3111             : 
    3112         150 :         blob_free_cluster_on_md_thread(ctx->blob, ctx->cluster_num,
    3113          75 :                                        ctx->extent_page, ctx->md_page, blob_free_cluster_cpl, ctx); /* blob_free_cluster_cpl then finishes the sequence and frees ctx. */
    3114          75 : }
    3115             : 
    3116             : static void
    3117       52830 : blob_request_submit_op_single(struct spdk_io_channel *_ch, struct spdk_blob *blob,
    3118             :                               void *payload, uint64_t offset, uint64_t length,
    3119             :                               spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3120             : {
    3121             :         struct spdk_bs_cpl cpl;
    3122             :         uint64_t lba;
    3123             :         uint64_t lba_count;
    3124             :         bool is_allocated;
    3125             : 
    3126       52830 :         assert(blob != NULL);
    3127             : 
    3128       52830 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3129       52830 :         cpl.u.blob_basic.cb_fn = cb_fn;
    3130       52830 :         cpl.u.blob_basic.cb_arg = cb_arg;
    3131             : 
    3132       52830 :         if (blob->frozen_refcnt) {
    3133             :                 /* This blob I/O is frozen */
    3134             :                 spdk_bs_user_op_t *op;
    3135           5 :                 struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3136             : 
    3137           5 :                 op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3138           5 :                 if (!op) {
    3139           0 :                         cb_fn(cb_arg, -ENOMEM);
    3140           0 :                         return;
    3141             :                 }
    3142             : 
    3143           5 :                 TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3144             : 
    3145           5 :                 return;
    3146             :         }
    3147             : 
    3148       52825 :         is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3149             : 
    3150       52825 :         switch (op_type) {
    3151             :         case SPDK_BLOB_READ: {
    3152             :                 spdk_bs_batch_t *batch;
    3153             : 
    3154       25377 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3155       25377 :                 if (!batch) {
    3156           0 :                         cb_fn(cb_arg, -ENOMEM);
    3157           0 :                         return;
    3158             :                 }
    3159             : 
    3160       25377 :                 if (is_allocated) {
    3161             :                         /* Read from the blob */
    3162       23531 :                         bs_batch_read_dev(batch, payload, lba, lba_count);
    3163       23531 :                 } else {
    3164             :                         /* Read from the backing block device */
    3165        1846 :                         bs_batch_read_bs_dev(batch, blob->back_bs_dev, payload, lba, lba_count);
    3166             :                 }
    3167             : 
    3168       25377 :                 bs_batch_close(batch);
    3169       25377 :                 break;
    3170             :         }
    3171             :         case SPDK_BLOB_WRITE:
    3172             :         case SPDK_BLOB_WRITE_ZEROES: {
    3173       27328 :                 if (is_allocated) {
    3174             :                         /* Write to the blob */
    3175             :                         spdk_bs_batch_t *batch;
    3176             : 
    3177       26888 :                         if (lba_count == 0) {
    3178           0 :                                 cb_fn(cb_arg, 0);
    3179           0 :                                 return;
    3180             :                         }
    3181             : 
    3182       26888 :                         batch = bs_batch_open(_ch, &cpl, blob);
    3183       26888 :                         if (!batch) {
    3184           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3185           0 :                                 return;
    3186             :                         }
    3187             : 
    3188       26888 :                         if (op_type == SPDK_BLOB_WRITE) {
    3189       26848 :                                 bs_batch_write_dev(batch, payload, lba, lba_count);
    3190       26848 :                         } else {
    3191          40 :                                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    3192             :                         }
    3193             : 
    3194       26888 :                         bs_batch_close(batch);
    3195       26888 :                 } else {
    3196             :                         /* Queue this operation and allocate the cluster */
    3197             :                         spdk_bs_user_op_t *op;
    3198             : 
    3199         440 :                         op = bs_user_op_alloc(_ch, &cpl, op_type, blob, payload, 0, offset, length);
    3200         440 :                         if (!op) {
    3201           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3202           0 :                                 return;
    3203             :                         }
    3204             : 
    3205         440 :                         bs_allocate_and_copy_cluster(blob, _ch, offset, op);
    3206             :                 }
    3207       27328 :                 break;
    3208             :         }
    3209             :         case SPDK_BLOB_UNMAP: {
    3210         120 :                 struct spdk_blob_free_cluster_ctx *ctx = NULL;
    3211             :                 spdk_bs_batch_t *batch;
    3212             : 
    3213             :                 /* if aligned with cluster release cluster */
    3214         205 :                 if (spdk_blob_is_thin_provisioned(blob) && is_allocated &&
    3215          90 :                     blob_backed_with_zeroes_dev(blob) &&
    3216          85 :                     bs_io_units_per_cluster(blob) == length) {
    3217          75 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_ch);
    3218             :                         uint64_t cluster_start_page;
    3219             :                         uint32_t cluster_number;
    3220             : 
    3221          75 :                         assert(offset % bs_io_units_per_cluster(blob) == 0);
    3222             : 
    3223             :                         /* Round the io_unit offset down to the first page in the cluster */
    3224          75 :                         cluster_start_page = bs_io_unit_to_cluster_start(blob, offset);
    3225             : 
    3226             :                         /* Calculate which index in the metadata cluster array the corresponding
    3227             :                          * cluster is supposed to be at. */
    3228          75 :                         cluster_number = bs_io_unit_to_cluster_number(blob, offset);
    3229             : 
    3230          75 :                         ctx = calloc(1, sizeof(*ctx));
    3231          75 :                         if (!ctx) {
    3232           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3233           0 :                                 return;
    3234             :                         }
    3235             :                         /* When freeing a cluster the flow should be (in order):
    3236             :                          * 1. Unmap the underlying area (so if the cluster is reclaimed in the future, it won't leak
    3237             :                          * old data)
    3238             :                          * 2. Once the unmap completes (to avoid any races with incoming writes that may claim the
    3239             :                          * cluster), update and sync metadata freeing the cluster
    3240             :                          * 3. Once metadata update is done, complete the user unmap request
    3241             :                          */
    3242          75 :                         ctx->blob = blob;
    3243          75 :                         ctx->page = cluster_start_page;
    3244          75 :                         ctx->cluster_num = cluster_number;
    3245          75 :                         ctx->md_page = bs_channel->new_cluster_page;
    3246          75 :                         ctx->seq = bs_sequence_start_bs(_ch, &cpl);
    3247          75 :                         if (!ctx->seq) {
    3248           0 :                                 free(ctx);
    3249           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3250           0 :                                 return;
    3251             :                         }
    3252             : 
    3253          75 :                         if (blob->use_extent_table) {
    3254          45 :                                 ctx->extent_page = *bs_cluster_to_extent_page(blob, cluster_number);
    3255          45 :                         }
    3256             : 
    3257          75 :                         cpl.u.blob_basic.cb_fn = spdk_free_cluster_unmap_complete;
    3258          75 :                         cpl.u.blob_basic.cb_arg = ctx;
    3259          75 :                 }
    3260             : 
    3261         120 :                 batch = bs_batch_open(_ch, &cpl, blob);
    3262         120 :                 if (!batch) {
    3263           0 :                         free(ctx);
    3264           0 :                         cb_fn(cb_arg, -ENOMEM);
    3265           0 :                         return;
    3266             :                 }
    3267             : 
    3268         120 :                 if (is_allocated) {
    3269         120 :                         bs_batch_unmap_dev(batch, lba, lba_count);
    3270         120 :                 }
    3271             : 
    3272         120 :                 bs_batch_close(batch);
    3273         120 :                 break;
    3274             :         }
    3275             :         case SPDK_BLOB_READV:
    3276             :         case SPDK_BLOB_WRITEV:
    3277           0 :                 SPDK_ERRLOG("readv/write not valid\n");
    3278           0 :                 cb_fn(cb_arg, -EINVAL);
    3279           0 :                 break;
    3280             :         }
    3281       52830 : }
    3282             : 
    3283             : static void
    3284       53692 : blob_request_submit_op(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3285             :                        void *payload, uint64_t offset, uint64_t length,
    3286             :                        spdk_blob_op_complete cb_fn, void *cb_arg, enum spdk_blob_op_type op_type)
    3287             : {
    3288       53692 :         assert(blob != NULL);
    3289             : 
    3290       53692 :         if (blob->data_ro && op_type != SPDK_BLOB_READ) {
    3291           5 :                 cb_fn(cb_arg, -EPERM);
    3292           5 :                 return;
    3293             :         }
    3294             : 
    3295       53687 :         if (length == 0) {
    3296         615 :                 cb_fn(cb_arg, 0);
    3297         615 :                 return;
    3298             :         }
    3299             : 
    3300       53072 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3301          30 :                 cb_fn(cb_arg, -EINVAL);
    3302          30 :                 return;
    3303             :         }
    3304       53042 :         if (length <= bs_num_io_units_to_cluster_boundary(blob, offset)) {
    3305      105640 :                 blob_request_submit_op_single(_channel, blob, payload, offset, length,
    3306       52820 :                                               cb_fn, cb_arg, op_type);
    3307       52820 :         } else {
    3308         444 :                 blob_request_submit_op_split(_channel, blob, payload, offset, length,
    3309         222 :                                              cb_fn, cb_arg, op_type);
    3310             :         }
    3311       53692 : }
    3312             : 
    3313             : struct rw_iov_ctx {
    3314             :         struct spdk_blob *blob;
    3315             :         struct spdk_io_channel *channel;
    3316             :         spdk_blob_op_complete cb_fn;
    3317             :         void *cb_arg;
    3318             :         bool read;
    3319             :         int iovcnt;
    3320             :         struct iovec *orig_iov;
    3321             :         uint64_t io_unit_offset;
    3322             :         uint64_t io_units_remaining;
    3323             :         uint64_t io_units_done;
    3324             :         struct spdk_blob_ext_io_opts *ext_io_opts;
    3325             :         struct iovec iov[0];
    3326             : };
    3327             : 
    3328             : static void
    3329        3910 : rw_iov_done(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    3330             : {
    3331        3910 :         assert(cb_arg == NULL);
    3332        3910 :         bs_sequence_finish(seq, bserrno);
    3333        3910 : }
    3334             : 
    3335             : static void
    3336         930 : rw_iov_split_next(void *cb_arg, int bserrno)
    3337             : {
    3338         930 :         struct rw_iov_ctx *ctx = cb_arg;
    3339         930 :         struct spdk_blob *blob = ctx->blob;
    3340             :         struct iovec *iov, *orig_iov;
    3341             :         int iovcnt;
    3342             :         size_t orig_iovoff;
    3343             :         uint64_t io_units_count, io_units_to_boundary, io_unit_offset;
    3344             :         uint64_t byte_count;
    3345             : 
    3346         930 :         if (bserrno != 0 || ctx->io_units_remaining == 0) {
    3347         255 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
    3348         255 :                 free(ctx);
    3349         255 :                 return;
    3350             :         }
    3351             : 
    3352         675 :         io_unit_offset = ctx->io_unit_offset;
    3353         675 :         io_units_to_boundary = bs_num_io_units_to_cluster_boundary(blob, io_unit_offset);
    3354         675 :         io_units_count = spdk_min(ctx->io_units_remaining, io_units_to_boundary);
    3355             :         /*
    3356             :          * Get index and offset into the original iov array for our current position in the I/O sequence.
    3357             :          *  byte_count will keep track of how many bytes remaining until orig_iov and orig_iovoff will
    3358             :          *  point to the current position in the I/O sequence.
    3359             :          */
    3360         675 :         byte_count = ctx->io_units_done * blob->bs->io_unit_size;
    3361         675 :         orig_iov = &ctx->orig_iov[0];
    3362         675 :         orig_iovoff = 0;
    3363        1435 :         while (byte_count > 0) {
    3364         760 :                 if (byte_count >= orig_iov->iov_len) {
    3365         440 :                         byte_count -= orig_iov->iov_len;
    3366         440 :                         orig_iov++;
    3367         440 :                 } else {
    3368         320 :                         orig_iovoff = byte_count;
    3369         320 :                         byte_count = 0;
    3370             :                 }
    3371             :         }
    3372             : 
    3373             :         /*
    3374             :          * Build an iov array for the next I/O in the sequence.  byte_count will keep track of how many
    3375             :          *  bytes of this next I/O remain to be accounted for in the new iov array.
    3376             :          */
    3377         675 :         byte_count = io_units_count * blob->bs->io_unit_size;
    3378         675 :         iov = &ctx->iov[0];
    3379         675 :         iovcnt = 0;
    3380        1725 :         while (byte_count > 0) {
    3381        1050 :                 assert(iovcnt < ctx->iovcnt);
    3382        1050 :                 iov->iov_len = spdk_min(byte_count, orig_iov->iov_len - orig_iovoff);
    3383        1050 :                 iov->iov_base = orig_iov->iov_base + orig_iovoff;
    3384        1050 :                 byte_count -= iov->iov_len;
    3385        1050 :                 orig_iovoff = 0;
    3386        1050 :                 orig_iov++;
    3387        1050 :                 iov++;
    3388        1050 :                 iovcnt++;
    3389             :         }
    3390             : 
    3391         675 :         ctx->io_unit_offset += io_units_count;
    3392         675 :         ctx->io_units_remaining -= io_units_count;
    3393         675 :         ctx->io_units_done += io_units_count;
    3394         675 :         iov = &ctx->iov[0];
    3395             : 
    3396         675 :         if (ctx->read) {
    3397        1020 :                 spdk_blob_io_readv_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3398         510 :                                        io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3399         510 :         } else {
    3400         330 :                 spdk_blob_io_writev_ext(ctx->blob, ctx->channel, iov, iovcnt, io_unit_offset,
    3401         165 :                                         io_units_count, rw_iov_split_next, ctx, ctx->ext_io_opts);
    3402             :         }
    3403         930 : }
    3404             : 
    3405             : static void
    3406        4195 : blob_request_submit_rw_iov(struct spdk_blob *blob, struct spdk_io_channel *_channel,
    3407             :                            struct iovec *iov, int iovcnt,
    3408             :                            uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg, bool read,
    3409             :                            struct spdk_blob_ext_io_opts *ext_io_opts)
    3410             : {
    3411             :         struct spdk_bs_cpl      cpl;
    3412             : 
    3413        4195 :         assert(blob != NULL);
    3414             : 
    3415        4195 :         if (!read && blob->data_ro) {
    3416           5 :                 cb_fn(cb_arg, -EPERM);
    3417           5 :                 return;
    3418             :         }
    3419             : 
    3420        4190 :         if (length == 0) {
    3421           0 :                 cb_fn(cb_arg, 0);
    3422           0 :                 return;
    3423             :         }
    3424             : 
    3425        4190 :         if (offset + length > bs_cluster_to_lba(blob->bs, blob->active.num_clusters)) {
    3426           0 :                 cb_fn(cb_arg, -EINVAL);
    3427           0 :                 return;
    3428             :         }
    3429             : 
    3430             :         /*
    3431             :          * For now, we implement readv/writev using a sequence (instead of a batch) to account for having
    3432             :          *  to split a request that spans a cluster boundary.  For I/O that do not span a cluster boundary,
    3433             :          *  there will be no noticeable difference compared to using a batch.  For I/O that do span a cluster
    3434             :          *  boundary, the target LBAs (after blob offset to LBA translation) may not be contiguous, so we need
    3435             :          *  to allocate a separate iov array and split the I/O such that none of the resulting
    3436             :          *  smaller I/O cross a cluster boundary.  These smaller I/O will be issued in sequence (not in parallel)
    3437             :          *  but since this case happens very infrequently, any performance impact will be negligible.
    3438             :          *
    3439             :          * This could be optimized in the future to allocate a big enough iov array to account for all of the iovs
    3440             :          *  for all of the smaller I/Os, pre-build all of the iov arrays for the smaller I/Os, then issue them
    3441             :          *  in a batch.  That would also require creating an intermediate spdk_bs_cpl that would get called
    3442             :          *  when the batch was completed, to allow for freeing the memory for the iov arrays.
    3443             :          */
    3444        4190 :         if (spdk_likely(length <= bs_num_io_units_to_cluster_boundary(blob, offset))) {
    3445             :                 uint64_t lba_count;
    3446             :                 uint64_t lba;
    3447             :                 bool is_allocated;
    3448             : 
    3449        3930 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    3450        3930 :                 cpl.u.blob_basic.cb_fn = cb_fn;
    3451        3930 :                 cpl.u.blob_basic.cb_arg = cb_arg;
    3452             : 
    3453        3930 :                 if (blob->frozen_refcnt) {
    3454             :                         /* This blob I/O is frozen */
    3455             :                         enum spdk_blob_op_type op_type;
    3456             :                         spdk_bs_user_op_t *op;
    3457           0 :                         struct spdk_bs_channel *bs_channel = spdk_io_channel_get_ctx(_channel);
    3458             : 
    3459           0 :                         op_type = read ? SPDK_BLOB_READV : SPDK_BLOB_WRITEV;
    3460           0 :                         op = bs_user_op_alloc(_channel, &cpl, op_type, blob, iov, iovcnt, offset, length);
    3461           0 :                         if (!op) {
    3462           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3463           0 :                                 return;
    3464             :                         }
    3465             : 
    3466           0 :                         TAILQ_INSERT_TAIL(&bs_channel->queued_io, op, link);
    3467             : 
    3468           0 :                         return;
    3469             :                 }
    3470             : 
    3471        3930 :                 is_allocated = blob_calculate_lba_and_lba_count(blob, offset, length, &lba, &lba_count);
    3472             : 
    3473        3930 :                 if (read) {
    3474             :                         spdk_bs_sequence_t *seq;
    3475             : 
    3476        3565 :                         seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3477        3565 :                         if (!seq) {
    3478           0 :                                 cb_fn(cb_arg, -ENOMEM);
    3479           0 :                                 return;
    3480             :                         }
    3481             : 
    3482        3565 :                         seq->ext_io_opts = ext_io_opts;
    3483             : 
    3484        3565 :                         if (is_allocated) {
    3485         675 :                                 bs_sequence_readv_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3486         675 :                         } else {
    3487        2890 :                                 bs_sequence_readv_bs_dev(seq, blob->back_bs_dev, iov, iovcnt, lba, lba_count,
    3488             :                                                          rw_iov_done, NULL);
    3489             :                         }
    3490        3565 :                 } else {
    3491         365 :                         if (is_allocated) {
    3492             :                                 spdk_bs_sequence_t *seq;
    3493             : 
    3494         345 :                                 seq = bs_sequence_start_blob(_channel, &cpl, blob);
    3495         345 :                                 if (!seq) {
    3496           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3497           0 :                                         return;
    3498             :                                 }
    3499             : 
    3500         345 :                                 seq->ext_io_opts = ext_io_opts;
    3501             : 
    3502         345 :                                 bs_sequence_writev_dev(seq, iov, iovcnt, lba, lba_count, rw_iov_done, NULL);
    3503         345 :                         } else {
    3504             :                                 /* Queue this operation and allocate the cluster */
    3505             :                                 spdk_bs_user_op_t *op;
    3506             : 
    3507          40 :                                 op = bs_user_op_alloc(_channel, &cpl, SPDK_BLOB_WRITEV, blob, iov, iovcnt, offset,
    3508          20 :                                                       length);
    3509          20 :                                 if (!op) {
    3510           0 :                                         cb_fn(cb_arg, -ENOMEM);
    3511           0 :                                         return;
    3512             :                                 }
    3513             : 
    3514          20 :                                 op->ext_io_opts = ext_io_opts;
    3515             : 
    3516          20 :                                 bs_allocate_and_copy_cluster(blob, _channel, offset, op);
    3517             :                         }
    3518             :                 }
    3519        3930 :         } else {
    3520             :                 struct rw_iov_ctx *ctx;
    3521             : 
    3522         260 :                 ctx = calloc(1, sizeof(struct rw_iov_ctx) + iovcnt * sizeof(struct iovec));
    3523         260 :                 if (ctx == NULL) {
    3524           5 :                         cb_fn(cb_arg, -ENOMEM);
    3525           5 :                         return;
    3526             :                 }
    3527             : 
    3528         255 :                 ctx->blob = blob;
    3529         255 :                 ctx->channel = _channel;
    3530         255 :                 ctx->cb_fn = cb_fn;
    3531         255 :                 ctx->cb_arg = cb_arg;
    3532         255 :                 ctx->read = read;
    3533         255 :                 ctx->orig_iov = iov;
    3534         255 :                 ctx->iovcnt = iovcnt;
    3535         255 :                 ctx->io_unit_offset = offset;
    3536         255 :                 ctx->io_units_remaining = length;
    3537         255 :                 ctx->io_units_done = 0;
    3538         255 :                 ctx->ext_io_opts = ext_io_opts;
    3539             : 
    3540         255 :                 rw_iov_split_next(ctx, 0);
    3541             :         }
    3542        4195 : }
    3543             : 
    3544             : static struct spdk_blob *
    3545        9668 : blob_lookup(struct spdk_blob_store *bs, spdk_blob_id blobid)
    3546             : {
    3547             :         struct spdk_blob find;
    3548             : 
    3549        9668 :         if (spdk_bit_array_get(bs->open_blobids, blobid) == 0) {
    3550        8681 :                 return NULL;
    3551             :         }
    3552             : 
    3553         987 :         find.id = blobid;
    3554         987 :         return RB_FIND(spdk_blob_tree, &bs->open_blobs, &find);
    3555        9668 : }
    3556             : 
    3557             : static void
    3558        2256 : blob_get_snapshot_and_clone_entries(struct spdk_blob *blob,
    3559             :                                     struct spdk_blob_list **snapshot_entry, struct spdk_blob_list **clone_entry)
    3560             : {
    3561        2256 :         assert(blob != NULL);
    3562        2256 :         *snapshot_entry = NULL;
    3563        2256 :         *clone_entry = NULL;
    3564             : 
    3565        2256 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    3566        1901 :                 return;
    3567             :         }
    3568             : 
    3569         535 :         TAILQ_FOREACH(*snapshot_entry, &blob->bs->snapshots, link) {
    3570         470 :                 if ((*snapshot_entry)->id == blob->parent_id) {
    3571         290 :                         break;
    3572             :                 }
    3573         180 :         }
    3574             : 
    3575         355 :         if (*snapshot_entry != NULL) {
    3576         345 :                 TAILQ_FOREACH(*clone_entry, &(*snapshot_entry)->clones, link) {
    3577         345 :                         if ((*clone_entry)->id == blob->id) {
    3578         290 :                                 break;
    3579             :                         }
    3580          55 :                 }
    3581             : 
    3582         290 :                 assert(*clone_entry != NULL);
    3583         290 :         }
    3584        2256 : }
    3585             : 
    3586             : static int
    3587        1008 : bs_channel_create(void *io_device, void *ctx_buf)
    3588             : {
    3589        1008 :         struct spdk_blob_store          *bs = io_device;
    3590        1008 :         struct spdk_bs_channel          *channel = ctx_buf;
    3591             :         struct spdk_bs_dev              *dev;
    3592        1008 :         uint32_t                        max_ops = bs->max_channel_ops;
    3593             :         uint32_t                        i;
    3594             : 
    3595        1008 :         dev = bs->dev;
    3596             : 
    3597        1008 :         channel->req_mem = calloc(max_ops, sizeof(struct spdk_bs_request_set));
    3598        1008 :         if (!channel->req_mem) {
    3599           0 :                 return -1;
    3600             :         }
    3601             : 
    3602        1008 :         TAILQ_INIT(&channel->reqs);
    3603             : 
    3604      517104 :         for (i = 0; i < max_ops; i++) {
    3605      516096 :                 TAILQ_INSERT_TAIL(&channel->reqs, &channel->req_mem[i], link);
    3606      516096 :         }
    3607             : 
    3608        1008 :         channel->bs = bs;
    3609        1008 :         channel->dev = dev;
    3610        1008 :         channel->dev_channel = dev->create_channel(dev);
    3611             : 
    3612        1008 :         if (!channel->dev_channel) {
    3613           0 :                 SPDK_ERRLOG("Failed to create device channel.\n");
    3614           0 :                 free(channel->req_mem);
    3615           0 :                 return -1;
    3616             :         }
    3617             : 
    3618        1008 :         channel->new_cluster_page = spdk_zmalloc(bs->md_page_size, 0, NULL, SPDK_ENV_NUMA_ID_ANY,
    3619             :                                     SPDK_MALLOC_DMA);
    3620        1008 :         if (!channel->new_cluster_page) {
    3621           0 :                 SPDK_ERRLOG("Failed to allocate new cluster page\n");
    3622           0 :                 free(channel->req_mem);
    3623           0 :                 channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3624           0 :                 return -1;
    3625             :         }
    3626             : 
    3627        1008 :         TAILQ_INIT(&channel->need_cluster_alloc);
    3628        1008 :         TAILQ_INIT(&channel->queued_io);
    3629        1008 :         RB_INIT(&channel->esnap_channels);
    3630             : 
    3631        1008 :         return 0;
    3632        1008 : }
    3633             : 
    3634             : static void
    3635        1008 : bs_channel_destroy(void *io_device, void *ctx_buf)
    3636             : {
    3637        1008 :         struct spdk_bs_channel *channel = ctx_buf;
    3638             :         spdk_bs_user_op_t *op;
    3639             : 
    3640        1008 :         while (!TAILQ_EMPTY(&channel->need_cluster_alloc)) {
    3641           0 :                 op = TAILQ_FIRST(&channel->need_cluster_alloc);
    3642           0 :                 TAILQ_REMOVE(&channel->need_cluster_alloc, op, link);
    3643           0 :                 bs_user_op_abort(op, -EIO);
    3644             :         }
    3645             : 
    3646        1008 :         while (!TAILQ_EMPTY(&channel->queued_io)) {
    3647           0 :                 op = TAILQ_FIRST(&channel->queued_io);
    3648           0 :                 TAILQ_REMOVE(&channel->queued_io, op, link);
    3649           0 :                 bs_user_op_abort(op, -EIO);
    3650             :         }
    3651             : 
    3652        1008 :         blob_esnap_destroy_bs_channel(channel);
    3653             : 
    3654        1008 :         free(channel->req_mem);
    3655        1008 :         spdk_free(channel->new_cluster_page);
    3656        1008 :         channel->dev->destroy_channel(channel->dev, channel->dev_channel);
    3657        1008 : }
    3658             : 
    3659             : static void
    3660         988 : bs_dev_destroy(void *io_device)
    3661             : {
    3662         988 :         struct spdk_blob_store *bs = io_device;
    3663             :         struct spdk_blob        *blob, *blob_tmp;
    3664             : 
    3665         988 :         bs->dev->destroy(bs->dev);
    3666             : 
    3667         988 :         RB_FOREACH_SAFE(blob, spdk_blob_tree, &bs->open_blobs, blob_tmp) {
    3668           0 :                 RB_REMOVE(spdk_blob_tree, &bs->open_blobs, blob);
    3669           0 :                 spdk_bit_array_clear(bs->open_blobids, blob->id);
    3670           0 :                 blob_free(blob);
    3671           0 :         }
    3672             : 
    3673         988 :         spdk_spin_destroy(&bs->used_lock);
    3674             : 
    3675         988 :         spdk_bit_array_free(&bs->open_blobids);
    3676         988 :         spdk_bit_array_free(&bs->used_blobids);
    3677         988 :         spdk_bit_array_free(&bs->used_md_pages);
    3678         988 :         spdk_bit_pool_free(&bs->used_clusters);
    3679             :         /*
    3680             :          * If this function is called for any reason except a successful unload,
    3681             :          * the unload_cpl type will be NONE and this will be a nop.
    3682             :          */
    3683         988 :         bs_call_cpl(&bs->unload_cpl, bs->unload_err);
    3684             : 
    3685         988 :         free(bs);
    3686         988 : }
    3687             : 
    3688             : static int
    3689        1139 : bs_blob_list_add(struct spdk_blob *blob)
    3690             : {
    3691             :         spdk_blob_id snapshot_id;
    3692        1139 :         struct spdk_blob_list *snapshot_entry = NULL;
    3693        1139 :         struct spdk_blob_list *clone_entry = NULL;
    3694             : 
    3695        1139 :         assert(blob != NULL);
    3696             : 
    3697        1139 :         snapshot_id = blob->parent_id;
    3698        1139 :         if (snapshot_id == SPDK_BLOBID_INVALID ||
    3699         567 :             snapshot_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    3700         617 :                 return 0;
    3701             :         }
    3702             : 
    3703         522 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, snapshot_id);
    3704         522 :         if (snapshot_entry == NULL) {
    3705             :                 /* Snapshot not found */
    3706         362 :                 snapshot_entry = calloc(1, sizeof(struct spdk_blob_list));
    3707         362 :                 if (snapshot_entry == NULL) {
    3708           0 :                         return -ENOMEM;
    3709             :                 }
    3710         362 :                 snapshot_entry->id = snapshot_id;
    3711         362 :                 TAILQ_INIT(&snapshot_entry->clones);
    3712         362 :                 TAILQ_INSERT_TAIL(&blob->bs->snapshots, snapshot_entry, link);
    3713         362 :         } else {
    3714         255 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    3715          95 :                         if (clone_entry->id == blob->id) {
    3716           0 :                                 break;
    3717             :                         }
    3718          95 :                 }
    3719             :         }
    3720             : 
    3721         522 :         if (clone_entry == NULL) {
    3722             :                 /* Clone not found */
    3723         522 :                 clone_entry = calloc(1, sizeof(struct spdk_blob_list));
    3724         522 :                 if (clone_entry == NULL) {
    3725           0 :                         return -ENOMEM;
    3726             :                 }
    3727         522 :                 clone_entry->id = blob->id;
    3728         522 :                 TAILQ_INIT(&clone_entry->clones);
    3729         522 :                 TAILQ_INSERT_TAIL(&snapshot_entry->clones, clone_entry, link);
    3730         522 :                 snapshot_entry->clone_count++;
    3731         522 :         }
    3732             : 
    3733         522 :         return 0;
    3734        1139 : }
    3735             : 
    3736             : static void
    3737        2158 : bs_blob_list_remove(struct spdk_blob *blob)
    3738             : {
    3739        2158 :         struct spdk_blob_list *snapshot_entry = NULL;
    3740        2158 :         struct spdk_blob_list *clone_entry = NULL;
    3741             : 
    3742        2158 :         blob_get_snapshot_and_clone_entries(blob, &snapshot_entry, &clone_entry);
    3743             : 
    3744        2158 :         if (snapshot_entry == NULL) {
    3745        1888 :                 return;
    3746             :         }
    3747             : 
    3748         270 :         blob->parent_id = SPDK_BLOBID_INVALID;
    3749         270 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3750         270 :         free(clone_entry);
    3751             : 
    3752         270 :         snapshot_entry->clone_count--;
    3753        2158 : }
    3754             : 
    3755             : static int
    3756         988 : bs_blob_list_free(struct spdk_blob_store *bs)
    3757             : {
    3758             :         struct spdk_blob_list *snapshot_entry;
    3759             :         struct spdk_blob_list *snapshot_entry_tmp;
    3760             :         struct spdk_blob_list *clone_entry;
    3761             :         struct spdk_blob_list *clone_entry_tmp;
    3762             : 
    3763        1170 :         TAILQ_FOREACH_SAFE(snapshot_entry, &bs->snapshots, link, snapshot_entry_tmp) {
    3764         374 :                 TAILQ_FOREACH_SAFE(clone_entry, &snapshot_entry->clones, link, clone_entry_tmp) {
    3765         192 :                         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    3766         192 :                         free(clone_entry);
    3767         192 :                 }
    3768         182 :                 TAILQ_REMOVE(&bs->snapshots, snapshot_entry, link);
    3769         182 :                 free(snapshot_entry);
    3770         182 :         }
    3771             : 
    3772         988 :         return 0;
    3773             : }
    3774             : 
    3775             : static void
    3776         988 : bs_free(struct spdk_blob_store *bs)
    3777             : {
    3778         988 :         bs_blob_list_free(bs);
    3779             : 
    3780         988 :         bs_unregister_md_thread(bs);
    3781         988 :         spdk_io_device_unregister(bs, bs_dev_destroy);
    3782         988 : }
    3783             : 
    3784             : void
    3785        1323 : spdk_bs_opts_init(struct spdk_bs_opts *opts, size_t opts_size)
    3786             : {
    3787             : 
    3788        1323 :         if (!opts) {
    3789           0 :                 SPDK_ERRLOG("opts should not be NULL\n");
    3790           0 :                 return;
    3791             :         }
    3792             : 
    3793        1323 :         if (!opts_size) {
    3794           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    3795           0 :                 return;
    3796             :         }
    3797             : 
    3798        1323 :         memset(opts, 0, opts_size);
    3799        1323 :         opts->opts_size = opts_size;
    3800             : 
    3801             : #define FIELD_OK(field) \
    3802             :         offsetof(struct spdk_bs_opts, field) + sizeof(opts->field) <= opts_size
    3803             : 
    3804             : #define SET_FIELD(field, value) \
    3805             :         if (FIELD_OK(field)) { \
    3806             :                 opts->field = value; \
    3807             :         } \
    3808             : 
    3809        1323 :         SET_FIELD(cluster_sz, SPDK_BLOB_OPTS_CLUSTER_SZ);
    3810        1323 :         SET_FIELD(num_md_pages, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3811        1323 :         SET_FIELD(max_md_ops, SPDK_BLOB_OPTS_NUM_MD_PAGES);
    3812        1323 :         SET_FIELD(max_channel_ops, SPDK_BLOB_OPTS_DEFAULT_CHANNEL_OPS);
    3813        1323 :         SET_FIELD(clear_method,  BS_CLEAR_WITH_UNMAP);
    3814             : 
    3815        1323 :         if (FIELD_OK(bstype)) {
    3816        1323 :                 memset(&opts->bstype, 0, sizeof(opts->bstype));
    3817        1323 :         }
    3818             : 
    3819        1323 :         SET_FIELD(iter_cb_fn, NULL);
    3820        1323 :         SET_FIELD(iter_cb_arg, NULL);
    3821        1323 :         SET_FIELD(force_recover, false);
    3822        1323 :         SET_FIELD(esnap_bs_dev_create, NULL);
    3823        1323 :         SET_FIELD(esnap_ctx, NULL);
    3824             : 
    3825             : #undef FIELD_OK
    3826             : #undef SET_FIELD
    3827        1323 : }
    3828             : 
    3829             : static int
    3830         607 : bs_opts_verify(struct spdk_bs_opts *opts)
    3831             : {
    3832         607 :         if (opts->cluster_sz == 0 || opts->num_md_pages == 0 || opts->max_md_ops == 0 ||
    3833         602 :             opts->max_channel_ops == 0) {
    3834           5 :                 SPDK_ERRLOG("Blobstore options cannot be set to 0\n");
    3835           5 :                 return -1;
    3836             :         }
    3837             : 
    3838         602 :         if ((opts->cluster_sz % SPDK_BS_PAGE_SIZE) != 0) {
    3839           5 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is not an integral multiple of blocklen %" PRIu32"\n",
    3840             :                             opts->cluster_sz, SPDK_BS_PAGE_SIZE);
    3841           5 :                 return -1;
    3842             :         }
    3843             : 
    3844         597 :         return 0;
    3845         607 : }
    3846             : 
    3847             : /* START spdk_bs_load */
    3848             : 
    3849             : /* spdk_bs_load_ctx is used for init, load, unload and dump code paths. */
    3850             : 
    3851             : struct spdk_bs_load_ctx {
    3852             :         struct spdk_blob_store          *bs;
    3853             :         struct spdk_bs_super_block      *super;
    3854             : 
    3855             :         struct spdk_bs_md_mask          *mask;
    3856             :         bool                            in_page_chain;
    3857             :         uint32_t                        page_index;
    3858             :         uint32_t                        cur_page;
    3859             :         struct spdk_blob_md_page        *page;
    3860             : 
    3861             :         uint64_t                        num_extent_pages;
    3862             :         uint32_t                        *extent_page_num;
    3863             :         struct spdk_blob_md_page        *extent_pages;
    3864             :         struct spdk_bit_array           *used_clusters;
    3865             : 
    3866             :         spdk_bs_sequence_t                      *seq;
    3867             :         spdk_blob_op_with_handle_complete       iter_cb_fn;
    3868             :         void                                    *iter_cb_arg;
    3869             :         struct spdk_blob                        *blob;
    3870             :         spdk_blob_id                            blobid;
    3871             : 
    3872             :         bool                                    force_recover;
    3873             : 
    3874             :         /* These fields are used in the spdk_bs_dump path. */
    3875             :         bool                                    dumping;
    3876             :         FILE                                    *fp;
    3877             :         spdk_bs_dump_print_xattr                print_xattr_fn;
    3878             :         char                                    xattr_name[4096];
    3879             : };
    3880             : 
    3881             : static void
    3882        1349 : bs_init_per_cluster_fields(struct spdk_blob_store *bs)
    3883             : {
    3884        1349 :         bs->pages_per_cluster = bs->cluster_sz / bs->md_page_size;
    3885        1349 :         if (spdk_u32_is_pow2(bs->pages_per_cluster)) {
    3886        1349 :                 bs->pages_per_cluster_shift = spdk_u32log2(bs->pages_per_cluster);
    3887        1349 :         }
    3888        1349 :         bs->io_units_per_cluster = bs->cluster_sz / bs->io_unit_size;
    3889        1349 :         if (spdk_u32_is_pow2(bs->io_units_per_cluster)) {
    3890        1349 :                 bs->io_units_per_cluster_shift = spdk_u32log2(bs->io_units_per_cluster);
    3891        1349 :         }
    3892        1349 : }
    3893             : 
    3894             : static int
    3895         988 : bs_alloc(struct spdk_bs_dev *dev, struct spdk_bs_opts *opts, struct spdk_blob_store **_bs,
    3896             :          struct spdk_bs_load_ctx **_ctx)
    3897             : {
    3898             :         struct spdk_blob_store  *bs;
    3899             :         struct spdk_bs_load_ctx *ctx;
    3900             :         uint64_t dev_size;
    3901             :         uint32_t md_page_size;
    3902             :         int rc;
    3903             : 
    3904         988 :         dev_size = dev->blocklen * dev->blockcnt;
    3905         988 :         if (dev_size < opts->cluster_sz) {
    3906             :                 /* Device size cannot be smaller than cluster size of blobstore */
    3907           0 :                 SPDK_INFOLOG(blob, "Device size %" PRIu64 " is smaller than cluster size %" PRIu32 "\n",
    3908             :                              dev_size, opts->cluster_sz);
    3909           0 :                 return -ENOSPC;
    3910             :         }
    3911             : 
    3912         988 :         md_page_size = spdk_max(spdk_max(dev->phys_blocklen, SPDK_BS_PAGE_SIZE),
    3913             :                                 opts->md_page_size);
    3914         988 :         if (opts->cluster_sz < md_page_size) {
    3915             :                 /* Cluster size cannot be smaller than page size */
    3916           0 :                 SPDK_ERRLOG("Cluster size %" PRIu32 " is smaller than page size %d\n",
    3917             :                             opts->cluster_sz, md_page_size);
    3918           0 :                 return -EINVAL;
    3919             :         }
    3920         988 :         bs = calloc(1, sizeof(struct spdk_blob_store));
    3921         988 :         if (!bs) {
    3922           0 :                 return -ENOMEM;
    3923             :         }
    3924             : 
    3925         988 :         ctx = calloc(1, sizeof(struct spdk_bs_load_ctx));
    3926         988 :         if (!ctx) {
    3927           0 :                 free(bs);
    3928           0 :                 return -ENOMEM;
    3929             :         }
    3930             : 
    3931         988 :         ctx->bs = bs;
    3932         988 :         ctx->iter_cb_fn = opts->iter_cb_fn;
    3933         988 :         ctx->iter_cb_arg = opts->iter_cb_arg;
    3934         988 :         ctx->force_recover = opts->force_recover;
    3935             : 
    3936         988 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    3937             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    3938         988 :         if (!ctx->super) {
    3939           0 :                 free(ctx);
    3940           0 :                 free(bs);
    3941           0 :                 return -ENOMEM;
    3942             :         }
    3943             : 
    3944         988 :         RB_INIT(&bs->open_blobs);
    3945         988 :         TAILQ_INIT(&bs->snapshots);
    3946         988 :         bs->dev = dev;
    3947         988 :         bs->md_page_size = md_page_size;
    3948         988 :         bs->md_thread = spdk_get_thread();
    3949         988 :         assert(bs->md_thread != NULL);
    3950             : 
    3951             :         /*
    3952             :          * Do not use bs_lba_to_cluster() here since blockcnt may not be an
    3953             :          *  even multiple of the cluster size.
    3954             :          */
    3955         988 :         bs->cluster_sz = opts->cluster_sz;
    3956         988 :         bs->total_clusters = dev->blockcnt / (bs->cluster_sz / dev->blocklen);
    3957         988 :         ctx->used_clusters = spdk_bit_array_create(bs->total_clusters);
    3958         988 :         if (!ctx->used_clusters) {
    3959           0 :                 spdk_free(ctx->super);
    3960           0 :                 free(ctx);
    3961           0 :                 free(bs);
    3962           0 :                 return -ENOMEM;
    3963             :         }
    3964             : 
    3965         988 :         bs->num_free_clusters = bs->total_clusters;
    3966         988 :         bs->io_unit_size = dev->blocklen;
    3967         988 :         bs_init_per_cluster_fields(bs);
    3968             : 
    3969         988 :         bs->max_channel_ops = opts->max_channel_ops;
    3970         988 :         bs->super_blob = SPDK_BLOBID_INVALID;
    3971         988 :         memcpy(&bs->bstype, &opts->bstype, sizeof(opts->bstype));
    3972         988 :         bs->esnap_bs_dev_create = opts->esnap_bs_dev_create;
    3973         988 :         bs->esnap_ctx = opts->esnap_ctx;
    3974             : 
    3975             :         /* The metadata is assumed to be at least 1 page */
    3976         988 :         bs->used_md_pages = spdk_bit_array_create(1);
    3977         988 :         bs->used_blobids = spdk_bit_array_create(0);
    3978         988 :         bs->open_blobids = spdk_bit_array_create(0);
    3979             : 
    3980         988 :         spdk_spin_init(&bs->used_lock);
    3981             : 
    3982         988 :         spdk_io_device_register(bs, bs_channel_create, bs_channel_destroy,
    3983             :                                 sizeof(struct spdk_bs_channel), "blobstore");
    3984         988 :         rc = bs_register_md_thread(bs);
    3985         988 :         if (rc == -1) {
    3986           0 :                 spdk_io_device_unregister(bs, NULL);
    3987           0 :                 spdk_spin_destroy(&bs->used_lock);
    3988           0 :                 spdk_bit_array_free(&bs->open_blobids);
    3989           0 :                 spdk_bit_array_free(&bs->used_blobids);
    3990           0 :                 spdk_bit_array_free(&bs->used_md_pages);
    3991           0 :                 spdk_bit_array_free(&ctx->used_clusters);
    3992           0 :                 spdk_free(ctx->super);
    3993           0 :                 free(ctx);
    3994           0 :                 free(bs);
    3995             :                 /* FIXME: this is a lie but don't know how to get a proper error code here */
    3996           0 :                 return -ENOMEM;
    3997             :         }
    3998             : 
    3999         988 :         *_ctx = ctx;
    4000         988 :         *_bs = bs;
    4001         988 :         return 0;
    4002         988 : }
    4003             : 
    4004             : static void
    4005          40 : bs_load_ctx_fail(struct spdk_bs_load_ctx *ctx, int bserrno)
    4006             : {
    4007          40 :         assert(bserrno != 0);
    4008             : 
    4009          40 :         spdk_free(ctx->mask);
    4010          40 :         spdk_free(ctx->super);
    4011          40 :         bs_sequence_finish(ctx->seq, bserrno);
    4012          40 :         bs_free(ctx->bs);
    4013          40 :         spdk_bit_array_free(&ctx->used_clusters);
    4014          40 :         free(ctx);
    4015          40 : }
    4016             : 
    4017             : static void
    4018        1031 : bs_write_super(spdk_bs_sequence_t *seq, struct spdk_blob_store *bs,
    4019             :                struct spdk_bs_super_block *super, spdk_bs_sequence_cpl cb_fn, void *cb_arg)
    4020             : {
    4021             :         /* Update the values in the super block */
    4022        1031 :         super->super_blob = bs->super_blob;
    4023        1031 :         memcpy(&super->bstype, &bs->bstype, sizeof(bs->bstype));
    4024        1031 :         super->crc = blob_md_page_calc_crc(super);
    4025        2062 :         bs_sequence_write_dev(seq, super, bs_page_to_lba(bs, 0),
    4026        1031 :                               bs_byte_to_lba(bs, sizeof(*super)),
    4027        1031 :                               cb_fn, cb_arg);
    4028        1031 : }
    4029             : 
    4030             : static void
    4031         953 : bs_write_used_clusters(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4032             : {
    4033         953 :         struct spdk_bs_load_ctx *ctx = arg;
    4034             :         uint64_t        mask_size, lba, lba_count;
    4035             : 
    4036             :         /* Write out the used clusters mask */
    4037         953 :         mask_size = ctx->super->used_cluster_mask_len * ctx->bs->md_page_size;
    4038         953 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4039             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4040         953 :         if (!ctx->mask) {
    4041           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4042           0 :                 return;
    4043             :         }
    4044             : 
    4045         953 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_CLUSTERS;
    4046         953 :         ctx->mask->length = ctx->bs->total_clusters;
    4047             :         /* We could get here through the normal unload path, or through dirty
    4048             :          * shutdown recovery.  For the normal unload path, we use the mask from
    4049             :          * the bit pool.  For dirty shutdown recovery, we don't have a bit pool yet -
    4050             :          * only the bit array from the load ctx.
    4051             :          */
    4052         953 :         if (ctx->bs->used_clusters) {
    4053         819 :                 assert(ctx->mask->length == spdk_bit_pool_capacity(ctx->bs->used_clusters));
    4054         819 :                 spdk_bit_pool_store_mask(ctx->bs->used_clusters, ctx->mask->mask);
    4055         819 :         } else {
    4056         134 :                 assert(ctx->mask->length == spdk_bit_array_capacity(ctx->used_clusters));
    4057         134 :                 spdk_bit_array_store_mask(ctx->used_clusters, ctx->mask->mask);
    4058             :         }
    4059         953 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4060         953 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4061         953 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4062         953 : }
    4063             : 
    4064             : static void
    4065         953 : bs_write_used_md(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4066             : {
    4067         953 :         struct spdk_bs_load_ctx *ctx = arg;
    4068             :         uint64_t        mask_size, lba, lba_count;
    4069             : 
    4070         953 :         mask_size = ctx->super->used_page_mask_len * ctx->bs->md_page_size;
    4071         953 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4072             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4073         953 :         if (!ctx->mask) {
    4074           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4075           0 :                 return;
    4076             :         }
    4077             : 
    4078         953 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_PAGES;
    4079         953 :         ctx->mask->length = ctx->super->md_len;
    4080         953 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_md_pages));
    4081             : 
    4082         953 :         spdk_bit_array_store_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4083         953 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4084         953 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4085         953 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4086         953 : }
    4087             : 
    4088             : static void
    4089         953 : bs_write_used_blobids(spdk_bs_sequence_t *seq, void *arg, spdk_bs_sequence_cpl cb_fn)
    4090             : {
    4091         953 :         struct spdk_bs_load_ctx *ctx = arg;
    4092             :         uint64_t        mask_size, lba, lba_count;
    4093             : 
    4094         953 :         if (ctx->super->used_blobid_mask_len == 0) {
    4095             :                 /*
    4096             :                  * This is a pre-v3 on-disk format where the blobid mask does not get
    4097             :                  *  written to disk.
    4098             :                  */
    4099          30 :                 cb_fn(seq, arg, 0);
    4100          30 :                 return;
    4101             :         }
    4102             : 
    4103         923 :         mask_size = ctx->super->used_blobid_mask_len * ctx->bs->md_page_size;
    4104         923 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4105             :                                  SPDK_MALLOC_DMA);
    4106         923 :         if (!ctx->mask) {
    4107           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4108           0 :                 return;
    4109             :         }
    4110             : 
    4111         923 :         ctx->mask->type = SPDK_MD_MASK_TYPE_USED_BLOBIDS;
    4112         923 :         ctx->mask->length = ctx->super->md_len;
    4113         923 :         assert(ctx->mask->length == spdk_bit_array_capacity(ctx->bs->used_blobids));
    4114             : 
    4115         923 :         spdk_bit_array_store_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4116         923 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4117         923 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4118         923 :         bs_sequence_write_dev(seq, ctx->mask, lba, lba_count, cb_fn, arg);
    4119         953 : }
    4120             : 
    4121             : static void
    4122         882 : blob_set_thin_provision(struct spdk_blob *blob)
    4123             : {
    4124         882 :         blob_verify_md_op(blob);
    4125         882 :         blob->invalid_flags |= SPDK_BLOB_THIN_PROV;
    4126         882 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4127         882 : }
    4128             : 
    4129             : static void
    4130        2617 : blob_set_clear_method(struct spdk_blob *blob, enum blob_clear_method clear_method)
    4131             : {
    4132        2617 :         blob_verify_md_op(blob);
    4133        2617 :         blob->clear_method = clear_method;
    4134        2617 :         blob->md_ro_flags |= (clear_method << SPDK_BLOB_CLEAR_METHOD_SHIFT);
    4135        2617 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    4136        2617 : }
    4137             : 
    4138             : static void bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno);
    4139             : 
    4140             : static void
    4141          30 : bs_delete_corrupted_blob_cpl(void *cb_arg, int bserrno)
    4142             : {
    4143          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4144             :         spdk_blob_id id;
    4145             :         int64_t page_num;
    4146             : 
    4147             :         /* Iterate to next blob (we can't use spdk_bs_iter_next function as our
    4148             :          * last blob has been removed */
    4149          30 :         page_num = bs_blobid_to_page(ctx->blobid);
    4150          30 :         page_num++;
    4151          30 :         page_num = spdk_bit_array_find_first_set(ctx->bs->used_blobids, page_num);
    4152          30 :         if (page_num >= spdk_bit_array_capacity(ctx->bs->used_blobids)) {
    4153          30 :                 bs_load_iter(ctx, NULL, -ENOENT);
    4154          30 :                 return;
    4155             :         }
    4156             : 
    4157           0 :         id = bs_page_to_blobid(page_num);
    4158             : 
    4159           0 :         spdk_bs_open_blob(ctx->bs, id, bs_load_iter, ctx);
    4160          30 : }
    4161             : 
    4162             : static void
    4163          30 : bs_delete_corrupted_close_cb(void *cb_arg, int bserrno)
    4164             : {
    4165          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4166             : 
    4167          30 :         if (bserrno != 0) {
    4168           0 :                 SPDK_ERRLOG("Failed to close corrupted blob\n");
    4169           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4170           0 :                 return;
    4171             :         }
    4172             : 
    4173          30 :         spdk_bs_delete_blob(ctx->bs, ctx->blobid, bs_delete_corrupted_blob_cpl, ctx);
    4174          30 : }
    4175             : 
    4176             : static void
    4177          30 : bs_delete_corrupted_blob(void *cb_arg, int bserrno)
    4178             : {
    4179          30 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4180             :         uint64_t i;
    4181             : 
    4182          30 :         if (bserrno != 0) {
    4183           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4184           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4185           0 :                 return;
    4186             :         }
    4187             : 
    4188             :         /* Snapshot and clone have the same copy of cluster map and extent pages
    4189             :          * at this point. Let's clear both for snapshot now,
    4190             :          * so that it won't be cleared for clone later when we remove snapshot.
    4191             :          * Also set thin provision to pass data corruption check */
    4192         330 :         for (i = 0; i < ctx->blob->active.num_clusters; i++) {
    4193         300 :                 ctx->blob->active.clusters[i] = 0;
    4194         300 :         }
    4195          48 :         for (i = 0; i < ctx->blob->active.num_extent_pages; i++) {
    4196          18 :                 ctx->blob->active.extent_pages[i] = 0;
    4197          18 :         }
    4198             : 
    4199          30 :         ctx->blob->active.num_allocated_clusters = 0;
    4200             : 
    4201          30 :         ctx->blob->md_ro = false;
    4202             : 
    4203          30 :         blob_set_thin_provision(ctx->blob);
    4204             : 
    4205          30 :         ctx->blobid = ctx->blob->id;
    4206             : 
    4207          30 :         spdk_blob_close(ctx->blob, bs_delete_corrupted_close_cb, ctx);
    4208          30 : }
    4209             : 
    4210             : static void
    4211          15 : bs_update_corrupted_blob(void *cb_arg, int bserrno)
    4212             : {
    4213          15 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4214             : 
    4215          15 :         if (bserrno != 0) {
    4216           0 :                 SPDK_ERRLOG("Failed to close clone of a corrupted blob\n");
    4217           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4218           0 :                 return;
    4219             :         }
    4220             : 
    4221          15 :         ctx->blob->md_ro = false;
    4222          15 :         blob_remove_xattr(ctx->blob, SNAPSHOT_PENDING_REMOVAL, true);
    4223          15 :         blob_remove_xattr(ctx->blob, SNAPSHOT_IN_PROGRESS, true);
    4224          15 :         spdk_blob_set_read_only(ctx->blob);
    4225             : 
    4226          15 :         if (ctx->iter_cb_fn) {
    4227           0 :                 ctx->iter_cb_fn(ctx->iter_cb_arg, ctx->blob, 0);
    4228           0 :         }
    4229          15 :         bs_blob_list_add(ctx->blob);
    4230             : 
    4231          15 :         spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4232          15 : }
    4233             : 
    4234             : static void
    4235          45 : bs_examine_clone(void *cb_arg, struct spdk_blob *blob, int bserrno)
    4236             : {
    4237          45 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4238             : 
    4239          45 :         if (bserrno != 0) {
    4240           0 :                 SPDK_ERRLOG("Failed to open clone of a corrupted blob\n");
    4241           0 :                 spdk_bs_iter_next(ctx->bs, ctx->blob, bs_load_iter, ctx);
    4242           0 :                 return;
    4243             :         }
    4244             : 
    4245          45 :         if (blob->parent_id == ctx->blob->id) {
    4246             :                 /* Power failure occurred before updating clone (snapshot delete case)
    4247             :                  * or after updating clone (creating snapshot case) - keep snapshot */
    4248          15 :                 spdk_blob_close(blob, bs_update_corrupted_blob, ctx);
    4249          15 :         } else {
    4250             :                 /* Power failure occurred after updating clone (snapshot delete case)
    4251             :                  * or before updating clone (creating snapshot case) - remove snapshot */
    4252          30 :                 spdk_blob_close(blob, bs_delete_corrupted_blob, ctx);
    4253             :         }
    4254          45 : }
    4255             : 
                       /*
                        * Blob iteration callback used during blobstore load; bs_load_complete()
                        * starts the iteration with it and every continuation passes it again.
                        *
                        * arg     - the struct spdk_bs_load_ctx driving the load.
                        * blob    - the blob delivered by the iterator; only used when bserrno is 0.
                        * bserrno - 0 for a delivered blob, -ENOENT is turned into success before the
                        *           load is finished (presumably the iterator's "no more blobs"
                        *           result - confirm), any other value finishes the load with that
                        *           error.
                        *
                        * A blob carrying neither the SNAPSHOT_PENDING_REMOVAL nor the
                        * SNAPSHOT_IN_PROGRESS internal xattr is handed to ctx->iter_cb_fn (if set),
                        * added to the blob list, and the iteration continues. A blob carrying one of
                        * them is parked in ctx->blob and the blob whose id is stored in the xattr
                        * value is opened, with bs_examine_clone() as the completion.
                        *
                        * On the terminating paths this function frees ctx->super, finishes ctx->seq
                        * and frees ctx itself.
                        */
    4256             : static void
    4257         903 : bs_load_iter(void *arg, struct spdk_blob *blob, int bserrno)
    4258             : {
    4259         903 :         struct spdk_bs_load_ctx *ctx = arg;
    4260             :         const void *value;
    4261             :         size_t len;
    4262         903 :         int rc = 0;
    4263             : 
    4264         903 :         if (bserrno == 0) {
    4265             :                 /* Examine blob if it is corrupted after power failure. Fix
    4266             :                  * the ones that can be fixed and remove any other corrupted
    4267             :                  * ones. If it is not corrupted just process it */
    4268         552 :                 rc = blob_get_xattr_value(blob, SNAPSHOT_PENDING_REMOVAL, &value, &len, true);
    4269         552 :                 if (rc != 0) {
    4270         527 :                         rc = blob_get_xattr_value(blob, SNAPSHOT_IN_PROGRESS, &value, &len, true);
    4271         527 :                         if (rc != 0) {
    4272             :                                 /* Not corrupted - process it and continue with iterating through blobs */
    4273         507 :                                 if (ctx->iter_cb_fn) {
    4274          42 :                                         ctx->iter_cb_fn(ctx->iter_cb_arg, blob, 0);
    4275          42 :                                 }
    4276         507 :                                 bs_blob_list_add(blob);
    4277         507 :                                 spdk_bs_iter_next(ctx->bs, blob, bs_load_iter, ctx);
    4278         507 :                                 return;
    4279             :                         }
    4280             : 
    4281          20 :                 }
    4282             : 
                                       /* value/len come from whichever of the two lookups succeeded.
                                        * NOTE(review): the length is only checked by this assert, which
                                        * disappears under NDEBUG; value is then dereferenced as a
                                        * spdk_blob_id without a runtime check - confirm that is intended. */
    4283          45 :                 assert(len == sizeof(spdk_blob_id));
    4284             : 
                                       /* Remember the suspect blob; bs_examine_clone() reads it back. */
    4285          45 :                 ctx->blob = blob;
    4286             : 
    4287             :                 /* Open clone to check if we are able to fix this blob or should we remove it */
    4288          45 :                 spdk_bs_open_blob(ctx->bs, *(spdk_blob_id *)value, bs_examine_clone, ctx);
    4289          45 :                 return;
    4290         351 :         } else if (bserrno == -ENOENT) {
                                       /* Finish the load successfully. */
    4291         351 :                 bserrno = 0;
    4292         351 :         } else {
    4293             :                 /*
    4294             :                  * This case needs to be looked at further.  Same problem
    4295             :                  *  exists with applications that rely on explicit blob
    4296             :                  *  iteration.  We should just skip the blob that failed
    4297             :                  *  to load and continue on to the next one.
    4298             :                  */
    4299           0 :                 SPDK_ERRLOG("Error in iterating blobs\n");
    4300             :         }
    4301             : 
                               /* Terminating path: release the load context and complete the sequence
                                * with bserrno (0 after -ENOENT, the original error otherwise). */
    4302         351 :         ctx->iter_cb_fn = NULL;
    4303             : 
    4304         351 :         spdk_free(ctx->super);
    4305         351 :         bs_sequence_finish(ctx->seq, bserrno);
    4306         351 :         free(ctx);
    4307         903 : }
    4308             : 
    4309             : static void bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg);
    4310             : 
                       /*
                        * Final step shared by the mask-read path (bs_load_used_blobids_cpl) and the
                        * mask-rewrite path (bs_load_write_used_clusters_cpl).
                        *
                        * Creates bs->used_clusters from the ctx->used_clusters bit array, then
                        * either continues with bs_dump_read_md_page() when ctx->dumping is set or
                        * starts iterating over the blobs with bs_load_iter().
                        *
                        * NOTE(review): the result of spdk_bit_pool_create_from_array() is stored
                        * without being checked here - confirm whether it can fail.
                        */
    4311             : static void
    4312         351 : bs_load_complete(struct spdk_bs_load_ctx *ctx)
    4313             : {
    4314         351 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    4315         351 :         if (ctx->dumping) {
    4316           0 :                 bs_dump_read_md_page(ctx->seq, ctx);
    4317           0 :                 return;
    4318             :         }
    4319         351 :         spdk_bs_iter_first(ctx->bs, bs_load_iter, ctx);
    4320         351 : }
    4321             : 
                       /*
                        * Completion callback for the read of the used-blobids mask that
                        * bs_load_used_clusters_cpl() issues into ctx->mask.
                        *
                        * seq     - the load sequence (unused here).
                        * cb_arg  - the struct spdk_bs_load_ctx driving the load.
                        * bserrno - result of the read; non-zero aborts the load.
                        *
                        * On success the mask is loaded into bs->used_blobids, the DMA buffer is
                        * released and the load continues in bs_load_complete(). On any failure the
                        * DMA buffer is released before bs_load_ctx_fail() is called, matching what
                        * bs_load_used_clusters_cpl() does on its resize failure.
                        */
    4322             : static void
    4323         217 : bs_load_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4324             : {
    4325         217 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4326             :         int rc;
    4327             : 
                               /* A failed read leaves nothing meaningful in the buffer; do not parse it. */
                               if (bserrno != 0) {
                                       spdk_free(ctx->mask);
                                       ctx->mask = NULL;
                                       bs_load_ctx_fail(ctx, bserrno);
                                       return;
                               }

    4328             :         /* The type must be correct */
    4329         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_BLOBIDS);
    4330             : 
    4331             :         /* The length of the mask (in bits) must not be greater than
    4332             :          * the length of the buffer (converted to bits) */
    4333         217 :         assert(ctx->mask->length <= (ctx->super->used_blobid_mask_len * ctx->super->md_page_size * 8));
    4334             : 
    4335             :         /* The length of the mask must be exactly equal to the size
    4336             :          * (in pages) of the metadata region */
    4337         217 :         assert(ctx->mask->length == ctx->super->md_len);
    4338             : 
    4339         217 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->mask->length);
    4340         217 :         if (rc < 0) {
                                       /* Release the mask buffer before failing the load. */
                                       spdk_free(ctx->mask);
                                       ctx->mask = NULL;
    4341           0 :                 bs_load_ctx_fail(ctx, rc);
    4342           0 :                 return;
    4343             :         }
    4344             : 
    4345         217 :         spdk_bit_array_load_mask(ctx->bs->used_blobids, ctx->mask->mask);
    4346         217 :         spdk_free(ctx->mask);
    4347             : 
    4348         217 :         bs_load_complete(ctx);
    4349         217 : }
    4350             : 
                       /*
                        * Completion callback for the read of the used-clusters mask that
                        * bs_load_used_pages_cpl() issues into ctx->mask.
                        *
                        * seq     - the load sequence; reused for the next read.
                        * cb_arg  - the struct spdk_bs_load_ctx driving the load.
                        * bserrno - result of the read; non-zero aborts the load.
                        *
                        * On success the mask is loaded into ctx->used_clusters,
                        * bs->num_free_clusters is recomputed from it, the buffer is released and a
                        * new buffer is allocated for the used-blobids mask, whose read completes in
                        * bs_load_used_blobids_cpl(). Every failure path releases the mask buffer
                        * before calling bs_load_ctx_fail().
                        */
    4351             : static void
    4352         217 : bs_load_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4353             : {
    4354         217 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4355             :         uint64_t                lba, lba_count, mask_size;
    4356             :         int                     rc;
    4357             : 
    4358         217 :         if (bserrno != 0) {
                                       /* Same cleanup as the resize failure below. */
                                       spdk_free(ctx->mask);
                                       ctx->mask = NULL;
    4359           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4360           0 :                 return;
    4361             :         }
    4362             : 
    4363             :         /* The type must be correct */
    4364         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    4365             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits).
                                * The buffer was sized with md_page_size, so the bound uses the same unit. */
    4366         217 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * ctx->super->md_page_size *
    4367             :                                      8));
    4368             :         /*
    4369             :          * The length of the mask must be equal to or larger than the total number of clusters. It may be
    4370             :          * larger than the total number of clusters due to a failure spdk_bs_grow.
    4371             :          */
    4372         217 :         assert(ctx->mask->length >= ctx->bs->total_clusters);
    4373         217 :         if (ctx->mask->length > ctx->bs->total_clusters) {
    4374           5 :                 SPDK_WARNLOG("Shrink the used_clusters mask length to total_clusters\n");
    4375           5 :                 ctx->mask->length = ctx->bs->total_clusters;
    4376           5 :         }
    4377             : 
    4378         217 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->mask->length);
    4379         217 :         if (rc < 0) {
    4380           0 :                 spdk_free(ctx->mask);
                                       ctx->mask = NULL;
    4381           0 :                 bs_load_ctx_fail(ctx, rc);
    4382           0 :                 return;
    4383             :         }
    4384             : 
    4385         217 :         spdk_bit_array_load_mask(ctx->used_clusters, ctx->mask->mask);
    4386         217 :         ctx->bs->num_free_clusters = spdk_bit_array_count_clear(ctx->used_clusters);
    4387         217 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    4388             : 
    4389         217 :         spdk_free(ctx->mask);
    4390             : 
    4391             :         /* Read the used blobids mask */
    4392         217 :         mask_size = ctx->super->used_blobid_mask_len * ctx->super->md_page_size;
    4393         217 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4394             :                                  SPDK_MALLOC_DMA);
    4395         217 :         if (!ctx->mask) {
    4396           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4397           0 :                 return;
    4398             :         }
    4399         217 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_start);
    4400         217 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_blobid_mask_len);
    4401         434 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4402         217 :                              bs_load_used_blobids_cpl, ctx);
    4403         217 : }
    4404             : 
                       /*
                        * Completion callback for the read of the used-pages mask that
                        * bs_load_read_used_pages() issues into ctx->mask.
                        *
                        * seq     - the load sequence; reused for the next read.
                        * cb_arg  - the struct spdk_bs_load_ctx driving the load.
                        * bserrno - result of the read; non-zero aborts the load.
                        *
                        * On success the mask is loaded into bs->used_md_pages, the buffer is
                        * released and a new buffer is allocated for the used-clusters mask, whose
                        * read completes in bs_load_used_clusters_cpl(). Every failure path releases
                        * the mask buffer before calling bs_load_ctx_fail(), as
                        * bs_load_used_clusters_cpl() does on its resize failure.
                        */
    4405             : static void
    4406         222 : bs_load_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4407             : {
    4408         222 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4409             :         uint64_t                lba, lba_count, mask_size;
    4410             :         int                     rc;
    4411             : 
    4412         222 :         if (bserrno != 0) {
                                       /* The buffer allocated by bs_load_read_used_pages() is still ours. */
                                       spdk_free(ctx->mask);
                                       ctx->mask = NULL;
    4413           5 :                 bs_load_ctx_fail(ctx, bserrno);
    4414           5 :                 return;
    4415             :         }
    4416             : 
    4417             :         /* The type must be correct */
    4418         217 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_PAGES);
    4419             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    4420         217 :         assert(ctx->mask->length <= (ctx->super->used_page_mask_len * ctx->super->md_page_size *
    4421             :                                      8));
    4422             :         /* The length of the mask must be exactly equal to the size (in pages) of the metadata region */
    4423         217 :         if (ctx->mask->length != ctx->super->md_len) {
    4424           0 :                 SPDK_ERRLOG("mismatched md_len in used_pages mask: "
    4425             :                             "mask->length=%" PRIu32 " super->md_len=%" PRIu32 "\n",
    4426             :                             ctx->mask->length, ctx->super->md_len);
                                       /* NOTE(review): with NDEBUG this only logs and the load goes on
                                        * with the mismatched length - confirm that is acceptable. */
    4427           0 :                 assert(false);
    4428             :         }
    4429             : 
    4430         217 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->mask->length);
    4431         217 :         if (rc < 0) {
                                       spdk_free(ctx->mask);
                                       ctx->mask = NULL;
    4432           0 :                 bs_load_ctx_fail(ctx, rc);
    4433           0 :                 return;
    4434             :         }
    4435             : 
    4436         217 :         spdk_bit_array_load_mask(ctx->bs->used_md_pages, ctx->mask->mask);
    4437         217 :         spdk_free(ctx->mask);
    4438             : 
    4439             :         /* Read the used clusters mask */
    4440         217 :         mask_size = ctx->super->used_cluster_mask_len * ctx->super->md_page_size;
    4441         217 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    4442             :                                  SPDK_MALLOC_DMA);
    4443         217 :         if (!ctx->mask) {
    4444           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4445           0 :                 return;
    4446             :         }
    4447         217 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    4448         217 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    4449         434 :         bs_sequence_read_dev(seq, ctx->mask, lba, lba_count,
    4450         217 :                              bs_load_used_clusters_cpl, ctx);
    4451         222 : }
    4452             : 
                       /*
                        * Starts the chain of mask reads: allocates a zeroed DMA buffer sized
                        * used_page_mask_len * md_page_size (both taken from the super block in
                        * ctx->super), stores it in ctx->mask and reads the used-pages mask into it.
                        * The read completes in bs_load_used_pages_cpl().
                        *
                        * If the buffer cannot be allocated the load is failed with -ENOMEM.
                        */
    4453             : static void
    4454         227 : bs_load_read_used_pages(struct spdk_bs_load_ctx *ctx)
    4455             : {
    4456             :         uint64_t lba, lba_count, mask_size;
    4457             : 
    4458             :         /* Read the used pages mask */
    4459         227 :         mask_size = ctx->super->used_page_mask_len * ctx->super->md_page_size;
    4460         227 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL,
    4461             :                                  SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4462         227 :         if (!ctx->mask) {
    4463           5 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4464           5 :                 return;
    4465             :         }
    4466             : 
                               /* Both the start and the length are stored in pages; convert to LBAs. */
    4467         222 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_start);
    4468         222 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_page_mask_len);
    4469         444 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    4470         222 :                              bs_load_used_pages_cpl, ctx);
    4471         227 : }
    4472             : 
                       /*
                        * Walks the descriptors of one metadata page during replay and records what
                        * the page claims:
                        *  - EXTENT_RLE / EXTENT_PAGE: every non-zero cluster index is set in
                        *    ctx->used_clusters and bs->num_free_clusters is decremented.
                        *  - EXTENT_TABLE: every non-zero extent page index is appended to
                        *    ctx->extent_page_num (ctx->num_extent_pages grows accordingly).
                        *  - XATTR, XATTR_INTERNAL, FLAGS and non-terminating PADDING are skipped.
                        * A PADDING descriptor of length 0 ends the walk.
                        *
                        * ctx  - load context whose cluster map and extent page list are updated.
                        * page - the metadata page to parse.
                        *
                        * Returns 0 on success, -EINVAL for a malformed or unknown descriptor,
                        * -ENOSPC when a cluster is claimed while num_free_clusters is already 0,
                        * -ENOMEM when the extent page list cannot be grown.
                        *
                        * NOTE(review): only the descriptor header is kept inside the page by the
                        * check at the bottom of the loop; desc->length itself is not compared with
                        * the space left in the page before the payload is read - confirm callers
                        * only pass pages whose contents are trusted.
                        */
    4473             : static int
    4474         323 : bs_load_replay_md_parse_page(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_page *page)
    4475             : {
    4476         323 :         struct spdk_blob_store *bs = ctx->bs;
    4477             :         struct spdk_blob_md_descriptor *desc;
    4478         323 :         size_t  cur_desc = 0;
    4479             : 
    4480         323 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4481         933 :         while (cur_desc < sizeof(page->descriptors)) {
    4482         933 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    4483         298 :                         if (desc->length == 0) {
    4484             :                                 /* If padding and length are 0, this terminates the page */
    4485         298 :                                 break;
    4486             :                         }
    4487         635 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    4488             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    4489             :                         unsigned int                            i, j;
    4490          68 :                         unsigned int                            cluster_count = 0;
    4491             :                         uint32_t                                cluster_idx;
    4492             : 
    4493          68 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    4494             : 
                                               /* Each extent is a run: 'length' consecutive clusters starting
                                                * at 'cluster_idx'. */
    4495         136 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    4496         828 :                                 for (j = 0; j < desc_extent_rle->extents[i].length; j++) {
    4497         760 :                                         cluster_idx = desc_extent_rle->extents[i].cluster_idx;
    4498             :                                         /*
    4499             :                                          * cluster_idx = 0 means an unallocated cluster - don't mark that
    4500             :                                          * in the used cluster map.
    4501             :                                          */
    4502         760 :                                         if (cluster_idx != 0) {
    4503         540 :                                                 SPDK_NOTICELOG("Recover: cluster %" PRIu32 "\n", cluster_idx + j);
    4504         540 :                                                 spdk_bit_array_set(ctx->used_clusters, cluster_idx + j);
    4505         540 :                                                 if (bs->num_free_clusters == 0) {
    4506           0 :                                                         return -ENOSPC;
    4507             :                                                 }
    4508         540 :                                                 bs->num_free_clusters--;
    4509         540 :                                         }
    4510         760 :                                         cluster_count++;
    4511         760 :                                 }
    4512          68 :                         }
                                               /* A descriptor that describes no cluster at all is malformed. */
    4513          68 :                         if (cluster_count == 0) {
    4514           0 :                                 return -EINVAL;
    4515             :                         }
    4516         635 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4517             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    4518             :                         uint32_t                                        i;
    4519          78 :                         uint32_t                                        cluster_count = 0;
    4520             :                         uint32_t                                        cluster_idx;
    4521             :                         size_t                                          cluster_idx_length;
    4522             : 
    4523          78 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
                                               /* May wrap when length is too small; the check right below
                                                * rejects that case before the value is used. */
    4524          78 :                         cluster_idx_length = desc_extent->length - sizeof(desc_extent->start_cluster_idx);
    4525             : 
    4526          78 :                         if (desc_extent->length <= sizeof(desc_extent->start_cluster_idx) ||
    4527          78 :                             (cluster_idx_length % sizeof(desc_extent->cluster_idx[0]) != 0)) {
    4528           0 :                                 return -EINVAL;
    4529             :                         }
    4530             : 
    4531         978 :                         for (i = 0; i < cluster_idx_length / sizeof(desc_extent->cluster_idx[0]); i++) {
    4532         900 :                                 cluster_idx = desc_extent->cluster_idx[i];
    4533             :                                 /*
    4534             :                                  * cluster_idx = 0 means an unallocated cluster - don't mark that
    4535             :                                  * in the used cluster map.
    4536             :                                  */
    4537         900 :                                 if (cluster_idx != 0) {
                                                               /* NOTE(review): both conditions can only hold together
                                                                * if start_cluster_idx + cluster_count wraps, so this
                                                                * rejection is effectively unreachable. The intended
                                                                * check is not clear from here - left unchanged. */
    4538         900 :                                         if (cluster_idx < desc_extent->start_cluster_idx &&
    4539           0 :                                             cluster_idx >= desc_extent->start_cluster_idx + cluster_count) {
    4540           0 :                                                 return -EINVAL;
    4541             :                                         }
    4542         900 :                                         spdk_bit_array_set(ctx->used_clusters, cluster_idx);
    4543         900 :                                         if (bs->num_free_clusters == 0) {
    4544           0 :                                                 return -ENOSPC;
    4545             :                                         }
    4546         900 :                                         bs->num_free_clusters--;
    4547         900 :                                 }
    4548         900 :                                 cluster_count++;
    4549         900 :                         }
    4550             : 
    4551          78 :                         if (cluster_count == 0) {
    4552           0 :                                 return -EINVAL;
    4553             :                         }
    4554         567 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    4555             :                         /* Skip this item */
    4556         489 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    4557             :                         /* Skip this item */
    4558         394 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    4559             :                         /* Skip this item */
    4560         318 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    4561             :                         struct spdk_blob_md_descriptor_extent_table *desc_extent_table;
    4562         123 :                         uint32_t num_extent_pages = ctx->num_extent_pages;
    4563             :                         uint32_t i;
    4564             :                         size_t extent_pages_length;
    4565             :                         void *tmp;
    4566             : 
    4567         123 :                         desc_extent_table = (struct spdk_blob_md_descriptor_extent_table *)desc;
                                               /* May wrap when length is too small; the check right below
                                                * rejects that case before the value is used. */
    4568         123 :                         extent_pages_length = desc_extent_table->length - sizeof(desc_extent_table->num_clusters);
    4569             : 
                                               /* The payload must at least hold num_clusters (a table with no
                                                * extent page entries is still accepted), and what follows
                                                * must be a whole number of entries. */
    4570         123 :                         if (desc_extent_table->length < sizeof(desc_extent_table->num_clusters) ||
    4571         123 :                             (extent_pages_length % sizeof(desc_extent_table->extent_page[0]) != 0)) {
    4572           0 :                                 return -EINVAL;
    4573             :                         }
    4574             : 
                                               /* First pass: validate the entries and count how many extent
                                                * pages this table adds on top of those already collected. */
    4575         240 :                         for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4576         117 :                                 if (desc_extent_table->extent_page[i].page_idx != 0) {
    4577          78 :                                         if (desc_extent_table->extent_page[i].num_pages != 1) {
    4578           0 :                                                 return -EINVAL;
    4579             :                                         }
    4580          78 :                                         num_extent_pages += 1;
    4581          78 :                                 }
    4582         117 :                         }
    4583             : 
    4584         123 :                         if (num_extent_pages > 0) {
                                                       /* Grow through a temporary so the old array survives a
                                                        * failed realloc. */
    4585          78 :                                 tmp = realloc(ctx->extent_page_num, num_extent_pages * sizeof(uint32_t));
    4586          78 :                                 if (tmp == NULL) {
    4587           0 :                                         return -ENOMEM;
    4588             :                                 }
    4589          78 :                                 ctx->extent_page_num = tmp;
    4590             : 
    4591             :                                 /* Extent table entries contain md page numbers for extent pages.
    4592             :                                  * Zeroes represent unallocated extent pages, those are run-length-encoded.
    4593             :                                  */
    4594         156 :                                 for (i = 0; i < extent_pages_length / sizeof(desc_extent_table->extent_page[0]); i++) {
    4595          78 :                                         if (desc_extent_table->extent_page[i].page_idx != 0) {
    4596          78 :                                                 ctx->extent_page_num[ctx->num_extent_pages] = desc_extent_table->extent_page[i].page_idx;
    4597          78 :                                                 ctx->num_extent_pages += 1;
    4598          78 :                                         }
    4599          78 :                                 }
    4600          78 :                         }
    4601         123 :                 } else {
    4602             :                         /* Error */
    4603           0 :                         return -EINVAL;
    4604             :                 }
    4605             :                 /* Advance to the next descriptor */
    4606         635 :                 cur_desc += sizeof(*desc) + desc->length;
                                       /* Stop when a full descriptor header no longer fits in the page. */
    4607         635 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    4608          25 :                         break;
    4609             :                 }
    4610         610 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    4611             :         }
    4612         323 :         return 0;
    4613         323 : }
    4614             : 
                       /*
                        * Tells whether 'page' looks like a well-formed extent page.
                        *
                        * Returns true only when all of the following hold:
                        *  - the stored crc equals the one computed by blob_md_page_calc_crc(),
                        *  - sequence_num is 0,
                        *  - the first descriptor is of type EXTENT_PAGE,
                        *  - that descriptor (header plus payload) fits in page->descriptors,
                        *  - if another descriptor header fits behind it, that header's length is 0.
                        * Returns false otherwise.
                        */
    4615             : static bool
    4616        1884 : bs_load_cur_extent_page_valid(struct spdk_blob_md_page *page)
    4617             : {
    4618             :         uint32_t crc;
    4619        1884 :         struct spdk_blob_md_descriptor *desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    4620             :         size_t desc_len;
    4621             : 
    4622        1884 :         crc = blob_md_page_calc_crc(page);
    4623        1884 :         if (crc != page->crc) {
    4624           0 :                 return false;
    4625             :         }
    4626             : 
    4627             :         /* Extent page should always be of sequence num 0. */
    4628        1884 :         if (page->sequence_num != 0) {
    4629          55 :                 return false;
    4630             :         }
    4631             : 
    4632             :         /* Descriptor type must be EXTENT_PAGE. */
    4633        1829 :         if (desc->type != SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    4634         195 :                 return false;
    4635             :         }
    4636             : 
    4637             :         /* Descriptor length cannot exceed the page. */
    4638        1634 :         desc_len = sizeof(*desc) + desc->length;
    4639        1634 :         if (desc_len > sizeof(page->descriptors)) {
    4640           0 :                 return false;
    4641             :         }
    4642             : 
    4643             :         /* It has to be the only descriptor in the page. */
                               /* Only the length of the following header is inspected, and only when a
                                * whole header still fits behind the first descriptor. */
    4644        1634 :         if (desc_len + sizeof(*desc) <= sizeof(page->descriptors)) {
    4645        1634 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + desc_len);
    4646        1634 :                 if (desc->length != 0) {
    4647           0 :                         return false;
    4648             :                 }
    4649        1634 :         }
    4650             : 
    4651        1634 :         return true;
    4652        1884 : }
    4653             : 
                       /*
                        * Tells whether ctx->page, read from metadata page ctx->cur_page, is a valid
                        * blob metadata page.
                        *
                        * Returns false when the stored crc does not match the computed one, or when
                        * the page claims to be the first of a sequence (sequence_num == 0) but its id
                        * is not the blob id derived from ctx->cur_page. Returns true otherwise.
                        */
    4654             : static bool
    4655        8531 : bs_load_cur_md_page_valid(struct spdk_bs_load_ctx *ctx)
    4656             : {
    4657             :         uint32_t crc;
    4658        8531 :         struct spdk_blob_md_page *page = ctx->page;
    4659             : 
    4660        8531 :         crc = blob_md_page_calc_crc(page);
    4661        8531 :         if (crc != page->crc) {
    4662        8254 :                 return false;
    4663             :         }
    4664             : 
    4665             :         /* First page of a sequence should match the blobid. */
    4666         277 :         if (page->sequence_num == 0 &&
    4667         222 :             bs_page_to_blobid(ctx->cur_page) != page->id) {
    4668          27 :                 return false;
    4669             :         }
                               /* Debug-only sanity check: a page accepted here is expected not to pass
                                * as an extent page. The call vanishes under NDEBUG. */
    4670         250 :         assert(bs_load_cur_extent_page_valid(page) == false);
    4671             : 
    4672         250 :         return true;
    4673        8531 : }
    4674             : 
    4675             : static void bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx);
    4676             : 
                       /*
                        * Completion callback passed to bs_write_used_clusters() by
                        * bs_load_write_used_blobids_cpl(); last step of rewriting the masks.
                        *
                        * Releases ctx->mask (presumably the buffer the write helper allocated -
                        * confirm) and then either fails the load with bserrno or proceeds to
                        * bs_load_complete().
                        */
    4677             : static void
    4678         134 : bs_load_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4679             : {
    4680         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4681             : 
                               /* Freed before the error check so that both outcomes release it. */
    4682         134 :         spdk_free(ctx->mask);
    4683         134 :         ctx->mask = NULL;
    4684             : 
    4685         134 :         if (bserrno != 0) {
    4686           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4687           0 :                 return;
    4688             :         }
    4689             : 
    4690         134 :         bs_load_complete(ctx);
    4691         134 : }
    4692             : 
                       /*
                        * Completion callback passed to bs_write_used_blobids() by
                        * bs_load_write_used_pages_cpl().
                        *
                        * Releases ctx->mask (presumably the buffer the write helper allocated -
                        * confirm) and then either fails the load with bserrno or goes on to write
                        * the used-clusters mask, completing in bs_load_write_used_clusters_cpl().
                        */
    4693             : static void
    4694         134 : bs_load_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4695             : {
    4696         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4697             : 
                               /* Freed before the error check so that both outcomes release it. */
    4698         134 :         spdk_free(ctx->mask);
    4699         134 :         ctx->mask = NULL;
    4700             : 
    4701         134 :         if (bserrno != 0) {
    4702           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4703           0 :                 return;
    4704             :         }
    4705             : 
    4706         134 :         bs_write_used_clusters(seq, ctx, bs_load_write_used_clusters_cpl);
    4707         134 : }
    4708             : 
                       /*
                        * Completion callback passed to bs_write_used_md() by
                        * bs_load_write_used_md().
                        *
                        * Releases ctx->mask (presumably the buffer the write helper allocated -
                        * confirm) and then either fails the load with bserrno or goes on to write
                        * the used-blobids mask, completing in bs_load_write_used_blobids_cpl().
                        */
    4709             : static void
    4710         134 : bs_load_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4711             : {
    4712         134 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4713             : 
                               /* Freed before the error check so that both outcomes release it. */
    4714         134 :         spdk_free(ctx->mask);
    4715         134 :         ctx->mask = NULL;
    4716             : 
    4717         134 :         if (bserrno != 0) {
    4718           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4719           0 :                 return;
    4720             :         }
    4721             : 
    4722         134 :         bs_write_used_blobids(seq, ctx, bs_load_write_used_blobids_cpl);
    4723         134 : }
    4724             : 
                       /*
                        * Entry point of the mask rewrite chain used after replay: writes the used
                        * metadata pages mask on ctx->seq; bs_load_write_used_pages_cpl() continues
                        * with the blobids and clusters masks.
                        */
    4725             : static void
    4726         134 : bs_load_write_used_md(struct spdk_bs_load_ctx *ctx)
    4727             : {
    4728         134 :         bs_write_used_md(ctx->seq, ctx, bs_load_write_used_pages_cpl);
    4729         134 : }
    4730             : 
                       /*
                        * Called when the replay of one metadata page chain is done.
                        *
                        * Advances ctx->page_index to the next page that is not set in
                        * bs->used_md_pages. If that index is still below super->md_len the replay
                        * continues with that page; otherwise the replay is over: the clusters
                        * holding the metadata region are claimed, ctx->page is released and the
                        * rebuilt masks are written out via bs_load_write_used_md().
                        */
    4731             : static void
    4732        8481 : bs_load_replay_md_chain_cpl(struct spdk_bs_load_ctx *ctx)
    4733             : {
    4734             :         uint64_t num_md_clusters;
    4735             :         uint64_t i;
    4736             : 
    4737        8481 :         ctx->in_page_chain = false;
    4738             : 
                               /* Always move at least one page forward, then skip every page already
                                * marked as used. */
    4739        8481 :         do {
    4740        8576 :                 ctx->page_index++;
    4741        8576 :         } while (spdk_bit_array_get(ctx->bs->used_md_pages, ctx->page_index) == true);
    4742             : 
    4743        8481 :         if (ctx->page_index < ctx->super->md_len) {
    4744        8347 :                 ctx->cur_page = ctx->page_index;
    4745        8347 :                 bs_load_replay_cur_md_page(ctx);
    4746        8347 :         } else {
    4747             :                 /* Claim all of the clusters used by the metadata */
                                       /* Clusters 0 .. ceil((md_start + md_len) / pages_per_cluster) - 1. */
    4748         134 :                 num_md_clusters = spdk_divide_round_up(
    4749         134 :                                           ctx->super->md_start + ctx->super->md_len, ctx->bs->pages_per_cluster);
    4750         629 :                 for (i = 0; i < num_md_clusters; i++) {
    4751         495 :                         spdk_bit_array_set(ctx->used_clusters, i);
    4752         495 :                 }
                                       /* NOTE(review): subtracted unconditionally - assumes none of these
                                        * clusters was already counted during replay and that the counter
                                        * is at least num_md_clusters; confirm. */
    4753         134 :                 ctx->bs->num_free_clusters -= num_md_clusters;
    4754         134 :                 spdk_free(ctx->page);
    4755         134 :                 bs_load_write_used_md(ctx);
    4756             :         }
    4757        8481 : }
    4758             : /* Batch completion for the reads issued by bs_load_replay_extent_pages(): validates each extent page, marks it in used_md_pages, parses it, then releases the extent buffers and moves on to the next md page via bs_load_replay_md_chain_cpl(). Any failure ends the load through bs_load_ctx_fail(). */
    4759             : static void
    4760          78 : bs_load_replay_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4761             : {
    4762          78 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4763             :         uint32_t page_num;
    4764             :         uint64_t i;
    4765             :         /* NOTE(review): every error path below frees ctx->extent_pages but not ctx->extent_page_num; unless bs_load_ctx_fail() (not shown here) releases it, that array leaks - confirm. */
    4766          78 :         if (bserrno != 0) {
    4767           0 :                 spdk_free(ctx->extent_pages);
    4768           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4769           0 :                 return;
    4770             :         }
    4771             : 
    4772         156 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4773             :                 /* Extent pages are only read when they are referenced from within a chain of md pages.
    4774             :                  * The metadata is therefore corrupt if such a page is not a valid extent page. */
    4775          78 :                 if (bs_load_cur_extent_page_valid(&ctx->extent_pages[i]) != true) {
    4776           0 :                         spdk_free(ctx->extent_pages);
    4777           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4778           0 :                         return;
    4779             :                 }
    4780             : 
    4781          78 :                 page_num = ctx->extent_page_num[i];
    4782          78 :                 spdk_bit_array_set(ctx->bs->used_md_pages, page_num);
    4783          78 :                 if (bs_load_replay_md_parse_page(ctx, &ctx->extent_pages[i])) {
    4784           0 :                         spdk_free(ctx->extent_pages);
    4785           0 :                         bs_load_ctx_fail(ctx, -EILSEQ);
    4786           0 :                         return;
    4787             :                 }
    4788          78 :         }
    4789             :         /* Success: drop both per-chain extent buffers and reset the count before the next chain is replayed. */
    4790          78 :         spdk_free(ctx->extent_pages);
    4791          78 :         free(ctx->extent_page_num);
    4792          78 :         ctx->extent_page_num = NULL;
    4793          78 :         ctx->num_extent_pages = 0;
    4794             : 
    4795          78 :         bs_load_replay_md_chain_cpl(ctx);
    4796          78 : }
    4797             : /* Allocates one zeroed DMA buffer for ctx->num_extent_pages pages and reads every page listed in ctx->extent_page_num in a single batch; bs_load_replay_extent_page_cpl runs when the batch completes. Fails the load with -ENOMEM if the buffer cannot be allocated. */
    4798             : static void
    4799          78 : bs_load_replay_extent_pages(struct spdk_bs_load_ctx *ctx)
    4800             : {
    4801             :         spdk_bs_batch_t *batch;
    4802             :         uint32_t page;
    4803             :         uint64_t lba;
    4804             :         uint64_t i;
    4805             :         /* NOTE(review): the buffer is sized with super->md_page_size but indexed as an array (&ctx->extent_pages[i]); the two agree only if md_page_size equals sizeof(*ctx->extent_pages) - confirm. */
    4806          78 :         ctx->extent_pages = spdk_zmalloc(ctx->super->md_page_size * ctx->num_extent_pages, 0,
    4807             :                                          NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4808          78 :         if (!ctx->extent_pages) {
    4809           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4810           0 :                 return;
    4811             :         }
    4812             : 
    4813          78 :         batch = bs_sequence_to_batch(ctx->seq, bs_load_replay_extent_page_cpl, ctx);
    4814             :         /* NOTE(review): the page number is only bounded by the assert below; presumably the parser that filled extent_page_num validated it - confirm. */
    4815         156 :         for (i = 0; i < ctx->num_extent_pages; i++) {
    4816          78 :                 page = ctx->extent_page_num[i];
    4817          78 :                 assert(page < ctx->super->md_len);
    4818          78 :                 lba = bs_md_page_to_lba(ctx->bs, page);
    4819         156 :                 bs_batch_read_dev(batch, &ctx->extent_pages[i], lba,
    4820          78 :                                   bs_byte_to_lba(ctx->bs, ctx->super->md_page_size));
    4821          78 :         }
    4822             : 
    4823          78 :         bs_batch_close(batch);
    4824          78 : }
    4825             : /* Read completion for one md page during recovery. A valid page is replayed only if it is a chain head (sequence_num == 0) or a chain is already being followed; it is then claimed, parsed, and the walk continues with page->next, with the chain's extent pages, or with the next md page. */
    4826             : static void
    4827        8531 : bs_load_replay_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4828             : {
    4829        8531 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4830             :         uint32_t page_num;
    4831             :         struct spdk_blob_md_page *page;
    4832             : 
    4833        8531 :         if (bserrno != 0) {
    4834           0 :                 bs_load_ctx_fail(ctx, bserrno);
    4835           0 :                 return;
    4836             :         }
    4837             : 
    4838        8531 :         page_num = ctx->cur_page;
    4839        8531 :         page = ctx->page;
    4840        8531 :         if (bs_load_cur_md_page_valid(ctx) == true) {
    4841         250 :                 if (page->sequence_num == 0 || ctx->in_page_chain == true) {
    4842         245 :                         spdk_spin_lock(&ctx->bs->used_lock);
    4843         245 :                         bs_claim_md_page(ctx->bs, page_num);
    4844         245 :                         spdk_spin_unlock(&ctx->bs->used_lock);
    4845         245 :                         if (page->sequence_num == 0) { /* chain head: its page index is recorded as a used blob id */
    4846         195 :                                 SPDK_NOTICELOG("Recover: blob 0x%" PRIx32 "\n", page_num);
    4847         195 :                                 spdk_bit_array_set(ctx->bs->used_blobids, page_num);
    4848         195 :                         }
    4849         245 :                         if (bs_load_replay_md_parse_page(ctx, page)) {
    4850           0 :                                 bs_load_ctx_fail(ctx, -EILSEQ);
    4851           0 :                                 return;
    4852             :                         }
    4853         245 :                         if (page->next != SPDK_INVALID_MD_PAGE) { /* NOTE(review): page->next comes from disk and is only bounded by the assert in bs_load_replay_cur_md_page() */
    4854          50 :                                 ctx->in_page_chain = true;
    4855          50 :                                 ctx->cur_page = page->next;
    4856          50 :                                 bs_load_replay_cur_md_page(ctx);
    4857          50 :                                 return;
    4858             :                         }
    4859         195 :                         if (ctx->num_extent_pages != 0) { /* end of chain: read the extent pages recorded for it before moving on */
    4860          78 :                                 bs_load_replay_extent_pages(ctx);
    4861          78 :                                 return;
    4862             :                         }
    4863         117 :                 }
    4864         122 :         }
    4865        8403 :         bs_load_replay_md_chain_cpl(ctx); /* invalid page, non-head page seen outside a chain walk, or finished chain without extent pages */
    4866        8531 : }
    4867             : /* Issues a read of md page ctx->cur_page (one md_page_size unit) into ctx->page on the load sequence; bs_load_replay_md_cpl handles the result. */
    4868             : static void
    4869        8531 : bs_load_replay_cur_md_page(struct spdk_bs_load_ctx *ctx)
    4870             : {
    4871             :         uint64_t lba;
    4872             : 
    4873        8531 :         assert(ctx->cur_page < ctx->super->md_len);
    4874        8531 :         lba = bs_md_page_to_lba(ctx->bs, ctx->cur_page);
    4875       17062 :         bs_sequence_read_dev(ctx->seq, ctx->page, lba,
    4876        8531 :                              bs_byte_to_lba(ctx->bs, ctx->super->md_page_size),
    4877        8531 :                              bs_load_replay_md_cpl, ctx);
    4878        8531 : }
    4879             : /* Starts metadata replay at md page 0: resets both page cursors and allocates the single zeroed DMA page buffer (ctx->page) that each page read lands in. Fails the load with -ENOMEM if the buffer cannot be allocated. */
    4880             : static void
    4881         134 : bs_load_replay_md(struct spdk_bs_load_ctx *ctx)
    4882             : {
    4883         134 :         ctx->page_index = 0;
    4884         134 :         ctx->cur_page = 0;
    4885         134 :         ctx->page = spdk_zmalloc(ctx->bs->md_page_size, 0,
    4886             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    4887         134 :         if (!ctx->page) {
    4888           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4889           0 :                 return;
    4890             :         }
    4891         134 :         bs_load_replay_cur_md_page(ctx);
    4892         134 : }
    4893             : /* Recovery entry point, used by bs_load_super_cpl() when the used-blobid mask length is 0, the super block is not clean, or recovery is forced. Sizes the bit arrays, marks every cluster free, then rebuilds state by replaying the metadata pages. Any resize failure ends the load with -ENOMEM. */
    4894             : static void
    4895         134 : bs_recover(struct spdk_bs_load_ctx *ctx)
    4896             : {
    4897             :         int             rc;
    4898             : 
    4899         134 :         SPDK_NOTICELOG("Performing recovery on blobstore\n");
    4900         134 :         rc = spdk_bit_array_resize(&ctx->bs->used_md_pages, ctx->super->md_len);
    4901         134 :         if (rc < 0) {
    4902           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4903           0 :                 return;
    4904             :         }
    4905             : 
    4906         134 :         rc = spdk_bit_array_resize(&ctx->bs->used_blobids, ctx->super->md_len);
    4907         134 :         if (rc < 0) {
    4908           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4909           0 :                 return;
    4910             :         }
    4911             : 
    4912         134 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4913         134 :         if (rc < 0) {
    4914           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4915           0 :                 return;
    4916             :         }
    4917             : 
    4918         134 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->super->md_len);
    4919         134 :         if (rc < 0) {
    4920           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    4921           0 :                 return;
    4922             :         }
    4923             :         /* Start from "everything free"; the replay path subtracts the metadata clusters once all md pages have been visited. */
    4924         134 :         ctx->bs->num_free_clusters = ctx->bs->total_clusters;
    4925         134 :         bs_load_replay_md(ctx);
    4926         134 : }
    4927             : /* Copies geometry from the on-disk super block into the in-memory blobstore, first filling in defaults for super block fields that are 0, and sizes the used_clusters and open_blobids bit arrays. Returns 0 on success or -ENOMEM if a bit array resize fails. */
    4928             : static int
    4929         356 : bs_parse_super(struct spdk_bs_load_ctx *ctx)
    4930             : {
    4931             :         int rc;
    4932             :         /* A size of 0 in the super block is replaced by the full device size (blockcnt * blocklen). */
    4933         356 :         if (ctx->super->size == 0) {
    4934          10 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    4935          10 :         }
    4936             :         /* Likewise, io_unit_size and md_page_size of 0 default to SPDK_BS_PAGE_SIZE. */
    4937         356 :         if (ctx->super->io_unit_size == 0) {
    4938          10 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    4939          10 :         }
    4940         356 :         if (ctx->super->md_page_size == 0) {
    4941           5 :                 ctx->super->md_page_size = SPDK_BS_PAGE_SIZE;
    4942           5 :         }
    4943             :         /* NOTE(review): total_clusters divides by super->cluster_size; presumably bs_super_validate() has already rejected a zero cluster_size - confirm. */
    4944         356 :         ctx->bs->clean = 1;
    4945         356 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    4946         356 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    4947         356 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    4948         356 :         ctx->bs->md_page_size = ctx->super->md_page_size;
    4949         356 :         bs_init_per_cluster_fields(ctx->bs);
    4950         356 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    4951         356 :         if (rc < 0) {
    4952           0 :                 return -ENOMEM;
    4953             :         }
    4954         356 :         ctx->bs->md_start = ctx->super->md_start;
    4955         356 :         ctx->bs->md_len = ctx->super->md_len;
    4956         356 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    4957         356 :         if (rc < 0) {
    4958           0 :                 return -ENOMEM;
    4959             :         }
    4960             :         /* Data clusters are what remains after the clusters covering the first md_start + md_len pages. */
    4961         712 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    4962         356 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    4963         356 :         ctx->bs->super_blob = ctx->super->super_blob;
    4964         356 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    4965             : 
    4966         356 :         return 0;
    4967         356 : }
    4968             : /* Completion for the super block read issued by spdk_bs_load(): validates and parses the super block, then either runs recovery or continues the normal load with bs_load_read_used_pages(). */
    4969             : static void
    4970         386 : bs_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    4971             : {
    4972         386 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    4973             :         int rc;
    4974             :         /* NOTE(review): bserrno is never examined in this callback, so a failed super block read proceeds straight to validation of whatever is in ctx->super - confirm this is intended. */
    4975         386 :         rc = bs_super_validate(ctx->super, ctx->bs);
    4976         386 :         if (rc != 0) {
    4977          30 :                 bs_load_ctx_fail(ctx, rc);
    4978          30 :                 return;
    4979             :         }
    4980             : 
    4981         356 :         rc = bs_parse_super(ctx);
    4982         356 :         if (rc < 0) {
    4983           0 :                 bs_load_ctx_fail(ctx, rc);
    4984           0 :                 return;
    4985             :         }
    4986             :         /* Recover when no used-blobid mask was recorded, the blobstore was not marked clean, or the caller forced it. */
    4987         356 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0 || ctx->force_recover) {
    4988         134 :                 bs_recover(ctx);
    4989         134 :         } else {
    4990         222 :                 bs_load_read_used_pages(ctx);
    4991             :         }
    4992         386 : }
    4993             : /* Copies from src to dst only those option fields that lie entirely within the first src->opts_size bytes of struct spdk_bs_opts; other dst fields are left untouched. Returns 0 on success, -1 if src->opts_size is 0. */
    4994             : static inline int
    4995         395 : bs_opts_copy(struct spdk_bs_opts *src, struct spdk_bs_opts *dst)
    4996             : {
    4997             : 
    4998         395 :         if (!src->opts_size) {
    4999           0 :                 SPDK_ERRLOG("opts_size should not be zero value\n");
    5000           0 :                 return -1;
    5001             :         }
    5002             :         /* FIELD_OK(field): true when the end offset of field is within src->opts_size. NOTE(review): the expansion is unparenthesized; safe only because it is used as a whole if-condition. */
    5003             : #define FIELD_OK(field) \
    5004             :         offsetof(struct spdk_bs_opts, field) + sizeof(src->field) <= src->opts_size
    5005             : 
    5006             : #define SET_FIELD(field) \
    5007             :         if (FIELD_OK(field)) { \
    5008             :                 dst->field = src->field; \
    5009             :         } \
    5010             : 
    5011         395 :         SET_FIELD(cluster_sz);
    5012         395 :         SET_FIELD(num_md_pages);
    5013         395 :         SET_FIELD(max_md_ops);
    5014         395 :         SET_FIELD(max_channel_ops);
    5015         395 :         SET_FIELD(clear_method);
    5016             :         /* bstype is copied with memcpy() rather than by assignment through SET_FIELD. */
    5017         395 :         if (FIELD_OK(bstype)) {
    5018         395 :                 memcpy(&dst->bstype, &src->bstype, sizeof(dst->bstype));
    5019         395 :         }
    5020         395 :         SET_FIELD(md_page_size);
    5021         395 :         SET_FIELD(iter_cb_fn);
    5022         395 :         SET_FIELD(iter_cb_arg);
    5023         395 :         SET_FIELD(force_recover);
    5024         395 :         SET_FIELD(esnap_bs_dev_create);
    5025         395 :         SET_FIELD(esnap_ctx);
    5026             : 
    5027         395 :         dst->opts_size = src->opts_size;
    5028             : 
    5029             :         /* Do not remove this statement. If you add a new field, update the size in the
    5030             :          * assert below and add a corresponding SET_FIELD statement above. */
    5031             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_bs_opts) == 88, "Incorrect size");
    5032             : 
    5033             : #undef FIELD_OK
    5034             : #undef SET_FIELD
    5035             : 
    5036         395 :         return 0;
    5037         395 : }
    5038             : /* Loads an existing blobstore from dev. o may be NULL, in which case the defaults from spdk_bs_opts_init() are used. Failures detected in this function are reported by calling cb_fn(cb_arg, NULL, <negative errno>); otherwise the super block read is issued and bs_load_super_cpl continues the load. */
    5039             : void
    5040         401 : spdk_bs_load(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5041             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5042             : {
    5043             :         struct spdk_blob_store  *bs;
    5044             :         struct spdk_bs_cpl      cpl;
    5045             :         struct spdk_bs_load_ctx *ctx;
    5046         401 :         struct spdk_bs_opts     opts = {};
    5047             :         int err;
    5048             : 
    5049         401 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    5050             :         /* The physical block length must be a multiple of the logical block length. NOTE(review): a dev->blocklen of 0 would be a modulo by zero here - confirm callers cannot pass one. */
    5051         401 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    5052           5 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    5053           5 :                 dev->destroy(dev);
    5054           5 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5055           5 :                 return;
    5056             :         }
    5057             :         /* Start from the defaults, then overlay whatever fields the caller's (possibly smaller) opts struct provides. */
    5058         396 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5059         396 :         if (o) {
    5060         162 :                 if (bs_opts_copy(o, &opts)) {
    5061           0 :                         dev->destroy(dev);
    5062           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
    5063           0 :                         return;
    5064             :                 }
    5065         162 :         }
    5066             : 
    5067         396 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
    5068          10 :                 dev->destroy(dev);
    5069          10 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5070          10 :                 return;
    5071             :         }
    5072             : 
    5073         386 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5074         386 :         if (err) {
    5075           0 :                 dev->destroy(dev);
    5076           0 :                 cb_fn(cb_arg, NULL, err);
    5077           0 :                 return;
    5078             :         }
    5079             : 
    5080         386 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5081         386 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5082         386 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5083         386 :         cpl.u.bs_handle.bs = bs;
    5084             :         /* NOTE(review): unlike the earlier failure paths, the one below does not call dev->destroy() directly (presumably bs_free() takes care of the device) and does not free ctx->used_clusters, which leaks if bs_alloc() already created it - confirm both. */
    5085         386 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5086         386 :         if (!ctx->seq) {
    5087           0 :                 spdk_free(ctx->super);
    5088           0 :                 free(ctx);
    5089           0 :                 bs_free(bs);
    5090           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5091           0 :                 return;
    5092             :         }
    5093             : 
    5094             :         /* Read the super block */
    5095         772 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5096         386 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5097         386 :                              bs_load_super_cpl, ctx);
    5098         401 : }
    5099             : 
    5100             : /* END spdk_bs_load */
    5101             : 
    5102             : /* START spdk_bs_dump */
    5103             : /* Ends a dump: frees the super block buffer, moves the sequence's completion and bserrno into the blobstore (unload_cpl / unload_err), finishes the sequence with its completion type set to NONE, then frees the blobstore and the context. */
    5104             : static void
    5105           0 : bs_dump_finish(spdk_bs_sequence_t *seq, struct spdk_bs_load_ctx *ctx, int bserrno)
    5106             : {
    5107           0 :         spdk_free(ctx->super);
    5108             : 
    5109             :         /*
    5110             :          * We need to defer calling bs_call_cpl() until after
    5111             :          * dev destruction, so tuck these away for later use.
    5112             :          */
    5113           0 :         ctx->bs->unload_err = bserrno;
    5114           0 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5115           0 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5116             :         /* The real status was saved in unload_err above, so the sequence itself is finished with 0. */
    5117           0 :         bs_sequence_finish(seq, 0);
    5118           0 :         bs_free(ctx->bs);
    5119           0 :         free(ctx);
    5120           0 : }
    5121             : /* Prints one xattr descriptor to ctx->fp: its kind (XATTR or XATTR_INTERNAL) and name, the value as rendered by ctx->print_xattr_fn, and a hex dump of the value bytes, 16 per line. */
    5122             : static void
    5123           0 : bs_dump_print_xattr(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5124             : {
    5125             :         struct spdk_blob_md_descriptor_xattr *desc_xattr;
    5126             :         uint32_t i;
    5127             :         const char *type;
    5128             : 
    5129           0 :         desc_xattr = (struct spdk_blob_md_descriptor_xattr *)desc;
    5130             : 
    5131           0 :         if (desc_xattr->length !=
    5132           0 :             sizeof(desc_xattr->name_length) + sizeof(desc_xattr->value_length) +
    5133           0 :             desc_xattr->name_length + desc_xattr->value_length) { /* NOTE(review): empty body - the length inconsistency is detected but nothing is reported or skipped */
    5134           0 :         }
    5135             :         /* NOTE(review): name_length is read from the descriptor and no bound against the size of ctx->xattr_name is visible here - confirm the buffer can hold name_length + 1 bytes. */
    5136           0 :         memcpy(ctx->xattr_name, desc_xattr->name, desc_xattr->name_length);
    5137           0 :         ctx->xattr_name[desc_xattr->name_length] = '\0';
    5138           0 :         if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5139           0 :                 type = "XATTR";
    5140           0 :         } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5141           0 :                 type = "XATTR_INTERNAL";
    5142           0 :         } else {
    5143           0 :                 assert(false);
    5144             :                 type = "XATTR_?";
    5145             :         }
    5146           0 :         fprintf(ctx->fp, "%s: name = \"%s\"\n", type, ctx->xattr_name);
    5147           0 :         fprintf(ctx->fp, "       value = \"");
    5148           0 :         ctx->print_xattr_fn(ctx->fp, ctx->super->bstype.bstype, ctx->xattr_name,
    5149           0 :                             (void *)((uintptr_t)desc_xattr->name + desc_xattr->name_length), /* the value bytes start right after the name bytes */
    5150           0 :                             desc_xattr->value_length);
    5151           0 :         fprintf(ctx->fp, "\"\n");
    5152           0 :         for (i = 0; i < desc_xattr->value_length; i++) {
    5153           0 :                 if (i % 16 == 0) {
    5154           0 :                         fprintf(ctx->fp, "               ");
    5155           0 :                 }
    5156           0 :                 fprintf(ctx->fp, "%02" PRIx8 " ", *((uint8_t *)desc_xattr->name + desc_xattr->name_length + i));
    5157           0 :                 if ((i + 1) % 16 == 0) {
    5158           0 :                         fprintf(ctx->fp, "\n");
    5159           0 :                 }
    5160           0 :         }
    5161           0 :         if (i % 16 != 0) { /* terminate a final, partially filled hex line */
    5162           0 :                 fprintf(ctx->fp, "\n");
    5163           0 :         }
    5164           0 : }
    5165             : /* One entry of a flag table for bs_dump_print_type_bits(): the entry matches a flags word when (flags & mask) == val, and name is the text printed for it. */
    5166             : struct type_flag_desc {
    5167             :         uint64_t mask;
    5168             :         uint64_t val;
    5169             :         const char *name;
    5170             : };
    5171             : /* Prints one line to ctx->fp for each of the numflags entries in desc whose masked value matches flags, then reports any bits of flags not covered by a matching entry's mask as "Unknown". */
    5172             : static void
    5173           0 : bs_dump_print_type_bits(struct spdk_bs_load_ctx *ctx, uint64_t flags,
    5174             :                         struct type_flag_desc *desc, size_t numflags)
    5175             : {
    5176           0 :         uint64_t covered = 0;
    5177             :         size_t i;
    5178             : 
    5179           0 :         for (i = 0; i < numflags; i++) {
    5180           0 :                 if ((desc[i].mask & flags) != desc[i].val) {
    5181           0 :                         continue;
    5182             :                 }
    5183           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " %s", desc[i].val, desc[i].name);
    5184           0 :                 if (desc[i].mask != desc[i].val) { /* multi-bit field: also show which mask the value was matched under */
    5185           0 :                         fprintf(ctx->fp, " (mask 0x%" PRIx64 " value 0x%" PRIx64 ")",
    5186           0 :                                 desc[i].mask, desc[i].val);
    5187           0 :                 }
    5188           0 :                 fprintf(ctx->fp, "\n");
    5189           0 :                 covered |= desc[i].mask;
    5190           0 :         }
    5191           0 :         if ((flags & ~covered) != 0) {
    5192           0 :                 fprintf(ctx->fp, "\t\t 0x%016" PRIx64 " Unknown\n", flags & ~covered);
    5193           0 :         }
    5194           0 : }
    5195             : /* Prints the three flag words of a flags descriptor (invalid_flags, data_ro_flags, md_ro_flags) to ctx->fp, each followed by a breakdown of its bits from the matching table below. */
    5196             : static void
    5197           0 : bs_dump_print_type_flags(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5198             : {
    5199             :         struct spdk_blob_md_descriptor_flags *type_desc;
    5200             : #define ADD_FLAG(f) { f, f, #f }
    5201             : #define ADD_MASK_VAL(m, v) { m, v, #v }
    5202             :         static struct type_flag_desc invalid[] = { /* bits decoded from invalid_flags */
    5203             :                 ADD_FLAG(SPDK_BLOB_THIN_PROV),
    5204             :                 ADD_FLAG(SPDK_BLOB_INTERNAL_XATTR),
    5205             :                 ADD_FLAG(SPDK_BLOB_EXTENT_TABLE),
    5206             :         };
    5207             :         static struct type_flag_desc data_ro[] = { /* bits decoded from data_ro_flags */
    5208             :                 ADD_FLAG(SPDK_BLOB_READ_ONLY),
    5209             :         };
    5210             :         static struct type_flag_desc md_ro[] = { /* md_ro_flags: values matched under SPDK_BLOB_MD_RO_FLAGS_MASK */
    5211             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_DEFAULT),
    5212             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_NONE),
    5213             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_UNMAP),
    5214             :                 ADD_MASK_VAL(SPDK_BLOB_MD_RO_FLAGS_MASK, BLOB_CLEAR_WITH_WRITE_ZEROES),
    5215             :         };
    5216             : #undef ADD_FLAG
    5217             : #undef ADD_MASK_VAL
    5218             : 
    5219           0 :         type_desc = (struct spdk_blob_md_descriptor_flags *)desc;
    5220           0 :         fprintf(ctx->fp, "Flags:\n");
    5221           0 :         fprintf(ctx->fp, "\tinvalid: 0x%016" PRIx64 "\n", type_desc->invalid_flags);
    5222           0 :         bs_dump_print_type_bits(ctx, type_desc->invalid_flags, invalid,
    5223             :                                 SPDK_COUNTOF(invalid));
    5224           0 :         fprintf(ctx->fp, "\tdata_ro: 0x%016" PRIx64 "\n", type_desc->data_ro_flags);
    5225           0 :         bs_dump_print_type_bits(ctx, type_desc->data_ro_flags, data_ro,
    5226             :                                 SPDK_COUNTOF(data_ro));
    5227           0 :         fprintf(ctx->fp, "\t  md_ro: 0x%016" PRIx64 "\n", type_desc->md_ro_flags);
    5228           0 :         bs_dump_print_type_bits(ctx, type_desc->md_ro_flags, md_ro,
    5229             :                                 SPDK_COUNTOF(md_ro));
    5230           0 : }
    5231             : /* Prints the extent table descriptor to ctx->fp: one line (page index, length, LBA) for every entry whose page_idx is non-zero. */
    5232             : static void
    5233           0 : bs_dump_print_extent_table(struct spdk_bs_load_ctx *ctx, struct spdk_blob_md_descriptor *desc)
    5234             : {
    5235             :         struct spdk_blob_md_descriptor_extent_table *et_desc;
    5236             :         uint64_t num_extent_pages;
    5237             :         uint32_t et_idx;
    5238             :         /* The entry count is derived from the descriptor length. NOTE(review): length is not checked against sizeof(num_clusters) here, so a too-small length would wrap the unsigned subtraction - confirm it is validated earlier. */
    5239           0 :         et_desc = (struct spdk_blob_md_descriptor_extent_table *)desc;
    5240           0 :         num_extent_pages = (et_desc->length - sizeof(et_desc->num_clusters)) /
    5241             :                            sizeof(et_desc->extent_page[0]);
    5242             : 
    5243           0 :         fprintf(ctx->fp, "Extent table:\n");
    5244           0 :         for (et_idx = 0; et_idx < num_extent_pages; et_idx++) {
    5245           0 :                 if (et_desc->extent_page[et_idx].page_idx == 0) {
    5246             :                         /* Zeroes represent unallocated extent pages. */
    5247           0 :                         continue;
    5248             :                 }
    5249           0 :                 fprintf(ctx->fp, "\tExtent page: %5" PRIu32 " length %3" PRIu32
    5250           0 :                         " at LBA %" PRIu64 "\n", et_desc->extent_page[et_idx].page_idx,
    5251           0 :                         et_desc->extent_page[et_idx].num_pages,
    5252           0 :                         bs_md_page_to_lba(ctx->bs, et_desc->extent_page[et_idx].page_idx));
    5253           0 :         }
    5254           0 : }
    5255             : 
    5256             : static void
    5257           0 : bs_dump_print_md_page(struct spdk_bs_load_ctx *ctx)
    5258             : {
    5259           0 :         uint32_t page_idx = ctx->cur_page;
    5260           0 :         struct spdk_blob_md_page *page = ctx->page;
    5261             :         struct spdk_blob_md_descriptor *desc;
    5262           0 :         size_t cur_desc = 0;
    5263             :         uint32_t crc;
    5264             : 
    5265           0 :         fprintf(ctx->fp, "=========\n");
    5266           0 :         fprintf(ctx->fp, "Metadata Page Index: %" PRIu32 " (0x%" PRIx32 ")\n", page_idx, page_idx);
    5267           0 :         fprintf(ctx->fp, "Start LBA: %" PRIu64 "\n", bs_md_page_to_lba(ctx->bs, page_idx));
    5268           0 :         fprintf(ctx->fp, "Blob ID: 0x%" PRIx64 "\n", page->id);
    5269           0 :         fprintf(ctx->fp, "Sequence: %" PRIu32 "\n", page->sequence_num);
    5270           0 :         if (page->next == SPDK_INVALID_MD_PAGE) {
    5271           0 :                 fprintf(ctx->fp, "Next: None\n");
    5272           0 :         } else {
    5273           0 :                 fprintf(ctx->fp, "Next: %" PRIu32 "\n", page->next);
    5274             :         }
    5275           0 :         fprintf(ctx->fp, "In used bit array%s:", ctx->super->clean ? "" : " (not clean: dubious)");
    5276           0 :         if (spdk_bit_array_get(ctx->bs->used_md_pages, page_idx)) {
    5277           0 :                 fprintf(ctx->fp, " md");
    5278           0 :         }
    5279           0 :         if (spdk_bit_array_get(ctx->bs->used_blobids, page_idx)) {
    5280           0 :                 fprintf(ctx->fp, " blob");
    5281           0 :         }
    5282           0 :         fprintf(ctx->fp, "\n");
    5283             : 
    5284           0 :         crc = blob_md_page_calc_crc(page);
    5285           0 :         fprintf(ctx->fp, "CRC: 0x%" PRIx32 " (%s)\n", page->crc, crc == page->crc ? "OK" : "Mismatch");
    5286             : 
    5287           0 :         desc = (struct spdk_blob_md_descriptor *)page->descriptors;
    5288           0 :         while (cur_desc < sizeof(page->descriptors)) {
    5289           0 :                 if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_PADDING) {
    5290           0 :                         if (desc->length == 0) {
    5291             :                                 /* If padding and length are 0, this terminates the page */
    5292           0 :                                 break;
    5293             :                         }
    5294           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_RLE) {
    5295             :                         struct spdk_blob_md_descriptor_extent_rle       *desc_extent_rle;
    5296             :                         unsigned int                            i;
    5297             : 
    5298           0 :                         desc_extent_rle = (struct spdk_blob_md_descriptor_extent_rle *)desc;
    5299             : 
    5300           0 :                         for (i = 0; i < desc_extent_rle->length / sizeof(desc_extent_rle->extents[0]); i++) {
    5301           0 :                                 if (desc_extent_rle->extents[i].cluster_idx != 0) {
    5302           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5303           0 :                                                 desc_extent_rle->extents[i].cluster_idx);
    5304           0 :                                 } else {
    5305           0 :                                         fprintf(ctx->fp, "Unallocated Extent - ");
    5306             :                                 }
    5307           0 :                                 fprintf(ctx->fp, " Length: %" PRIu32, desc_extent_rle->extents[i].length);
    5308           0 :                                 fprintf(ctx->fp, "\n");
    5309           0 :                         }
    5310           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_PAGE) {
    5311             :                         struct spdk_blob_md_descriptor_extent_page      *desc_extent;
    5312             :                         unsigned int                                    i;
    5313             : 
    5314           0 :                         desc_extent = (struct spdk_blob_md_descriptor_extent_page *)desc;
    5315             : 
    5316           0 :                         for (i = 0; i < desc_extent->length / sizeof(desc_extent->cluster_idx[0]); i++) {
    5317           0 :                                 if (desc_extent->cluster_idx[i] != 0) {
    5318           0 :                                         fprintf(ctx->fp, "Allocated Extent - Start: %" PRIu32,
    5319           0 :                                                 desc_extent->cluster_idx[i]);
    5320           0 :                                 } else {
    5321           0 :                                         fprintf(ctx->fp, "Unallocated Extent");
    5322             :                                 }
    5323           0 :                                 fprintf(ctx->fp, "\n");
    5324           0 :                         }
    5325           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR) {
    5326           0 :                         bs_dump_print_xattr(ctx, desc);
    5327           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_XATTR_INTERNAL) {
    5328           0 :                         bs_dump_print_xattr(ctx, desc);
    5329           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_FLAGS) {
    5330           0 :                         bs_dump_print_type_flags(ctx, desc);
    5331           0 :                 } else if (desc->type == SPDK_MD_DESCRIPTOR_TYPE_EXTENT_TABLE) {
    5332           0 :                         bs_dump_print_extent_table(ctx, desc);
    5333           0 :                 } else {
    5334             :                         /* Error */
    5335           0 :                         fprintf(ctx->fp, "Unknown descriptor type %" PRIu8 "\n", desc->type);
    5336             :                 }
    5337             :                 /* Advance to the next descriptor */
    5338           0 :                 cur_desc += sizeof(*desc) + desc->length;
    5339           0 :                 if (cur_desc + sizeof(*desc) > sizeof(page->descriptors)) {
    5340           0 :                         break;
    5341             :                 }
    5342           0 :                 desc = (struct spdk_blob_md_descriptor *)((uintptr_t)page->descriptors + cur_desc);
    5343             :         }
    5344           0 : }
    5345             : 
    5346             : static void
    5347           0 : bs_dump_read_md_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5348             : {
    5349           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5350             : 
    5351           0 :         if (bserrno != 0) {
    5352           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5353           0 :                 return;
    5354             :         }
    5355             : 
    5356           0 :         if (ctx->page->id != 0) {
    5357           0 :                 bs_dump_print_md_page(ctx);
    5358           0 :         }
    5359             : 
    5360           0 :         ctx->cur_page++;
    5361             : 
    5362           0 :         if (ctx->cur_page < ctx->super->md_len) {
    5363           0 :                 bs_dump_read_md_page(seq, ctx);
    5364           0 :         } else {
    5365           0 :                 spdk_free(ctx->page);
    5366           0 :                 bs_dump_finish(seq, ctx, 0);
    5367             :         }
    5368           0 : }
    5369             : 
    5370             : static void
    5371           0 : bs_dump_read_md_page(spdk_bs_sequence_t *seq, void *cb_arg)
    5372             : {
    5373           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5374             :         uint64_t lba;
    5375             : 
    5376           0 :         assert(ctx->cur_page < ctx->super->md_len);
    5377           0 :         lba = bs_page_to_lba(ctx->bs, ctx->super->md_start + ctx->cur_page);
    5378           0 :         bs_sequence_read_dev(seq, ctx->page, lba,
    5379           0 :                              bs_byte_to_lba(ctx->bs, ctx->super->md_page_size),
    5380           0 :                              bs_dump_read_md_page_cpl, ctx);
    5381           0 : }
    5382             : 
    5383             : static void
    5384           0 : bs_dump_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5385             : {
    5386           0 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5387             :         int rc;
    5388             : 
    5389           0 :         fprintf(ctx->fp, "Signature: \"%.8s\" ", ctx->super->signature);
    5390           0 :         if (memcmp(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5391           0 :                    sizeof(ctx->super->signature)) != 0) {
    5392           0 :                 fprintf(ctx->fp, "(Mismatch)\n");
    5393           0 :                 bs_dump_finish(seq, ctx, bserrno);
    5394           0 :                 return;
    5395             :         } else {
    5396           0 :                 fprintf(ctx->fp, "(OK)\n");
    5397             :         }
    5398           0 :         fprintf(ctx->fp, "Version: %" PRIu32 "\n", ctx->super->version);
    5399           0 :         fprintf(ctx->fp, "CRC: 0x%x (%s)\n", ctx->super->crc,
    5400           0 :                 (ctx->super->crc == blob_md_page_calc_crc(ctx->super)) ? "OK" : "Mismatch");
    5401           0 :         fprintf(ctx->fp, "Blobstore Type: %.*s\n", SPDK_BLOBSTORE_TYPE_LENGTH, ctx->super->bstype.bstype);
    5402           0 :         fprintf(ctx->fp, "Cluster Size: %" PRIu32 "\n", ctx->super->cluster_size);
    5403           0 :         fprintf(ctx->fp, "Super Blob ID: ");
    5404           0 :         if (ctx->super->super_blob == SPDK_BLOBID_INVALID) {
    5405           0 :                 fprintf(ctx->fp, "(None)\n");
    5406           0 :         } else {
    5407           0 :                 fprintf(ctx->fp, "0x%" PRIx64 "\n", ctx->super->super_blob);
    5408             :         }
    5409           0 :         fprintf(ctx->fp, "Clean: %" PRIu32 "\n", ctx->super->clean);
    5410           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Start: %" PRIu32 "\n", ctx->super->used_page_mask_start);
    5411           0 :         fprintf(ctx->fp, "Used Metadata Page Mask Length: %" PRIu32 "\n", ctx->super->used_page_mask_len);
    5412           0 :         fprintf(ctx->fp, "Used Cluster Mask Start: %" PRIu32 "\n", ctx->super->used_cluster_mask_start);
    5413           0 :         fprintf(ctx->fp, "Used Cluster Mask Length: %" PRIu32 "\n", ctx->super->used_cluster_mask_len);
    5414           0 :         fprintf(ctx->fp, "Used Blob ID Mask Start: %" PRIu32 "\n", ctx->super->used_blobid_mask_start);
    5415           0 :         fprintf(ctx->fp, "Used Blob ID Mask Length: %" PRIu32 "\n", ctx->super->used_blobid_mask_len);
    5416           0 :         fprintf(ctx->fp, "Metadata Start: %" PRIu32 "\n", ctx->super->md_start);
    5417           0 :         fprintf(ctx->fp, "Metadata Length: %" PRIu32 "\n", ctx->super->md_len);
    5418             : 
    5419           0 :         ctx->cur_page = 0;
    5420           0 :         ctx->page = spdk_zmalloc(ctx->super->md_page_size, 0,
    5421             :                                  NULL, SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5422           0 :         if (!ctx->page) {
    5423           0 :                 bs_dump_finish(seq, ctx, -ENOMEM);
    5424           0 :                 return;
    5425             :         }
    5426             : 
    5427           0 :         rc = bs_parse_super(ctx);
    5428           0 :         if (rc < 0) {
    5429           0 :                 bs_load_ctx_fail(ctx, rc);
    5430           0 :                 return;
    5431             :         }
    5432             : 
    5433           0 :         bs_load_read_used_pages(ctx);
    5434           0 : }
    5435             : 
    5436             : void
    5437           0 : spdk_bs_dump(struct spdk_bs_dev *dev, FILE *fp, spdk_bs_dump_print_xattr print_xattr_fn,
    5438             :              spdk_bs_op_complete cb_fn, void *cb_arg)
    5439             : {
    5440             :         struct spdk_blob_store  *bs;
    5441             :         struct spdk_bs_cpl      cpl;
    5442             :         struct spdk_bs_load_ctx *ctx;
    5443           0 :         struct spdk_bs_opts     opts = {};
    5444             :         int err;
    5445             : 
    5446           0 :         SPDK_DEBUGLOG(blob, "Dumping blobstore from dev %p\n", dev);
    5447             : 
    5448           0 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5449             : 
    5450           0 :         err = bs_alloc(dev, &opts, &bs, &ctx);
    5451           0 :         if (err) {
    5452           0 :                 dev->destroy(dev);
    5453           0 :                 cb_fn(cb_arg, err);
    5454           0 :                 return;
    5455             :         }
    5456             : 
    5457           0 :         ctx->dumping = true;
    5458           0 :         ctx->fp = fp;
    5459           0 :         ctx->print_xattr_fn = print_xattr_fn;
    5460             : 
    5461           0 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5462           0 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5463           0 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5464             : 
    5465           0 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5466           0 :         if (!ctx->seq) {
    5467           0 :                 spdk_free(ctx->super);
    5468           0 :                 free(ctx);
    5469           0 :                 bs_free(bs);
    5470           0 :                 cb_fn(cb_arg, -ENOMEM);
    5471           0 :                 return;
    5472             :         }
    5473             : 
    5474             :         /* Read the super block */
    5475           0 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5476           0 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5477           0 :                              bs_dump_super_cpl, ctx);
    5478           0 : }
    5479             : 
    5480             : /* END spdk_bs_dump */
    5481             : 
    5482             : /* START spdk_bs_init */
    5483             : 
    5484             : static void
    5485         592 : bs_init_persist_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5486             : {
    5487         592 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5488             : 
    5489         592 :         ctx->bs->used_clusters = spdk_bit_pool_create_from_array(ctx->used_clusters);
    5490         592 :         spdk_free(ctx->super);
    5491         592 :         free(ctx);
    5492             : 
    5493         592 :         bs_sequence_finish(seq, bserrno);
    5494         592 : }
    5495             : 
    5496             : static void
    5497         592 : bs_init_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5498             : {
    5499         592 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5500             : 
    5501             :         /* Write super block */
    5502        1184 :         bs_sequence_write_dev(seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    5503         592 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    5504         592 :                               bs_init_persist_super_cpl, ctx);
    5505         592 : }
    5506             : 
    5507             : void
    5508         612 : spdk_bs_init(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    5509             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    5510             : {
    5511             :         struct spdk_bs_load_ctx *ctx;
    5512             :         struct spdk_blob_store  *bs;
    5513             :         struct spdk_bs_cpl      cpl;
    5514             :         spdk_bs_sequence_t      *seq;
    5515             :         spdk_bs_batch_t         *batch;
    5516             :         uint64_t                num_md_lba;
    5517             :         uint64_t                num_md_pages;
    5518             :         uint64_t                num_md_clusters;
    5519             :         uint64_t                max_used_cluster_mask_len;
    5520             :         uint32_t                i;
    5521         612 :         struct spdk_bs_opts     opts = {};
    5522             :         int                     rc;
    5523             :         uint64_t                lba, lba_count;
    5524             : 
    5525         612 :         SPDK_DEBUGLOG(blob, "Initializing blobstore on dev %p\n", dev);
    5526         612 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    5527           5 :                 SPDK_ERRLOG("unsupported dev block length of %d\n",
    5528             :                             dev->blocklen);
    5529           5 :                 dev->destroy(dev);
    5530           5 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5531           5 :                 return;
    5532             :         }
    5533             : 
    5534         607 :         spdk_bs_opts_init(&opts, sizeof(opts));
    5535         607 :         if (o) {
    5536         228 :                 if (bs_opts_copy(o, &opts)) {
    5537           0 :                         dev->destroy(dev);
    5538           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
    5539           0 :                         return;
    5540             :                 }
    5541         228 :         }
    5542             : 
    5543         607 :         if (bs_opts_verify(&opts) != 0) {
    5544          10 :                 dev->destroy(dev);
    5545          10 :                 cb_fn(cb_arg, NULL, -EINVAL);
    5546          10 :                 return;
    5547             :         }
    5548             : 
    5549         597 :         rc = bs_alloc(dev, &opts, &bs, &ctx);
    5550         597 :         if (rc) {
    5551           0 :                 dev->destroy(dev);
    5552           0 :                 cb_fn(cb_arg, NULL, rc);
    5553           0 :                 return;
    5554             :         }
    5555             : 
    5556         597 :         if (opts.num_md_pages == SPDK_BLOB_OPTS_NUM_MD_PAGES) {
    5557             :                 /* By default, allocate 1 page per cluster.
    5558             :                  * Technically, this over-allocates metadata
    5559             :                  * because more metadata will reduce the number
    5560             :                  * of usable clusters. This can be addressed with
    5561             :                  * more complex math in the future.
    5562             :                  */
    5563         587 :                 bs->md_len = bs->total_clusters;
    5564         587 :         } else {
    5565          10 :                 bs->md_len = opts.num_md_pages;
    5566             :         }
    5567         597 :         rc = spdk_bit_array_resize(&bs->used_md_pages, bs->md_len);
    5568         597 :         if (rc < 0) {
    5569           0 :                 spdk_free(ctx->super);
    5570           0 :                 free(ctx);
    5571           0 :                 bs_free(bs);
    5572           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5573           0 :                 return;
    5574             :         }
    5575             : 
    5576         597 :         rc = spdk_bit_array_resize(&bs->used_blobids, bs->md_len);
    5577         597 :         if (rc < 0) {
    5578           0 :                 spdk_free(ctx->super);
    5579           0 :                 free(ctx);
    5580           0 :                 bs_free(bs);
    5581           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5582           0 :                 return;
    5583             :         }
    5584             : 
    5585         597 :         rc = spdk_bit_array_resize(&bs->open_blobids, bs->md_len);
    5586         597 :         if (rc < 0) {
    5587           0 :                 spdk_free(ctx->super);
    5588           0 :                 free(ctx);
    5589           0 :                 bs_free(bs);
    5590           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5591           0 :                 return;
    5592             :         }
    5593             : 
    5594         597 :         memcpy(ctx->super->signature, SPDK_BS_SUPER_BLOCK_SIG,
    5595             :                sizeof(ctx->super->signature));
    5596         597 :         ctx->super->version = SPDK_BS_VERSION;
    5597         597 :         ctx->super->length = sizeof(*ctx->super);
    5598         597 :         ctx->super->super_blob = bs->super_blob;
    5599         597 :         ctx->super->clean = 0;
    5600         597 :         ctx->super->cluster_size = bs->cluster_sz;
    5601         597 :         ctx->super->io_unit_size = bs->io_unit_size;
    5602         597 :         ctx->super->md_page_size = bs->md_page_size;
    5603         597 :         memcpy(&ctx->super->bstype, &bs->bstype, sizeof(bs->bstype));
    5604             : 
    5605             :         /* Calculate how many pages the metadata consumes at the front
    5606             :          * of the disk.
    5607             :          */
    5608             : 
    5609             :         /* The super block uses 1 page */
    5610         597 :         num_md_pages = 1;
    5611             : 
    5612             :         /* The used_md_pages mask requires 1 bit per metadata page, rounded
    5613             :          * up to the nearest page, plus a header.
    5614             :          */
    5615         597 :         ctx->super->used_page_mask_start = num_md_pages;
    5616        1194 :         ctx->super->used_page_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5617         597 :                                          spdk_divide_round_up(bs->md_len, 8),
    5618         597 :                                          ctx->super->md_page_size);
    5619         597 :         num_md_pages += ctx->super->used_page_mask_len;
    5620             : 
    5621             :         /* The used_clusters mask requires 1 bit per cluster, rounded
    5622             :          * up to the nearest page, plus a header.
    5623             :          */
    5624         597 :         ctx->super->used_cluster_mask_start = num_md_pages;
    5625        1194 :         ctx->super->used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5626         597 :                                             spdk_divide_round_up(bs->total_clusters, 8),
    5627         597 :                                             ctx->super->md_page_size);
    5628             :         /* The blobstore might be extended, then the used_cluster bitmap will need more space.
    5629             :          * Here we calculate the max clusters we can support according to the
    5630             :          * num_md_pages (bs->md_len).
    5631             :          */
    5632        1194 :         max_used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5633         597 :                                     spdk_divide_round_up(bs->md_len, 8),
    5634         597 :                                     ctx->super->md_page_size);
    5635         597 :         max_used_cluster_mask_len = spdk_max(max_used_cluster_mask_len,
    5636             :                                              ctx->super->used_cluster_mask_len);
    5637         597 :         num_md_pages += max_used_cluster_mask_len;
    5638             : 
    5639             :         /* The used_blobids mask requires 1 bit per metadata page, rounded
    5640             :          * up to the nearest page, plus a header.
    5641             :          */
    5642         597 :         ctx->super->used_blobid_mask_start = num_md_pages;
    5643        1194 :         ctx->super->used_blobid_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    5644         597 :                                            spdk_divide_round_up(bs->md_len, 8),
    5645         597 :                                            ctx->super->md_page_size);
    5646         597 :         num_md_pages += ctx->super->used_blobid_mask_len;
    5647             : 
    5648             :         /* The metadata region size was chosen above */
    5649         597 :         ctx->super->md_start = bs->md_start = num_md_pages;
    5650         597 :         ctx->super->md_len = bs->md_len;
    5651         597 :         num_md_pages += bs->md_len;
    5652             : 
    5653         597 :         num_md_lba = bs_page_to_lba(bs, num_md_pages);
    5654             : 
    5655         597 :         ctx->super->size = dev->blockcnt * dev->blocklen;
    5656             : 
    5657         597 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    5658             : 
    5659         597 :         num_md_clusters = spdk_divide_round_up(num_md_pages, bs->pages_per_cluster);
    5660         597 :         if (num_md_clusters > bs->total_clusters) {
    5661           5 :                 SPDK_ERRLOG("Blobstore metadata cannot use more clusters than is available, "
    5662             :                             "please decrease number of pages reserved for metadata "
    5663             :                             "or increase cluster size.\n");
    5664           5 :                 spdk_free(ctx->super);
    5665           5 :                 spdk_bit_array_free(&ctx->used_clusters);
    5666           5 :                 free(ctx);
    5667           5 :                 bs_free(bs);
    5668           5 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5669           5 :                 return;
    5670             :         }
    5671             :         /* Claim all of the clusters used by the metadata */
    5672       79368 :         for (i = 0; i < num_md_clusters; i++) {
    5673       78776 :                 spdk_bit_array_set(ctx->used_clusters, i);
    5674       78776 :         }
    5675             : 
    5676         592 :         bs->num_free_clusters -= num_md_clusters;
    5677         592 :         bs->total_data_clusters = bs->num_free_clusters;
    5678             : 
    5679         592 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
    5680         592 :         cpl.u.bs_handle.cb_fn = cb_fn;
    5681         592 :         cpl.u.bs_handle.cb_arg = cb_arg;
    5682         592 :         cpl.u.bs_handle.bs = bs;
    5683             : 
    5684         592 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5685         592 :         if (!seq) {
    5686           0 :                 spdk_free(ctx->super);
    5687           0 :                 free(ctx);
    5688           0 :                 bs_free(bs);
    5689           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    5690           0 :                 return;
    5691             :         }
    5692             : 
    5693         592 :         batch = bs_sequence_to_batch(seq, bs_init_trim_cpl, ctx);
    5694             : 
    5695             :         /* Clear metadata space */
    5696         592 :         bs_batch_write_zeroes_dev(batch, 0, num_md_lba);
    5697             : 
    5698         592 :         lba = num_md_lba;
    5699         592 :         lba_count = ctx->bs->dev->blockcnt - lba;
    5700         592 :         switch (opts.clear_method) {
    5701             :         case BS_CLEAR_WITH_UNMAP:
    5702             :                 /* Trim data clusters */
    5703         572 :                 bs_batch_unmap_dev(batch, lba, lba_count);
    5704         572 :                 break;
    5705             :         case BS_CLEAR_WITH_WRITE_ZEROES:
    5706             :                 /* Write_zeroes to data clusters */
    5707           0 :                 bs_batch_write_zeroes_dev(batch, lba, lba_count);
    5708           0 :                 break;
    5709          20 :         case BS_CLEAR_WITH_NONE:
    5710             :         default:
    5711          20 :                 break;
    5712             :         }
    5713             : 
    5714         592 :         bs_batch_close(batch);
    5715         612 : }
    5716             : 
    5717             : /* END spdk_bs_init */
    5718             : 
    5719             : /* START spdk_bs_destroy */
    5720             : 
    5721             : static void
    5722           5 : bs_destroy_trim_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5723             : {
    5724           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5725           5 :         struct spdk_blob_store *bs = ctx->bs;
    5726             : 
    5727             :         /*
    5728             :          * We need to defer calling bs_call_cpl() until after
    5729             :          * dev destruction, so tuck these away for later use.
    5730             :          */
    5731           5 :         bs->unload_err = bserrno;
    5732           5 :         memcpy(&bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5733           5 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5734             : 
    5735           5 :         bs_sequence_finish(seq, bserrno);
    5736             : 
    5737           5 :         bs_free(bs);
    5738           5 :         free(ctx);
    5739           5 : }
    5740             : 
    5741             : void
    5742           5 : spdk_bs_destroy(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn,
    5743             :                 void *cb_arg)
    5744             : {
    5745             :         struct spdk_bs_cpl      cpl;
    5746             :         spdk_bs_sequence_t      *seq;
    5747             :         struct spdk_bs_load_ctx *ctx;
    5748             : 
    5749           5 :         SPDK_DEBUGLOG(blob, "Destroying blobstore\n");
    5750             : 
    5751           5 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5752           0 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5753           0 :                 cb_fn(cb_arg, -EBUSY);
    5754           0 :                 return;
    5755             :         }
    5756             : 
    5757           5 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5758           5 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5759           5 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5760             : 
    5761           5 :         ctx = calloc(1, sizeof(*ctx));
    5762           5 :         if (!ctx) {
    5763           0 :                 cb_fn(cb_arg, -ENOMEM);
    5764           0 :                 return;
    5765             :         }
    5766             : 
    5767           5 :         ctx->bs = bs;
    5768             : 
    5769           5 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5770           5 :         if (!seq) {
    5771           0 :                 free(ctx);
    5772           0 :                 cb_fn(cb_arg, -ENOMEM);
    5773           0 :                 return;
    5774             :         }
    5775             : 
    5776             :         /* Write zeroes to the super block */
    5777          10 :         bs_sequence_write_zeroes_dev(seq,
    5778           5 :                                      bs_page_to_lba(bs, 0),
    5779           5 :                                      bs_byte_to_lba(bs, sizeof(struct spdk_bs_super_block)),
    5780           5 :                                      bs_destroy_trim_cpl, ctx);
    5781           5 : }
    5782             : 
    5783             : /* END spdk_bs_destroy */
    5784             : 
    5785             : /* START spdk_bs_unload */
    5786             : 
    5787             : static void
    5788         819 : bs_unload_finish(struct spdk_bs_load_ctx *ctx, int bserrno)
    5789             : {
    5790         819 :         spdk_bs_sequence_t *seq = ctx->seq;
    5791             : 
    5792         819 :         spdk_free(ctx->super);
    5793             : 
    5794             :         /*
    5795             :          * We need to defer calling bs_call_cpl() until after
    5796             :          * dev destruction, so tuck these away for later use.
    5797             :          */
    5798         819 :         ctx->bs->unload_err = bserrno;
    5799         819 :         memcpy(&ctx->bs->unload_cpl, &seq->cpl, sizeof(struct spdk_bs_cpl));
    5800         819 :         seq->cpl.type = SPDK_BS_CPL_TYPE_NONE;
    5801             : 
    5802         819 :         bs_sequence_finish(seq, bserrno);
    5803             : 
    5804         819 :         bs_free(ctx->bs);
    5805         819 :         free(ctx);
    5806         819 : }
    5807             : 
    5808             : static void
    5809         819 : bs_unload_write_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5810             : {
    5811         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5812             : 
    5813         819 :         bs_unload_finish(ctx, bserrno);
    5814         819 : }
    5815             : 
    5816             : static void
    5817         819 : bs_unload_write_used_clusters_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5818             : {
    5819         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5820             : 
    5821         819 :         spdk_free(ctx->mask);
    5822             : 
    5823         819 :         if (bserrno != 0) {
    5824           0 :                 bs_unload_finish(ctx, bserrno);
    5825           0 :                 return;
    5826             :         }
    5827             : 
    5828         819 :         ctx->super->clean = 1;
    5829             : 
    5830         819 :         bs_write_super(seq, ctx->bs, ctx->super, bs_unload_write_super_cpl, ctx);
    5831         819 : }
    5832             : 
    5833             : static void
    5834         819 : bs_unload_write_used_blobids_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5835             : {
    5836         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5837             : 
    5838         819 :         spdk_free(ctx->mask);
    5839         819 :         ctx->mask = NULL;
    5840             : 
    5841         819 :         if (bserrno != 0) {
    5842           0 :                 bs_unload_finish(ctx, bserrno);
    5843           0 :                 return;
    5844             :         }
    5845             : 
    5846         819 :         bs_write_used_clusters(seq, ctx, bs_unload_write_used_clusters_cpl);
    5847         819 : }
    5848             : 
    5849             : static void
    5850         819 : bs_unload_write_used_pages_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5851             : {
    5852         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5853             : 
    5854         819 :         spdk_free(ctx->mask);
    5855         819 :         ctx->mask = NULL;
    5856             : 
    5857         819 :         if (bserrno != 0) {
    5858           0 :                 bs_unload_finish(ctx, bserrno);
    5859           0 :                 return;
    5860             :         }
    5861             : 
    5862         819 :         bs_write_used_blobids(seq, ctx, bs_unload_write_used_blobids_cpl);
    5863         819 : }
    5864             : 
    5865             : static void
    5866         819 : bs_unload_read_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5867             : {
    5868         819 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    5869             :         int rc;
    5870             : 
    5871         819 :         if (bserrno != 0) {
    5872           0 :                 bs_unload_finish(ctx, bserrno);
    5873           0 :                 return;
    5874             :         }
    5875             : 
    5876         819 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5877         819 :         if (rc != 0) {
    5878           0 :                 bs_unload_finish(ctx, rc);
    5879           0 :                 return;
    5880             :         }
    5881             : 
    5882         819 :         bs_write_used_md(seq, cb_arg, bs_unload_write_used_pages_cpl);
    5883         819 : }
    5884             : 
    5885             : void
    5886         829 : spdk_bs_unload(struct spdk_blob_store *bs, spdk_bs_op_complete cb_fn, void *cb_arg)
    5887             : {
    5888             :         struct spdk_bs_cpl      cpl;
    5889             :         struct spdk_bs_load_ctx *ctx;
    5890             : 
    5891         829 :         SPDK_DEBUGLOG(blob, "Syncing blobstore\n");
    5892             : 
    5893             :         /*
    5894             :          * If external snapshot channels are being destroyed while the blobstore is unloaded, the
    5895             :          * unload is deferred until after the channel destruction completes.
    5896             :          */
    5897         829 :         if (bs->esnap_channels_unloading != 0) {
    5898           5 :                 if (bs->esnap_unload_cb_fn != NULL) {
    5899           0 :                         SPDK_ERRLOG("Blobstore unload in progress\n");
    5900           0 :                         cb_fn(cb_arg, -EBUSY);
    5901           0 :                         return;
    5902             :                 }
    5903           5 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore unload deferred: %" PRIu32
    5904             :                               " esnap clones are unloading\n", bs->esnap_channels_unloading);
    5905           5 :                 bs->esnap_unload_cb_fn = cb_fn;
    5906           5 :                 bs->esnap_unload_cb_arg = cb_arg;
    5907           5 :                 return;
    5908             :         }
    5909         824 :         if (bs->esnap_unload_cb_fn != NULL) {
    5910           5 :                 SPDK_DEBUGLOG(blob_esnap, "Blobstore deferred unload progressing\n");
    5911           5 :                 assert(bs->esnap_unload_cb_fn == cb_fn);
    5912           5 :                 assert(bs->esnap_unload_cb_arg == cb_arg);
    5913           5 :                 bs->esnap_unload_cb_fn = NULL;
    5914           5 :                 bs->esnap_unload_cb_arg = NULL;
    5915           5 :         }
    5916             : 
    5917         824 :         if (!RB_EMPTY(&bs->open_blobs)) {
    5918           5 :                 SPDK_ERRLOG("Blobstore still has open blobs\n");
    5919           5 :                 cb_fn(cb_arg, -EBUSY);
    5920           5 :                 return;
    5921             :         }
    5922             : 
    5923         819 :         ctx = calloc(1, sizeof(*ctx));
    5924         819 :         if (!ctx) {
    5925           0 :                 cb_fn(cb_arg, -ENOMEM);
    5926           0 :                 return;
    5927             :         }
    5928             : 
    5929         819 :         ctx->bs = bs;
    5930             : 
    5931         819 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    5932             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    5933         819 :         if (!ctx->super) {
    5934           0 :                 free(ctx);
    5935           0 :                 cb_fn(cb_arg, -ENOMEM);
    5936           0 :                 return;
    5937             :         }
    5938             : 
    5939         819 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    5940         819 :         cpl.u.bs_basic.cb_fn = cb_fn;
    5941         819 :         cpl.u.bs_basic.cb_arg = cb_arg;
    5942             : 
    5943         819 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    5944         819 :         if (!ctx->seq) {
    5945           0 :                 spdk_free(ctx->super);
    5946           0 :                 free(ctx);
    5947           0 :                 cb_fn(cb_arg, -ENOMEM);
    5948           0 :                 return;
    5949             :         }
    5950             : 
    5951             :         /* Read super block */
    5952        1638 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    5953         819 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    5954         819 :                              bs_unload_read_super_cpl, ctx);
    5955         829 : }
    5956             : 
    5957             : /* END spdk_bs_unload */
    5958             : 
    5959             : /* START spdk_bs_set_super */
    5960             : 
/* State carried through the read-modify-write performed by spdk_bs_set_super(). */
struct spdk_bs_set_super_ctx {
	/* Blobstore whose super block is being updated. */
	struct spdk_blob_store		*bs;
	/* Buffer obtained from spdk_zmalloc() that holds the super block read from disk. */
	struct spdk_bs_super_block	*super;
};
    5965             : 
    5966             : static void
    5967          10 : bs_set_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5968             : {
    5969          10 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5970             : 
    5971          10 :         if (bserrno != 0) {
    5972           0 :                 SPDK_ERRLOG("Unable to write to super block of blobstore\n");
    5973           0 :         }
    5974             : 
    5975          10 :         spdk_free(ctx->super);
    5976             : 
    5977          10 :         bs_sequence_finish(seq, bserrno);
    5978             : 
    5979          10 :         free(ctx);
    5980          10 : }
    5981             : 
    5982             : static void
    5983          10 : bs_set_super_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    5984             : {
    5985          10 :         struct spdk_bs_set_super_ctx    *ctx = cb_arg;
    5986             :         int rc;
    5987             : 
    5988          10 :         if (bserrno != 0) {
    5989           0 :                 SPDK_ERRLOG("Unable to read super block of blobstore\n");
    5990           0 :                 spdk_free(ctx->super);
    5991           0 :                 bs_sequence_finish(seq, bserrno);
    5992           0 :                 free(ctx);
    5993           0 :                 return;
    5994             :         }
    5995             : 
    5996          10 :         rc = bs_super_validate(ctx->super, ctx->bs);
    5997          10 :         if (rc != 0) {
    5998           0 :                 SPDK_ERRLOG("Not a valid super block\n");
    5999           0 :                 spdk_free(ctx->super);
    6000           0 :                 bs_sequence_finish(seq, rc);
    6001           0 :                 free(ctx);
    6002           0 :                 return;
    6003             :         }
    6004             : 
    6005          10 :         bs_write_super(seq, ctx->bs, ctx->super, bs_set_super_write_cpl, ctx);
    6006          10 : }
    6007             : 
    6008             : void
    6009          10 : spdk_bs_set_super(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6010             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    6011             : {
    6012             :         struct spdk_bs_cpl              cpl;
    6013             :         spdk_bs_sequence_t              *seq;
    6014             :         struct spdk_bs_set_super_ctx    *ctx;
    6015             : 
    6016          10 :         SPDK_DEBUGLOG(blob, "Setting super blob id on blobstore\n");
    6017             : 
    6018          10 :         ctx = calloc(1, sizeof(*ctx));
    6019          10 :         if (!ctx) {
    6020           0 :                 cb_fn(cb_arg, -ENOMEM);
    6021           0 :                 return;
    6022             :         }
    6023             : 
    6024          10 :         ctx->bs = bs;
    6025             : 
    6026          10 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    6027             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    6028          10 :         if (!ctx->super) {
    6029           0 :                 free(ctx);
    6030           0 :                 cb_fn(cb_arg, -ENOMEM);
    6031           0 :                 return;
    6032             :         }
    6033             : 
    6034          10 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    6035          10 :         cpl.u.bs_basic.cb_fn = cb_fn;
    6036          10 :         cpl.u.bs_basic.cb_arg = cb_arg;
    6037             : 
    6038          10 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6039          10 :         if (!seq) {
    6040           0 :                 spdk_free(ctx->super);
    6041           0 :                 free(ctx);
    6042           0 :                 cb_fn(cb_arg, -ENOMEM);
    6043           0 :                 return;
    6044             :         }
    6045             : 
    6046          10 :         bs->super_blob = blobid;
    6047             : 
    6048             :         /* Read super block */
    6049          20 :         bs_sequence_read_dev(seq, ctx->super, bs_page_to_lba(bs, 0),
    6050          10 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    6051          10 :                              bs_set_super_read_cpl, ctx);
    6052          10 : }
    6053             : 
    6054             : /* END spdk_bs_set_super */
    6055             : 
    6056             : void
    6057          15 : spdk_bs_get_super(struct spdk_blob_store *bs,
    6058             :                   spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6059             : {
    6060          15 :         if (bs->super_blob == SPDK_BLOBID_INVALID) {
    6061           5 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOENT);
    6062           5 :         } else {
    6063          10 :                 cb_fn(cb_arg, bs->super_blob, 0);
    6064             :         }
    6065          15 : }
    6066             : 
    6067             : uint64_t
    6068         254 : spdk_bs_get_cluster_size(struct spdk_blob_store *bs)
    6069             : {
    6070         254 :         return bs->cluster_sz;
    6071             : }
    6072             : 
    6073             : uint64_t
    6074         114 : spdk_bs_get_page_size(struct spdk_blob_store *bs)
    6075             : {
    6076         114 :         return bs->md_page_size;
    6077             : }
    6078             : 
    6079             : uint64_t
    6080        1006 : spdk_bs_get_io_unit_size(struct spdk_blob_store *bs)
    6081             : {
    6082        1006 :         return bs->io_unit_size;
    6083             : }
    6084             : 
    6085             : uint64_t
    6086         700 : spdk_bs_free_cluster_count(struct spdk_blob_store *bs)
    6087             : {
    6088         700 :         return bs->num_free_clusters;
    6089             : }
    6090             : 
    6091             : uint64_t
    6092         194 : spdk_bs_total_data_cluster_count(struct spdk_blob_store *bs)
    6093             : {
    6094         194 :         return bs->total_data_clusters;
    6095             : }
    6096             : 
    6097             : static int
    6098         988 : bs_register_md_thread(struct spdk_blob_store *bs)
    6099             : {
    6100         988 :         bs->md_channel = spdk_get_io_channel(bs);
    6101         988 :         if (!bs->md_channel) {
    6102           0 :                 SPDK_ERRLOG("Failed to get IO channel.\n");
    6103           0 :                 return -1;
    6104             :         }
    6105             : 
    6106         988 :         return 0;
    6107         988 : }
    6108             : 
    6109             : static int
    6110         988 : bs_unregister_md_thread(struct spdk_blob_store *bs)
    6111             : {
    6112         988 :         spdk_put_io_channel(bs->md_channel);
    6113             : 
    6114         988 :         return 0;
    6115             : }
    6116             : 
    6117             : spdk_blob_id
    6118         712 : spdk_blob_get_id(struct spdk_blob *blob)
    6119             : {
    6120         712 :         assert(blob != NULL);
    6121             : 
    6122         712 :         return blob->id;
    6123             : }
    6124             : 
    6125             : uint64_t
    6126          30 : spdk_blob_get_num_io_units(struct spdk_blob *blob)
    6127             : {
    6128          30 :         assert(blob != NULL);
    6129             : 
    6130          30 :         return bs_cluster_to_io_unit(blob->bs, blob->active.num_clusters);
    6131             : }
    6132             : 
    6133             : uint64_t
    6134         707 : spdk_blob_get_num_clusters(struct spdk_blob *blob)
    6135             : {
    6136         707 :         assert(blob != NULL);
    6137             : 
    6138         707 :         return blob->active.num_clusters;
    6139             : }
    6140             : 
    6141             : uint64_t
    6142         415 : spdk_blob_get_num_allocated_clusters(struct spdk_blob *blob)
    6143             : {
    6144         415 :         assert(blob != NULL);
    6145             : 
    6146         415 :         return blob->active.num_allocated_clusters;
    6147             : }
    6148             : 
    6149             : static uint64_t
    6150          30 : blob_find_io_unit(struct spdk_blob *blob, uint64_t offset, bool is_allocated)
    6151             : {
    6152          30 :         uint64_t blob_io_unit_num = spdk_blob_get_num_io_units(blob);
    6153             : 
    6154          55 :         while (offset < blob_io_unit_num) {
    6155          50 :                 if (bs_io_unit_is_allocated(blob, offset) == is_allocated) {
    6156          25 :                         return offset;
    6157             :                 }
    6158             : 
    6159          25 :                 offset += bs_num_io_units_to_cluster_boundary(blob, offset);
    6160             :         }
    6161             : 
    6162           5 :         return UINT64_MAX;
    6163          30 : }
    6164             : 
    6165             : uint64_t
    6166          15 : spdk_blob_get_next_allocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6167             : {
    6168          15 :         return blob_find_io_unit(blob, offset, true);
    6169             : }
    6170             : 
    6171             : uint64_t
    6172          15 : spdk_blob_get_next_unallocated_io_unit(struct spdk_blob *blob, uint64_t offset)
    6173             : {
    6174          15 :         return blob_find_io_unit(blob, offset, false);
    6175             : }
    6176             : 
    6177             : /* START spdk_bs_create_blob */
    6178             : 
    6179             : static void
    6180        2346 : bs_create_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    6181             : {
    6182        2346 :         struct spdk_blob *blob = cb_arg;
    6183        2346 :         uint32_t page_idx = bs_blobid_to_page(blob->id);
    6184             : 
    6185        2346 :         if (bserrno != 0) {
    6186           0 :                 spdk_spin_lock(&blob->bs->used_lock);
    6187           0 :                 spdk_bit_array_clear(blob->bs->used_blobids, page_idx);
    6188           0 :                 bs_release_md_page(blob->bs, page_idx);
    6189           0 :                 spdk_spin_unlock(&blob->bs->used_lock);
    6190           0 :         }
    6191             : 
    6192        2346 :         blob_free(blob);
    6193             : 
    6194        2346 :         bs_sequence_finish(seq, bserrno);
    6195        2346 : }
    6196             : 
    6197             : static int
    6198        4717 : blob_set_xattrs(struct spdk_blob *blob, const struct spdk_blob_xattr_opts *xattrs,
    6199             :                 bool internal)
    6200             : {
    6201             :         uint64_t i;
    6202        4717 :         size_t value_len = 0;
    6203             :         int rc;
    6204        4717 :         const void *value = NULL;
    6205        4717 :         if (xattrs->count > 0 && xattrs->get_value == NULL) {
    6206          10 :                 return -EINVAL;
    6207             :         }
    6208        5103 :         for (i = 0; i < xattrs->count; i++) {
    6209         401 :                 xattrs->get_value(xattrs->ctx, xattrs->names[i], &value, &value_len);
    6210         401 :                 if (value == NULL || value_len == 0) {
    6211           5 :                         return -EINVAL;
    6212             :                 }
    6213         396 :                 rc = blob_set_xattr(blob, xattrs->names[i], value, value_len, internal);
    6214         396 :                 if (rc < 0) {
    6215           0 :                         return rc;
    6216             :                 }
    6217         396 :         }
    6218        4702 :         return 0;
    6219        4717 : }
    6220             : 
    6221             : static void
    6222        2330 : blob_opts_copy(const struct spdk_blob_opts *src, struct spdk_blob_opts *dst)
    6223             : {
    6224             : #define FIELD_OK(field) \
    6225             :         offsetof(struct spdk_blob_opts, field) + sizeof(src->field) <= src->opts_size
    6226             : 
    6227             : #define SET_FIELD(field) \
    6228             :         if (FIELD_OK(field)) { \
    6229             :                 dst->field = src->field; \
    6230             :         } \
    6231             : 
    6232        2330 :         SET_FIELD(num_clusters);
    6233        2330 :         SET_FIELD(thin_provision);
    6234        2330 :         SET_FIELD(clear_method);
    6235             : 
    6236        2330 :         if (FIELD_OK(xattrs)) {
    6237        2330 :                 memcpy(&dst->xattrs, &src->xattrs, sizeof(src->xattrs));
    6238        2330 :         }
    6239             : 
    6240        2330 :         SET_FIELD(use_extent_table);
    6241        2330 :         SET_FIELD(esnap_id);
    6242        2330 :         SET_FIELD(esnap_id_len);
    6243             : 
    6244        2330 :         dst->opts_size = src->opts_size;
    6245             : 
    6246             :         /* You should not remove this statement, but need to update the assert statement
    6247             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    6248             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_opts) == 80, "Incorrect size");
    6249             : 
    6250             : #undef FIELD_OK
    6251             : #undef SET_FIELD
    6252        2330 : }
    6253             : 
    6254             : static void
    6255        2366 : bs_create_blob(struct spdk_blob_store *bs,
    6256             :                const struct spdk_blob_opts *opts,
    6257             :                const struct spdk_blob_xattr_opts *internal_xattrs,
    6258             :                spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6259             : {
    6260             :         struct spdk_blob        *blob;
    6261             :         uint32_t                page_idx;
    6262             :         struct spdk_bs_cpl      cpl;
    6263             :         struct spdk_blob_opts   opts_local;
    6264             :         struct spdk_blob_xattr_opts internal_xattrs_default;
    6265             :         spdk_bs_sequence_t      *seq;
    6266             :         spdk_blob_id            id;
    6267             :         int rc;
    6268             : 
    6269        2366 :         assert(spdk_get_thread() == bs->md_thread);
    6270             : 
    6271        2366 :         spdk_spin_lock(&bs->used_lock);
    6272        2366 :         page_idx = spdk_bit_array_find_first_clear(bs->used_md_pages, 0);
    6273        2366 :         if (page_idx == UINT32_MAX) {
    6274           0 :                 spdk_spin_unlock(&bs->used_lock);
    6275           0 :                 cb_fn(cb_arg, 0, -ENOMEM);
    6276           0 :                 return;
    6277             :         }
    6278        2366 :         spdk_bit_array_set(bs->used_blobids, page_idx);
    6279        2366 :         bs_claim_md_page(bs, page_idx);
    6280        2366 :         spdk_spin_unlock(&bs->used_lock);
    6281             : 
    6282        2366 :         id = bs_page_to_blobid(page_idx);
    6283             : 
    6284        2366 :         SPDK_DEBUGLOG(blob, "Creating blob with id 0x%" PRIx64 " at page %u\n", id, page_idx);
    6285             : 
    6286        2366 :         spdk_blob_opts_init(&opts_local, sizeof(opts_local));
    6287        2366 :         if (opts) {
    6288        2330 :                 blob_opts_copy(opts, &opts_local);
    6289        2330 :         }
    6290             : 
    6291        2366 :         blob = blob_alloc(bs, id);
    6292        2366 :         if (!blob) {
    6293           0 :                 rc = -ENOMEM;
    6294           0 :                 goto error;
    6295             :         }
    6296             : 
    6297        2366 :         blob->use_extent_table = opts_local.use_extent_table;
    6298        2366 :         if (blob->use_extent_table) {
    6299        1440 :                 blob->invalid_flags |= SPDK_BLOB_EXTENT_TABLE;
    6300        1440 :         }
    6301             : 
    6302        2366 :         if (!internal_xattrs) {
    6303        2025 :                 blob_xattrs_init(&internal_xattrs_default);
    6304        2025 :                 internal_xattrs = &internal_xattrs_default;
    6305        2025 :         }
    6306             : 
    6307        2366 :         rc = blob_set_xattrs(blob, &opts_local.xattrs, false);
    6308        2366 :         if (rc < 0) {
    6309          15 :                 goto error;
    6310             :         }
    6311             : 
    6312        2351 :         rc = blob_set_xattrs(blob, internal_xattrs, true);
    6313        2351 :         if (rc < 0) {
    6314           0 :                 goto error;
    6315             :         }
    6316             : 
    6317        2351 :         if (opts_local.thin_provision) {
    6318         446 :                 blob_set_thin_provision(blob);
    6319         446 :         }
    6320             : 
    6321        2351 :         blob_set_clear_method(blob, opts_local.clear_method);
    6322             : 
    6323        2351 :         if (opts_local.esnap_id != NULL) {
    6324          75 :                 if (opts_local.esnap_id_len > UINT16_MAX) {
    6325           0 :                         SPDK_ERRLOG("esnap id length %" PRIu64 "is too long\n",
    6326             :                                     opts_local.esnap_id_len);
    6327           0 :                         rc = -EINVAL;
    6328           0 :                         goto error;
    6329             : 
    6330             :                 }
    6331          75 :                 blob_set_thin_provision(blob);
    6332          75 :                 blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6333         150 :                 rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID,
    6334          75 :                                     opts_local.esnap_id, opts_local.esnap_id_len, true);
    6335          75 :                 if (rc != 0) {
    6336           0 :                         goto error;
    6337             :                 }
    6338          75 :         }
    6339             : 
    6340        2351 :         rc = blob_resize(blob, opts_local.num_clusters);
    6341        2351 :         if (rc < 0) {
    6342           5 :                 goto error;
    6343             :         }
    6344        2346 :         cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6345        2346 :         cpl.u.blobid.cb_fn = cb_fn;
    6346        2346 :         cpl.u.blobid.cb_arg = cb_arg;
    6347        2346 :         cpl.u.blobid.blobid = blob->id;
    6348             : 
    6349        2346 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    6350        2346 :         if (!seq) {
    6351           0 :                 rc = -ENOMEM;
    6352           0 :                 goto error;
    6353             :         }
    6354             : 
    6355        2346 :         blob_persist(seq, blob, bs_create_blob_cpl, blob);
    6356        2346 :         return;
    6357             : 
    6358             : error:
    6359          20 :         SPDK_ERRLOG("Failed to create blob: %s, size in clusters/size: %lu (clusters)\n",
    6360             :                     spdk_strerror(rc), opts_local.num_clusters);
    6361          20 :         if (blob != NULL) {
    6362          20 :                 blob_free(blob);
    6363          20 :         }
    6364          20 :         spdk_spin_lock(&bs->used_lock);
    6365          20 :         spdk_bit_array_clear(bs->used_blobids, page_idx);
    6366          20 :         bs_release_md_page(bs, page_idx);
    6367          20 :         spdk_spin_unlock(&bs->used_lock);
    6368          20 :         cb_fn(cb_arg, 0, rc);
    6369        2366 : }
    6370             : 
    6371             : void
    6372          16 : spdk_bs_create_blob(struct spdk_blob_store *bs,
    6373             :                     spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6374             : {
    6375          16 :         bs_create_blob(bs, NULL, NULL, cb_fn, cb_arg);
    6376          16 : }
    6377             : 
    6378             : void
    6379        1999 : spdk_bs_create_blob_ext(struct spdk_blob_store *bs, const struct spdk_blob_opts *opts,
    6380             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6381             : {
    6382        1999 :         bs_create_blob(bs, opts, NULL, cb_fn, cb_arg);
    6383        1999 : }
    6384             : 
    6385             : /* END spdk_bs_create_blob */
    6386             : 
    6387             : /* START blob_cleanup */
    6388             : 
/* Shared state for the snapshot, clone and inflate operations and their cleanup. */
struct spdk_clone_snapshot_ctx {
	/* Completion to invoke once cleanup has finished. */
	struct spdk_bs_cpl      cpl;
	/* First error recorded for the operation; 0 if none so far. */
	int bserrno;
	/* When set, cleanup unfreezes I/O on the original blob. */
	bool frozen;

	struct spdk_io_channel *channel;

	/* Current cluster for inflate operation */
	uint64_t cluster;

	/* For inflation force allocation of all unallocated clusters and remove
	 * thin-provisioning. Otherwise only decouple parent and keep clone thin. */
	bool allocate_all;

	/* The blob the operation was started on. */
	struct {
		spdk_blob_id id;
		struct spdk_blob *blob;
		/* md_ro value to restore on the original blob during cleanup. */
		bool md_ro;
	} original;
	/* The blob being created by the operation. */
	struct {
		spdk_blob_id id;
		struct spdk_blob *blob;
	} new;

	/* xattrs specified for snapshot/clones only. They have no impact on
	 * the original blobs xattrs. */
	const struct spdk_blob_xattr_opts *xattrs;
};
    6417             : 
    6418             : static void
    6419         429 : bs_clone_snapshot_cleanup_finish(void *cb_arg, int bserrno)
    6420             : {
    6421         429 :         struct spdk_clone_snapshot_ctx *ctx = cb_arg;
    6422         429 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    6423             : 
    6424         429 :         if (bserrno != 0) {
    6425           8 :                 if (ctx->bserrno != 0) {
    6426           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6427           0 :                 } else {
    6428           8 :                         ctx->bserrno = bserrno;
    6429             :                 }
    6430           8 :         }
    6431             : 
    6432         429 :         switch (cpl->type) {
    6433             :         case SPDK_BS_CPL_TYPE_BLOBID:
    6434         354 :                 cpl->u.blobid.cb_fn(cpl->u.blobid.cb_arg, cpl->u.blobid.blobid, ctx->bserrno);
    6435         354 :                 break;
    6436             :         case SPDK_BS_CPL_TYPE_BLOB_BASIC:
    6437          75 :                 cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    6438          75 :                 break;
    6439             :         default:
    6440           0 :                 SPDK_UNREACHABLE();
    6441             :                 break;
    6442             :         }
    6443             : 
    6444         429 :         free(ctx);
    6445         429 : }
    6446             : 
    6447             : static void /* Restores the original blob's flags and closes it; used as the blob_unfreeze_io callback or called directly. */
    6448         411 : bs_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    6449             : {
    6450         411 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6451         411 :         struct spdk_blob *origblob = ctx->original.blob;
    6452             :         /* Only the first nonzero error is kept in ctx->bserrno; a later one is just logged. */
    6453         411 :         if (bserrno != 0) {
    6454           0 :                 if (ctx->bserrno != 0) {
    6455           0 :                         SPDK_ERRLOG("Unfreeze error %d\n", bserrno);
    6456           0 :                 } else {
    6457           0 :                         ctx->bserrno = bserrno;
    6458             :                 }
    6459           0 :         }
    6460             :         /* Save the id in ctx and clear the locked-operation flag on the original blob. */
    6461         411 :         ctx->original.id = origblob->id;
    6462         411 :         origblob->locked_operation_in_progress = false;
    6463             : 
    6464             :         /* Revert md_ro to original state */
    6465         411 :         origblob->md_ro = ctx->original.md_ro;
    6466             :         /* The close completion, bs_clone_snapshot_cleanup_finish, reports to the caller and frees ctx. */
    6467         411 :         spdk_blob_close(origblob, bs_clone_snapshot_cleanup_finish, ctx);
    6468         411 : }
    6469             : 
    6470             : static void /* Starts cleanup of the original blob: records the error, then unfreezes I/O if this operation froze it. */
    6471         411 : bs_clone_snapshot_origblob_cleanup(void *cb_arg, int bserrno)
    6472             : {
    6473         411 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6474         411 :         struct spdk_blob *origblob = ctx->original.blob;
    6475             :         /* Only the first nonzero error is kept in ctx->bserrno; a later one is just logged. */
    6476         411 :         if (bserrno != 0) {
    6477          30 :                 if (ctx->bserrno != 0) {
    6478           5 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6479           5 :                 } else {
    6480          25 :                         ctx->bserrno = bserrno;
    6481             :                 }
    6482          30 :         }
    6483             :         /* ctx->frozen tells whether I/O was frozen; if not, go straight to the unfreeze completion. */
    6484         411 :         if (ctx->frozen) {
    6485             :                 /* Unfreeze any outstanding I/O */
    6486         266 :                 blob_unfreeze_io(origblob, bs_snapshot_unfreeze_cpl, ctx);
    6487         266 :         } else {
    6488         145 :                 bs_snapshot_unfreeze_cpl(ctx, 0);
    6489             :         }
    6490             : 
    6491         411 : }
    6492             : 
    6493             : static void /* Records the error, closes the new blob, then continues with bs_clone_snapshot_origblob_cleanup. */
    6494           5 : bs_clone_snapshot_newblob_cleanup(struct spdk_clone_snapshot_ctx *ctx, int bserrno)
    6495             : {
    6496           5 :         struct spdk_blob *newblob = ctx->new.blob;
    6497             :         /* Only the first nonzero error is kept in ctx->bserrno; a later one is just logged. */
    6498           5 :         if (bserrno != 0) {
    6499           5 :                 if (ctx->bserrno != 0) {
    6500           0 :                         SPDK_ERRLOG("Cleanup error %d\n", bserrno);
    6501           0 :                 } else {
    6502           5 :                         ctx->bserrno = bserrno;
    6503             :                 }
    6504           5 :         }
    6505             :         /* Save the id in ctx before closing the new blob. */
    6506           5 :         ctx->new.id = newblob->id;
    6507           5 :         spdk_blob_close(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6508           5 : }
    6509             : 
    6510             : /* END blob_cleanup */
    6511             : 
    6512             : /* START spdk_bs_create_snapshot */
    6513             : 
    6514             : static void /* Exchanges the active cluster array, allocated-cluster count and extent-page array of two blobs. */
    6515         276 : bs_snapshot_swap_cluster_maps(struct spdk_blob *blob1, struct spdk_blob *blob2)
    6516             : {
    6517             :         uint64_t *cluster_temp;
    6518             :         uint64_t num_allocated_clusters_temp;
    6519             :         uint32_t *extent_page_temp;
    6520             :         /* Only the pointers are exchanged; the arrays themselves are not copied. */
    6521         276 :         cluster_temp = blob1->active.clusters;
    6522         276 :         blob1->active.clusters = blob2->active.clusters;
    6523         276 :         blob2->active.clusters = cluster_temp;
    6524             : 
    6525         276 :         num_allocated_clusters_temp = blob1->active.num_allocated_clusters;
    6526         276 :         blob1->active.num_allocated_clusters = blob2->active.num_allocated_clusters;
    6527         276 :         blob2->active.num_allocated_clusters = num_allocated_clusters_temp;
    6528             :         /* NOTE(review): num_clusters / num_extent_pages are not swapped here; presumably both blobs have equal sizes - confirm. */
    6529         276 :         extent_page_temp = blob1->active.extent_pages;
    6530         276 :         blob1->active.extent_pages = blob2->active.extent_pages;
    6531         276 :         blob2->active.extent_pages = extent_page_temp;
    6532         276 : }
    6533             : 
    6534             : /* Copies the internal xattr `name` from fromblob to toblob. Returns 0, or the error from the failing get/set call. */
    6535             : static int
    6536          25 : bs_snapshot_copy_xattr(struct spdk_blob *toblob, struct spdk_blob *fromblob, const char *name)
    6537             : {
    6538          25 :         const void      *val = NULL;
    6539             :         size_t          len;
    6540             :         int             bserrno;
    6541             :         /* Look the value up on the source blob first; a failure is logged and returned unchanged. */
    6542          25 :         bserrno = blob_get_xattr_value(fromblob, name, &val, &len, true);
    6543          25 :         if (bserrno != 0) {
    6544           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " missing %s XATTR\n", fromblob->id, name);
    6545           0 :                 return bserrno;
    6546             :         }
    6547             :         /* Store the same value and length on the destination blob. */
    6548          25 :         bserrno = blob_set_xattr(toblob, name, val, len, true);
    6549          25 :         if (bserrno != 0) {
    6550           0 :                 SPDK_ERRLOG("could not set %s XATTR on blob 0x%" PRIx64 "\n",
    6551             :                             name, toblob->id);
    6552           0 :                 return bserrno;
    6553             :         }
    6554          25 :         return 0;
    6555          25 : }
    6556             : 
    6557             : static void /* Completion of the original blob's md sync: finalizes the snapshot on success, rolls back on failure. */
    6558         261 : bs_snapshot_origblob_sync_cpl(void *cb_arg, int bserrno)
    6559             : {
    6560         261 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6561         261 :         struct spdk_blob *origblob = ctx->original.blob;
    6562         261 :         struct spdk_blob *newblob = ctx->new.blob;
    6563             :         /* On failure: swap the cluster maps back and, for an esnap clone, put the esnap xattr and flag back on origblob. */
    6564         261 :         if (bserrno != 0) {
    6565           5 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6566           5 :                 if (blob_is_esnap_clone(newblob)) {
    6567           0 :                         bs_snapshot_copy_xattr(origblob, newblob, BLOB_EXTERNAL_SNAPSHOT_ID); /* NOTE(review): return value is ignored here */
    6568           0 :                         origblob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6569           0 :                 }
    6570           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6571           5 :                 return;
    6572             :         }
    6573             : 
    6574             :         /* Remove metadata descriptor SNAPSHOT_IN_PROGRESS */
    6575         256 :         bserrno = blob_remove_xattr(newblob, SNAPSHOT_IN_PROGRESS, true);
    6576         256 :         if (bserrno != 0) {
    6577           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6578           0 :                 return;
    6579             :         }
    6580             :         /* Add the original blob to the blob list again; it was removed in bs_snapshot_newblob_sync_cpl. */
    6581         256 :         bs_blob_list_add(ctx->original.blob);
    6582             :         /* Mark the new blob read-only before its final metadata sync. */
    6583         256 :         spdk_blob_set_read_only(newblob);
    6584             : 
    6585             :         /* sync snapshot metadata */
    6586         256 :         spdk_blob_sync_md(newblob, bs_clone_snapshot_origblob_cleanup, ctx);
    6587         261 : }
    6588             : 
    6589             : static void /* Completion of the new blob's md sync: makes the original blob a thin-provisioned clone of the new blob. */
    6590         266 : bs_snapshot_newblob_sync_cpl(void *cb_arg, int bserrno)
    6591             : {
    6592         266 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6593         266 :         struct spdk_blob *origblob = ctx->original.blob;
    6594         266 :         struct spdk_blob *newblob = ctx->new.blob;
    6595             :         /* Every error path below swaps the cluster maps back and re-marks newblob thin provisioned before cleanup. */
    6596         266 :         if (bserrno != 0) {
    6597             :                 /* return cluster map back to original */
    6598           5 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6599             : 
    6600             :                 /* Newblob md sync failed. Valid clusters are only present in origblob.
    6601             :                  * Since I/O is frozen on origblob, no changes to zeroed out cluster map should have occurred.
    6602             :                  * Newblob needs to be reverted to thin_provisioned state at creation to properly close. */
    6603           5 :                 blob_set_thin_provision(newblob);
    6604           5 :                 assert(spdk_mem_all_zero(newblob->active.clusters,
    6605             :                                          newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6606           5 :                 assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6607             :                                          newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6608             : 
    6609           5 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6610           5 :                 return;
    6611             :         }
    6612             : 
    6613             :         /* Set internal xattr for snapshot id */
    6614         261 :         bserrno = blob_set_xattr(origblob, BLOB_SNAPSHOT, &newblob->id, sizeof(spdk_blob_id), true);
    6615         261 :         if (bserrno != 0) {
    6616             :                 /* return cluster map back to original */
    6617           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6618           0 :                 blob_set_thin_provision(newblob);
    6619           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6620           0 :                 return;
    6621             :         }
    6622             : 
    6623             :         /* Create new back_bs_dev for snapshot */
    6624         261 :         origblob->back_bs_dev = bs_create_blob_bs_dev(newblob);
    6625         261 :         if (origblob->back_bs_dev == NULL) { /* a NULL device is reported as -EINVAL */
    6626             :                 /* return cluster map back to original */
    6627           0 :                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6628           0 :                 blob_set_thin_provision(newblob);
    6629           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, -EINVAL);
    6630           0 :                 return;
    6631             :         }
    6632             : 
    6633             :         /* Remove the xattr that references an external snapshot */
    6634         261 :         if (blob_is_esnap_clone(origblob)) {
    6635          15 :                 origblob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    6636          15 :                 bserrno = blob_remove_xattr(origblob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    6637          15 :                 if (bserrno != 0) {
    6638           0 :                         if (bserrno == -ENOENT) {
    6639           0 :                                 SPDK_ERRLOG("blob 0x%" PRIx64 " has no " BLOB_EXTERNAL_SNAPSHOT_ID
    6640             :                                             " xattr to remove\n", origblob->id);
    6641           0 :                                 assert(false); /* NOTE(review): no return in this branch; if assert is compiled out, execution continues below */
    6642             :                         } else {
    6643             :                                 /* return cluster map back to original */
    6644           0 :                                 bs_snapshot_swap_cluster_maps(newblob, origblob);
    6645           0 :                                 blob_set_thin_provision(newblob);
    6646           0 :                                 bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6647           0 :                                 return;
    6648             :                         }
    6649             :                 }
    6650          15 :         }
    6651             :         /* Take origblob off the blob list and point its parent_id at the new blob. */
    6652         261 :         bs_blob_list_remove(origblob);
    6653         261 :         origblob->parent_id = newblob->id;
    6654             :         /* set clone blob as thin provisioned */
    6655         261 :         blob_set_thin_provision(origblob);
    6656             :         /* Add the new blob to the blob list. */
    6657         261 :         bs_blob_list_add(newblob);
    6658             : 
    6659             :         /* sync clone metadata */
    6660         261 :         spdk_blob_sync_md(origblob, bs_snapshot_origblob_sync_cpl, ctx);
    6661         266 : }
    6662             : 
    6663             : static void /* blob_freeze_io callback: moves origblob's back device, flags, parent and cluster maps to newblob, then syncs newblob. */
    6664         266 : bs_snapshot_freeze_cpl(void *cb_arg, int rc)
    6665             : {
    6666         266 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6667         266 :         struct spdk_blob *origblob = ctx->original.blob;
    6668         266 :         struct spdk_blob *newblob = ctx->new.blob;
    6669             :         int bserrno;
    6670             :         /* A freeze failure leaves ctx->frozen false, so the cleanup chain will not call blob_unfreeze_io. */
    6671         266 :         if (rc != 0) {
    6672           0 :                 bs_clone_snapshot_newblob_cleanup(ctx, rc);
    6673           0 :                 return;
    6674             :         }
    6675             :         /* Remember that I/O is frozen so bs_clone_snapshot_origblob_cleanup unfreezes it. */
    6676         266 :         ctx->frozen = true;
    6677             : 
    6678         266 :         if (blob_is_esnap_clone(origblob)) {
    6679             :                 /* Clean up any channels associated with the original blob id because future IO will
    6680             :                  * perform IO using the snapshot blob_id.
    6681             :                  */
    6682          15 :                 blob_esnap_destroy_bs_dev_channels(origblob, false, NULL, NULL);
    6683          15 :         }
    6684         266 :         if (newblob->back_bs_dev) {
    6685         266 :                 blob_back_bs_destroy(newblob);
    6686         266 :         }
    6687             :         /* set new back_bs_dev for snapshot */
    6688         266 :         newblob->back_bs_dev = origblob->back_bs_dev; /* NOTE(review): both blobs share this pointer until origblob gets a new device; the error paths below close newblob in that state - confirm it is safe */
    6689             :         /* Set invalid flags from origblob */
    6690         266 :         newblob->invalid_flags = origblob->invalid_flags;
    6691             : 
    6692             :         /* inherit parent from original blob if set */
    6693         266 :         newblob->parent_id = origblob->parent_id;
    6694         266 :         switch (origblob->parent_id) {
    6695             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    6696          15 :                 bserrno = bs_snapshot_copy_xattr(newblob, origblob, BLOB_EXTERNAL_SNAPSHOT_ID);
    6697          15 :                 if (bserrno != 0) {
    6698           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6699           0 :                         return;
    6700             :                 }
    6701          15 :                 break;
    6702             :         case SPDK_BLOBID_INVALID: /* no parent: nothing to record on newblob */
    6703         186 :                 break;
    6704             :         default:
    6705             :                 /* Set internal xattr for snapshot id */
    6706         130 :                 bserrno = blob_set_xattr(newblob, BLOB_SNAPSHOT,
    6707          65 :                                          &origblob->parent_id, sizeof(spdk_blob_id), true);
    6708          65 :                 if (bserrno != 0) {
    6709           0 :                         bs_clone_snapshot_newblob_cleanup(ctx, bserrno);
    6710           0 :                         return;
    6711             :                 }
    6712          65 :         }
    6713             : 
    6714             :         /* swap cluster maps */
    6715         266 :         bs_snapshot_swap_cluster_maps(newblob, origblob);
    6716             : 
    6717             :         /* Set the clear method on the new blob to match the original. */
    6718         266 :         blob_set_clear_method(newblob, origblob->clear_method);
    6719             : 
    6720             :         /* sync snapshot metadata */
    6721         266 :         spdk_blob_sync_md(newblob, bs_snapshot_newblob_sync_cpl, ctx);
    6722         266 : }
    6723             : 
    6724             : static void /* Open completion for the new blob: stores it in ctx and requests an I/O freeze on the original blob. */
    6725         271 : bs_snapshot_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6726             : {
    6727         271 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6728         271 :         struct spdk_blob *origblob = ctx->original.blob;
    6729         271 :         struct spdk_blob *newblob = _blob;
    6730             :         /* Open failed: there is no new blob to close, so cleanup starts at the original blob. */
    6731         271 :         if (bserrno != 0) {
    6732           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6733           5 :                 return;
    6734             :         }
    6735             :         /* The asserts check that the new blob is thin provisioned with all-zero cluster and extent-page arrays. */
    6736         266 :         ctx->new.blob = newblob;
    6737         266 :         assert(spdk_blob_is_thin_provisioned(newblob));
    6738         266 :         assert(spdk_mem_all_zero(newblob->active.clusters,
    6739             :                                  newblob->active.num_clusters * sizeof(*newblob->active.clusters)));
    6740         266 :         assert(spdk_mem_all_zero(newblob->active.extent_pages,
    6741             :                                  newblob->active.num_extent_pages * sizeof(*newblob->active.extent_pages)));
    6742             :         /* Continue in bs_snapshot_freeze_cpl. */
    6743         266 :         blob_freeze_io(origblob, bs_snapshot_freeze_cpl, ctx);
    6744         271 : }
    6745             : 
    6746             : static void /* Create completion for the new blob: records its id and opens it. */
    6747         276 : bs_snapshot_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6748             : {
    6749         276 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6750         276 :         struct spdk_blob *origblob = ctx->original.blob;
    6751             :         /* Creation failed: only the original blob needs cleanup. */
    6752         276 :         if (bserrno != 0) {
    6753           5 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6754           5 :                 return;
    6755             :         }
    6756             :         /* The id is stored both for internal use and as the value later passed to the caller's callback. */
    6757         271 :         ctx->new.id = blobid;
    6758         271 :         ctx->cpl.u.blobid.blobid = blobid;
    6759             : 
    6760         271 :         spdk_bs_open_blob(origblob->bs, ctx->new.id, bs_snapshot_newblob_open_cpl, ctx);
    6761         276 : }
    6762             : 
    6763             : 
    6764             : static void /* get_value callback for the SNAPSHOT_IN_PROGRESS internal xattr: reports the id of the blob passed as arg. */
    6765         276 : bs_xattr_snapshot(void *arg, const char *name,
    6766             :                   const void **value, size_t *value_len)
    6767             : {
    6768         276 :         assert(strncmp(name, SNAPSHOT_IN_PROGRESS, sizeof(SNAPSHOT_IN_PROGRESS)) == 0);
    6769             :         /* *value points directly at blob->id; nothing is copied here. */
    6770         276 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6771         276 :         *value = &blob->id;
    6772         276 :         *value_len = sizeof(blob->id);
    6773         276 : }
    6774             : 
    6775             : static void /* Open completion for the blob being snapshotted: validates it, locks it and creates the new thin blob. */
    6776         289 : bs_snapshot_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6777             : {
    6778         289 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6779             :         struct spdk_blob_opts opts;
    6780             :         struct spdk_blob_xattr_opts internal_xattrs;
    6781         289 :         char *xattrs_names[] = { SNAPSHOT_IN_PROGRESS };
    6782             :         /* Open failed: no blob to close, so finish immediately. */
    6783         289 :         if (bserrno != 0) {
    6784           8 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6785           8 :                 return;
    6786             :         }
    6787             : 
    6788         281 :         ctx->original.blob = _blob;
    6789             :         /* A blob that is data- or md-read-only is rejected with -EINVAL. */
    6790         281 :         if (_blob->data_ro || _blob->md_ro) {
    6791           5 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot from read only blob with id 0x%"
    6792             :                               PRIx64 "\n", _blob->id);
    6793           5 :                 ctx->bserrno = -EINVAL;
    6794           5 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6795           5 :                 return;
    6796             :         }
    6797             :         /* Another locked operation already running on this blob is reported as -EBUSY. */
    6798         276 :         if (_blob->locked_operation_in_progress) {
    6799           0 :                 SPDK_DEBUGLOG(blob, "Cannot create snapshot - another operation in progress\n");
    6800           0 :                 ctx->bserrno = -EBUSY;
    6801           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6802           0 :                 return;
    6803             :         }
    6804             :         /* Cleared again in bs_snapshot_unfreeze_cpl. */
    6805         276 :         _blob->locked_operation_in_progress = true;
    6806             : 
    6807         276 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6808         276 :         blob_xattrs_init(&internal_xattrs);
    6809             : 
    6810             :         /* Change the size of new blob to the same as in original blob,
    6811             :          * but do not allocate clusters */
    6812         276 :         opts.thin_provision = true;
    6813         276 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6814         276 :         opts.use_extent_table = _blob->use_extent_table;
    6815             : 
    6816             :         /* If there are any xattrs specified for snapshot, set them now */
    6817         276 :         if (ctx->xattrs) {
    6818           5 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6819           5 :         }
    6820             :         /* Set internal xattr SNAPSHOT_IN_PROGRESS */
    6821         276 :         internal_xattrs.count = 1;
    6822         276 :         internal_xattrs.ctx = _blob;
    6823         276 :         internal_xattrs.names = xattrs_names;
    6824         276 :         internal_xattrs.get_value = bs_xattr_snapshot;
    6825             :         /* NOTE(review): opts, internal_xattrs and xattrs_names are locals; assumes bs_create_blob is done with them before it returns - confirm. */
    6826         552 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6827         276 :                        bs_snapshot_newblob_create_cpl, ctx);
    6828         289 : }
    6829             : 
    6830             : void /* Starts snapshot creation for blobid; cb_fn later receives the new blob id (SPDK_BLOBID_INVALID if none was created) and an errno. */
    6831         289 : spdk_bs_create_snapshot(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6832             :                         const struct spdk_blob_xattr_opts *snapshot_xattrs,
    6833             :                         spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6834             : {
    6835         289 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    6836             :         /* Allocation failure is reported through cb_fn right away with -ENOMEM. */
    6837         289 :         if (!ctx) {
    6838           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6839           0 :                 return;
    6840             :         }
    6841         289 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6842         289 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6843         289 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6844         289 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6845         289 :         ctx->bserrno = 0;
    6846         289 :         ctx->frozen = false;
    6847         289 :         ctx->original.id = blobid;
    6848         289 :         ctx->xattrs = snapshot_xattrs; /* pointer is stored, not copied; presumably must stay valid until the open completion reads it - confirm */
    6849             :         /* Continue in bs_snapshot_origblob_open_cpl. */
    6850         289 :         spdk_bs_open_blob(bs, ctx->original.id, bs_snapshot_origblob_open_cpl, ctx);
    6851         289 : }
    6852             : /* END spdk_bs_create_snapshot */
    6853             : 
    6854             : /* START spdk_bs_create_clone */
    6855             : 
    6856             : static void /* get_value callback for the BLOB_SNAPSHOT internal xattr: reports the id of the blob passed as arg. */
    6857          60 : bs_xattr_clone(void *arg, const char *name,
    6858             :                const void **value, size_t *value_len)
    6859             : {
    6860          60 :         assert(strncmp(name, BLOB_SNAPSHOT, sizeof(BLOB_SNAPSHOT)) == 0);
    6861             :         /* *value points directly at blob->id; nothing is copied here. */
    6862          60 :         struct spdk_blob *blob = (struct spdk_blob *)arg;
    6863          60 :         *value = &blob->id;
    6864          60 :         *value_len = sizeof(blob->id);
    6865          60 : }
    6866             : 
    6867             : static void /* Open completion for the new clone: adds it to the blob list and closes it again. */
    6868          60 : bs_clone_newblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6869             : {
    6870          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6871          60 :         struct spdk_blob *clone = _blob;
    6872             :         /* NOTE(review): bserrno is never checked in this function; _blob is used even if the open failed - confirm. */
    6873          60 :         ctx->new.blob = clone;
    6874          60 :         bs_blob_list_add(clone);
    6875             :         /* Closing continues with cleanup of the original blob. */
    6876          60 :         spdk_blob_close(clone, bs_clone_snapshot_origblob_cleanup, ctx);
    6877          60 : }
    6878             : 
    6879             : static void /* Create completion for the new clone: stores its id for the caller's callback and opens it. */
    6880          60 : bs_clone_newblob_create_cpl(void *cb_arg, spdk_blob_id blobid, int bserrno)
    6881             : {
    6882          60 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6883             :         /* NOTE(review): bserrno is never checked here, unlike bs_snapshot_newblob_create_cpl - confirm. */
    6884          60 :         ctx->cpl.u.blobid.blobid = blobid;
    6885          60 :         spdk_bs_open_blob(ctx->original.blob->bs, blobid, bs_clone_newblob_open_cpl, ctx);
    6886          60 : }
    6887             : 
    6888             : static void /* Open completion for the blob being cloned: validates it, locks it and creates the new thin blob. */
    6889          65 : bs_clone_origblob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    6890             : {
    6891          65 :         struct spdk_clone_snapshot_ctx  *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6892             :         struct spdk_blob_opts           opts;
    6893             :         struct spdk_blob_xattr_opts internal_xattrs;
    6894          65 :         char *xattr_names[] = { BLOB_SNAPSHOT };
    6895             :         /* Open failed: no blob to close, so finish immediately. */
    6896          65 :         if (bserrno != 0) {
    6897           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    6898           0 :                 return;
    6899             :         }
    6900             :         /* md_ro is saved so bs_snapshot_unfreeze_cpl can restore it. */
    6901          65 :         ctx->original.blob = _blob;
    6902          65 :         ctx->original.md_ro = _blob->md_ro;
    6903             :         /* The source must be both data- and md-read-only; otherwise -EINVAL. */
    6904          65 :         if (!_blob->data_ro || !_blob->md_ro) {
    6905           5 :                 SPDK_DEBUGLOG(blob, "Clone not from read-only blob\n");
    6906           5 :                 ctx->bserrno = -EINVAL;
    6907           5 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6908           5 :                 return;
    6909             :         }
    6910             :         /* Another locked operation already running on this blob is reported as -EBUSY. */
    6911          60 :         if (_blob->locked_operation_in_progress) {
    6912           0 :                 SPDK_DEBUGLOG(blob, "Cannot create clone - another operation in progress\n");
    6913           0 :                 ctx->bserrno = -EBUSY;
    6914           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    6915           0 :                 return;
    6916             :         }
    6917             :         /* Cleared again in bs_snapshot_unfreeze_cpl. */
    6918          60 :         _blob->locked_operation_in_progress = true;
    6919             : 
    6920          60 :         spdk_blob_opts_init(&opts, sizeof(opts));
    6921          60 :         blob_xattrs_init(&internal_xattrs);
    6922             :         /* The clone is thin provisioned with the same cluster count and extent-table setting as the source. */
    6923          60 :         opts.thin_provision = true;
    6924          60 :         opts.num_clusters = spdk_blob_get_num_clusters(_blob);
    6925          60 :         opts.use_extent_table = _blob->use_extent_table;
    6926          60 :         if (ctx->xattrs) {
    6927           5 :                 memcpy(&opts.xattrs, ctx->xattrs, sizeof(*ctx->xattrs));
    6928           5 :         }
    6929             : 
    6930             :         /* Set internal xattr BLOB_SNAPSHOT */
    6931          60 :         internal_xattrs.count = 1;
    6932          60 :         internal_xattrs.ctx = _blob;
    6933          60 :         internal_xattrs.names = xattr_names;
    6934          60 :         internal_xattrs.get_value = bs_xattr_clone;
    6935             :         /* NOTE(review): opts, internal_xattrs and xattr_names are locals; assumes bs_create_blob is done with them before it returns - confirm. */
    6936         120 :         bs_create_blob(_blob->bs, &opts, &internal_xattrs,
    6937          60 :                        bs_clone_newblob_create_cpl, ctx);
    6938          65 : }
    6939             : 
    6940             : void /* Starts clone creation from blobid; cb_fn later receives the new blob id (SPDK_BLOBID_INVALID if none was created) and an errno. */
    6941          65 : spdk_bs_create_clone(struct spdk_blob_store *bs, spdk_blob_id blobid,
    6942             :                      const struct spdk_blob_xattr_opts *clone_xattrs,
    6943             :                      spdk_blob_op_with_id_complete cb_fn, void *cb_arg)
    6944             : {
    6945          65 :         struct spdk_clone_snapshot_ctx  *ctx = calloc(1, sizeof(*ctx));
    6946             :         /* Allocation failure is reported through cb_fn right away with -ENOMEM. */
    6947          65 :         if (!ctx) {
    6948           0 :                 cb_fn(cb_arg, SPDK_BLOBID_INVALID, -ENOMEM);
    6949           0 :                 return;
    6950             :         }
    6951             :         /* ctx->frozen is not set here; calloc leaves it zero. */
    6952          65 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOBID;
    6953          65 :         ctx->cpl.u.blobid.cb_fn = cb_fn;
    6954          65 :         ctx->cpl.u.blobid.cb_arg = cb_arg;
    6955          65 :         ctx->cpl.u.blobid.blobid = SPDK_BLOBID_INVALID;
    6956          65 :         ctx->bserrno = 0;
    6957          65 :         ctx->xattrs = clone_xattrs; /* pointer is stored, not copied; presumably must stay valid until the open completion reads it - confirm */
    6958          65 :         ctx->original.id = blobid;
    6959             :         /* Continue in bs_clone_origblob_open_cpl. */
    6960          65 :         spdk_bs_open_blob(bs, ctx->original.id, bs_clone_origblob_open_cpl, ctx);
    6961          65 : }
    6962             : 
    6963             : /* END spdk_bs_create_clone */
    6964             : 
    6965             : /* START spdk_bs_inflate_blob */
    6966             : 
    6967             : static void /* Open completion for the new parent: re-parents ctx->original.blob onto _parent and syncs its metadata. */
    6968          15 : bs_inflate_blob_set_parent_cpl(void *cb_arg, struct spdk_blob *_parent, int bserrno)
    6969             : {
    6970          15 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    6971          15 :         struct spdk_blob *_blob = ctx->original.blob;
    6972             :         /* Opening the parent failed: clean up the original blob only. */
    6973          15 :         if (bserrno != 0) {
    6974           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6975           0 :                 return;
    6976             :         }
    6977             : 
    6978             :         /* Temporarily override md_ro flag for MD modification */
    6979          15 :         _blob->md_ro = false;
    6980             :         /* Record the new parent's id in the BLOB_SNAPSHOT internal xattr. */
    6981          15 :         bserrno = blob_set_xattr(_blob, BLOB_SNAPSHOT, &_parent->id, sizeof(spdk_blob_id), true);
    6982          15 :         if (bserrno != 0) {
    6983           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    6984           0 :                 return;
    6985             :         }
    6986             :         /* NOTE(review): this assert comes after _parent was already dereferenced above. */
    6987          15 :         assert(_parent != NULL);
    6988             :         /* Remove from the blob list, change parent_id, then re-add after the back device is replaced. */
    6989          15 :         bs_blob_list_remove(_blob);
    6990          15 :         _blob->parent_id = _parent->id;
    6991             :         /* NOTE(review): the result of bs_create_blob_bs_dev is not checked for NULL here. */
    6992          15 :         blob_back_bs_destroy(_blob);
    6993          15 :         _blob->back_bs_dev = bs_create_blob_bs_dev(_parent);
    6994          15 :         bs_blob_list_add(_blob);
    6995             :         /* NOTE(review): _parent is not closed in this function; presumably released elsewhere - confirm. */
    6996          15 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    6997          15 : }
    6998             : 
                       /*
                        * Final step of inflate/decouple, called by bs_inflate_blob_touch_next()
                        * once no cluster remains to be allocated.
                        *
                        * With ctx->allocate_all set the blob is detached from any parent: it is
                        * removed from the blob list, the esnap or snapshot xattr is removed, the
                        * thin-provisioning flag is cleared and the backing device is destroyed.
                        *
                        * Without allocate_all (decouple parent) the blob is moved one level up:
                        * if the current parent itself has a parent, that blob is opened and the
                        * work continues in bs_inflate_blob_set_parent_cpl(); otherwise the blob
                        * is detached and backed by a zeroes device.
                        *
                        * Except for the early return, the function ends by syncing metadata with
                        * bs_clone_snapshot_origblob_cleanup() as the completion.
                        */
    6999             : static void
    7000          70 : bs_inflate_blob_done(struct spdk_clone_snapshot_ctx *ctx)
    7001             : {
    7002          70 :         struct spdk_blob *_blob = ctx->original.blob;
    7003             :         struct spdk_blob *_parent;
    7004             : 
    7005          70 :         if (ctx->allocate_all) {
    7006             :                 /* remove thin provisioning */
    7007          40 :                 bs_blob_list_remove(_blob);
                                       /*
                                        * NOTE(review): the return values of blob_remove_xattr() are
                                        * ignored here, and these calls run before md_ro is cleared
                                        * further down - confirm they take effect for an md_ro blob.
                                        */
    7008          40 :                 if (_blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7009          10 :                         blob_remove_xattr(_blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7010          10 :                         _blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7011          10 :                 } else {
    7012          30 :                         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7013             :                 }
    7014          40 :                 _blob->invalid_flags = _blob->invalid_flags & ~SPDK_BLOB_THIN_PROV;
    7015          40 :                 blob_back_bs_destroy(_blob);
    7016          40 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7017          40 :         } else {
    7018             :                 /* For now, esnap clones always have allocate_all set. */
    7019          30 :                 assert(!blob_is_esnap_clone(_blob));
    7020             : 
                                       /* The backing device of a regular clone wraps the parent blob. */
    7021          30 :                 _parent = ((struct spdk_blob_bs_dev *)(_blob->back_bs_dev))->blob;
    7022          30 :                 if (_parent->parent_id != SPDK_BLOBID_INVALID) {
    7023             :                         /* We must change the parent of the inflated blob */
    7024          30 :                         spdk_bs_open_blob(_blob->bs, _parent->parent_id,
    7025          15 :                                           bs_inflate_blob_set_parent_cpl, ctx);
    7026          15 :                         return;
    7027             :                 }
    7028             : 
                                       /* The parent has no parent: unallocated clusters now read as zeroes. */
    7029          15 :                 bs_blob_list_remove(_blob);
    7030          15 :                 _blob->parent_id = SPDK_BLOBID_INVALID;
    7031          15 :                 blob_back_bs_destroy(_blob);
    7032          15 :                 _blob->back_bs_dev = bs_create_zeroes_dev();
    7033             :         }
    7034             : 
    7035             :         /* Temporarily override md_ro flag for MD modification */
    7036          55 :         _blob->md_ro = false;
    7037          55 :         blob_remove_xattr(_blob, BLOB_SNAPSHOT, true);
    7038          55 :         _blob->state = SPDK_BLOB_STATE_DIRTY;
    7039             : 
    7040          55 :         spdk_blob_sync_md(_blob, bs_clone_snapshot_origblob_cleanup, ctx);
    7041          70 : }
    7042             : 
                       /*
                        * Decide whether inflating `blob` requires allocating cluster index
                        * `cluster`. The index is not bounds-checked here; both callers iterate
                        * below active.num_clusters.
                        *
                        * Returns false when the blob already has the cluster allocated.
                        * Otherwise:
                        *  - blob without parent: returns allocate_all;
                        *  - blob backed by an external snapshot: always true;
                        *  - blob backed by another blob: true when allocate_all is set or when the
                        *    backing blob has that cluster allocated.
                        */
    7043             : /* Check if cluster needs allocation */
    7044             : static inline bool
    7045        1500 : bs_cluster_needs_allocation(struct spdk_blob *blob, uint64_t cluster, bool allocate_all)
    7046             : {
    7047             :         struct spdk_blob_bs_dev *b;
    7048             : 
    7049        1500 :         assert(blob != NULL);
    7050             : 
    7051        1500 :         if (blob->active.clusters[cluster] != 0) {
    7052             :                 /* Cluster is already allocated */
    7053          40 :                 return false;
    7054             :         }
    7055             : 
    7056        1460 :         if (blob->parent_id == SPDK_BLOBID_INVALID) {
    7057             :                 /* Blob has no parent blob */
    7058         100 :                 return allocate_all;
    7059             :         }
    7060             : 
    7061        1360 :         if (blob->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    7062          80 :                 return true;
    7063             :         }
    7064             : 
                               /* Assumes back_bs_dev is a struct spdk_blob_bs_dev for a regular parent id. */
    7065        1280 :         b = (struct spdk_blob_bs_dev *)blob->back_bs_dev;
    7066        1280 :         return (allocate_all || b->blob->active.clusters[cluster] != 0);
    7067        1500 : }
    7068             : 
                       /*
                        * Allocate the next cluster that needs allocation, one per invocation.
                        *
                        * Starting at ctx->cluster, skips clusters for which
                        * bs_cluster_needs_allocation() is false. If one is found, a zero-length
                        * read user op is created whose completion is this same function, and
                        * bs_allocate_and_copy_cluster() is called with it. When the end of the
                        * blob is reached, bs_inflate_blob_done() finishes the operation.
                        *
                        * cb_arg  - struct spdk_clone_snapshot_ctx of the inflate operation
                        * bserrno - result of the previous step; non-zero aborts via
                        *           bs_clone_snapshot_origblob_cleanup()
                        */
    7069             : static void
    7070         635 : bs_inflate_blob_touch_next(void *cb_arg, int bserrno)
    7071             : {
    7072         635 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7073         635 :         struct spdk_blob *_blob = ctx->original.blob;
    7074             :         struct spdk_bs_cpl cpl;
    7075             :         spdk_bs_user_op_t *op;
    7076             :         uint64_t offset;
    7077             : 
    7078         635 :         if (bserrno != 0) {
    7079           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, bserrno);
    7080           0 :                 return;
    7081             :         }
    7082             : 
                               /* Advance ctx->cluster to the next cluster that needs allocation. */
    7083         820 :         for (; ctx->cluster < _blob->active.num_clusters; ctx->cluster++) {
    7084         750 :                 if (bs_cluster_needs_allocation(_blob, ctx->cluster, ctx->allocate_all)) {
    7085         565 :                         break;
    7086             :                 }
    7087         185 :         }
    7088             : 
    7089         635 :         if (ctx->cluster < _blob->active.num_clusters) {
    7090         565 :                 offset = bs_cluster_to_lba(_blob->bs, ctx->cluster);
    7091             : 
    7092             :                 /* We may safely increment a cluster before copying */
    7093         565 :                 ctx->cluster++;
    7094             : 
    7095             :                 /* Use a dummy 0B read as a context for cluster copy */
    7096         565 :                 cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7097         565 :                 cpl.u.blob_basic.cb_fn = bs_inflate_blob_touch_next;
    7098         565 :                 cpl.u.blob_basic.cb_arg = ctx;
    7099             : 
    7100        1130 :                 op = bs_user_op_alloc(ctx->channel, &cpl, SPDK_BLOB_READ, _blob,
    7101         565 :                                       NULL, 0, offset, 0);
    7102         565 :                 if (!op) {
    7103           0 :                         bs_clone_snapshot_origblob_cleanup(ctx, -ENOMEM);
    7104           0 :                         return;
    7105             :                 }
    7106             : 
    7107         565 :                 bs_allocate_and_copy_cluster(_blob, ctx->channel, offset, op);
    7108         565 :         } else {
    7109          70 :                 bs_inflate_blob_done(ctx);
    7110             :         }
    7111         635 : }
    7112             : 
                       /*
                        * Completion callback for the spdk_bs_open_blob() issued by
                        * bs_inflate_blob().
                        *
                        * Records the opened blob and its md_ro state in ctx, takes the blob's
                        * locked_operation_in_progress flag, validates the request against the
                        * blob's parent type, checks that the blobstore has enough free clusters
                        * and then starts the per-cluster loop in bs_inflate_blob_touch_next().
                        *
                        * Error codes produced here:
                        *   -EBUSY  another locked operation is in progress on the blob
                        *   -EINVAL decouple requested (allocate_all false) on a blob with no parent
                        *   -ENOSPC more clusters are needed than bs->num_free_clusters
                        * A blob that is not thin provisioned completes with 0 without changes.
                        */
    7113             : static void
    7114          75 : bs_inflate_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7115             : {
    7116          75 :         struct spdk_clone_snapshot_ctx *ctx = (struct spdk_clone_snapshot_ctx *)cb_arg;
    7117             :         uint64_t clusters_needed;
    7118             :         uint64_t i;
    7119             : 
    7120          75 :         if (bserrno != 0) {
    7121           0 :                 bs_clone_snapshot_cleanup_finish(ctx, bserrno);
    7122           0 :                 return;
    7123             :         }
    7124             : 
    7125          75 :         ctx->original.blob = _blob;
    7126          75 :         ctx->original.md_ro = _blob->md_ro;
    7127             : 
    7128          75 :         if (_blob->locked_operation_in_progress) {
    7129           0 :                 SPDK_DEBUGLOG(blob, "Cannot inflate blob - another operation in progress\n");
    7130           0 :                 ctx->bserrno = -EBUSY;
                                       /* The flag belongs to the other operation, so only close the blob. */
    7131           0 :                 spdk_blob_close(_blob, bs_clone_snapshot_cleanup_finish, ctx);
    7132           0 :                 return;
    7133             :         }
    7134             : 
    7135          75 :         _blob->locked_operation_in_progress = true;
    7136             : 
    7137          75 :         switch (_blob->parent_id) {
    7138             :         case SPDK_BLOBID_INVALID:
    7139          10 :                 if (!ctx->allocate_all) {
    7140             :                         /* This blob has no parent, so we cannot decouple it. */
    7141           5 :                         SPDK_ERRLOG("Cannot decouple parent of blob with no parent.\n");
    7142           5 :                         bs_clone_snapshot_origblob_cleanup(ctx, -EINVAL);
    7143           5 :                         return;
    7144             :                 }
    7145           5 :                 break;
    7146             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    7147             :                 /*
    7148             :                  * It would be better to rely on back_bs_dev->is_zeroes(), to determine which
    7149             :                  * clusters require allocation. Until there is a blobstore consumer that
    7150             :                  * uses esnaps with an spdk_bs_dev that implements a useful is_zeroes() it is not
    7151             :                  * worth the effort.
    7152             :                  */
    7153          10 :                 ctx->allocate_all = true;
    7154          10 :                 break;
    7155             :         default:
    7156          55 :                 break;
    7157             :         }
    7158             : 
    7159          70 :         if (spdk_blob_is_thin_provisioned(_blob) == false) {
    7160             :                 /* This is not thin provisioned blob. No need to inflate. */
    7161           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, 0);
    7162           0 :                 return;
    7163             :         }
    7164             : 
    7165             :         /* Do two passes - one to verify that we can obtain enough clusters
    7166             :          * and another to actually claim them.
    7167             :          */
    7168          70 :         clusters_needed = 0;
    7169         820 :         for (i = 0; i < _blob->active.num_clusters; i++) {
    7170         750 :                 if (bs_cluster_needs_allocation(_blob, i, ctx->allocate_all)) {
    7171         565 :                         clusters_needed++;
    7172         565 :                 }
    7173         750 :         }
    7174             : 
    7175          70 :         if (clusters_needed > _blob->bs->num_free_clusters) {
    7176             :                 /* Not enough free clusters. Cannot satisfy the request. */
    7177           0 :                 bs_clone_snapshot_origblob_cleanup(ctx, -ENOSPC);
    7178           0 :                 return;
    7179             :         }
    7180             : 
                               /* Second pass: allocate the clusters one at a time. */
    7181          70 :         ctx->cluster = 0;
    7182          70 :         bs_inflate_blob_touch_next(ctx, 0);
    7183          75 : }
    7184             : 
                       /*
                        * Common entry point for spdk_bs_inflate_blob() and
                        * spdk_bs_blob_decouple_parent().
                        *
                        * Allocates the operation context, stores the caller's completion and the
                        * request parameters in it and opens the blob; the work continues in
                        * bs_inflate_blob_open_cpl().
                        *
                        * bs           - blobstore that owns the blob
                        * channel      - I/O channel stored in the context for the cluster copies
                        * blobid       - id of the blob to inflate
                        * allocate_all - true for a full inflate, false to decouple from the parent
                        * cb_fn/cb_arg - completion callback; called directly with -ENOMEM if the
                        *                context cannot be allocated
                        */
    7185             : static void
    7186          75 : bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7187             :                 spdk_blob_id blobid, bool allocate_all, spdk_blob_op_complete cb_fn, void *cb_arg)
    7188             : {
    7189          75 :         struct spdk_clone_snapshot_ctx *ctx = calloc(1, sizeof(*ctx));
    7190             : 
    7191          75 :         if (!ctx) {
    7192           0 :                 cb_fn(cb_arg, -ENOMEM);
    7193           0 :                 return;
    7194             :         }
                               /* Store the callback through the union member that matches cpl.type. */
    7195          75 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7196          75 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
    7197          75 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
    7198          75 :         ctx->bserrno = 0;
    7199          75 :         ctx->original.id = blobid;
    7200          75 :         ctx->channel = channel;
    7201          75 :         ctx->allocate_all = allocate_all;
    7202             : 
    7203          75 :         spdk_bs_open_blob(bs, ctx->original.id, bs_inflate_blob_open_cpl, ctx);
    7204          75 : }
    7205             : 
                       /*
                        * Public entry point for a full inflate: forwards to bs_inflate_blob()
                        * with allocate_all set to true. cb_fn is called with the result.
                        */
    7206             : void
    7207          35 : spdk_bs_inflate_blob(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7208             :                      spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7209             : {
    7210          35 :         bs_inflate_blob(bs, channel, blobid, true, cb_fn, cb_arg);
    7211          35 : }
    7212             : 
                       /*
                        * Public entry point for decoupling a blob from its parent: forwards to
                        * bs_inflate_blob() with allocate_all set to false. For a blob that has no
                        * parent the operation completes with -EINVAL (see
                        * bs_inflate_blob_open_cpl()).
                        */
    7213             : void
    7214          40 : spdk_bs_blob_decouple_parent(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7215             :                              spdk_blob_id blobid, spdk_blob_op_complete cb_fn, void *cb_arg)
    7216             : {
    7217          40 :         bs_inflate_blob(bs, channel, blobid, false, cb_fn, cb_arg);
    7218          40 : }
    7219             : /* END spdk_bs_inflate_blob */
    7220             : 
    7221             : /* START spdk_bs_blob_shallow_copy */
    7222             : 
                       /*
                        * State of one spdk_bs_blob_shallow_copy() operation. Allocated with
                        * calloc() in spdk_bs_blob_shallow_copy() and freed in
                        * bs_shallow_copy_cleanup_finish().
                        */
    7223             : struct shallow_copy_ctx {
                               /* Caller's completion callback and the first error recorded (0 if none) */
    7224             :         struct spdk_bs_cpl cpl;
    7225             :         int bserrno;
    7226             : 
    7227             :         /* Blob source for copy */
    7228             :         struct spdk_blob_store *bs;
    7229             :         spdk_blob_id blobid;
                               /* NULL until the open callback has validated the blob */
    7230             :         struct spdk_blob *blob;
    7231             :         struct spdk_io_channel *blob_channel;
    7232             : 
    7233             :         /* Destination device for copy */
    7234             :         struct spdk_bs_dev *ext_dev;
    7235             :         struct spdk_io_channel *ext_channel;
    7236             : 
    7237             :         /* Current cluster for copy operation */
    7238             :         uint64_t cluster;
    7239             : 
    7240             :         /* Buffer for blob reading; one cluster in size */
    7241             :         uint8_t *read_buff;
    7242             : 
    7243             :         /* Struct for external device writing */
    7244             :         struct spdk_bs_dev_cb_args ext_args;
    7245             : 
    7246             :         /* Actual number of copied clusters; only updated when status_cb is set */
    7247             :         uint64_t copied_clusters_count;
    7248             : 
    7249             :         /* Status callback for updates about the ongoing operation */
    7250             :         spdk_blob_shallow_copy_status status_cb;
    7251             : 
    7252             :         /* Argument passed to function status_cb */
    7253             :         void *status_cb_arg;
    7254             : };
    7255             : 
                       /*
                        * Last step of a shallow copy; also used as the spdk_blob_close()
                        * completion. Destroys the external device channel, frees the read
                        * buffer, reports ctx->bserrno to the caller's callback and frees ctx.
                        *
                        * cb_arg  - struct shallow_copy_ctx of the operation
                        * bserrno - result of the preceding step (normally the blob close); when
                        *           non-zero it is logged and replaces ctx->bserrno
                        */
    7256             : static void
    7257          20 : bs_shallow_copy_cleanup_finish(void *cb_arg, int bserrno)
    7258             : {
    7259          20 :         struct shallow_copy_ctx *ctx = cb_arg;
    7260          20 :         struct spdk_bs_cpl *cpl = &ctx->cpl;
    7261             : 
    7262          20 :         if (bserrno != 0) {
                                       /*
                                        * Log ctx->blobid rather than ctx->blob->id: ctx->blob is still
                                        * NULL when the blob is closed from one of the early validation
                                        * failures in bs_shallow_copy_blob_open_cpl().
                                        */
    7263           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, cleanup error %d\n", ctx->blobid, bserrno);
    7264           0 :                 ctx->bserrno = bserrno;
    7265           0 :         }
    7266             : 
    7267          20 :         ctx->ext_dev->destroy_channel(ctx->ext_dev, ctx->ext_channel);
    7268          20 :         spdk_free(ctx->read_buff);
    7269             : 
    7270          20 :         cpl->u.blob_basic.cb_fn(cpl->u.blob_basic.cb_arg, ctx->bserrno);
    7271             : 
    7272          20 :         free(ctx);
    7273          20 : }
    7274             : 
                       /*
                        * Completion callback of the write of one cluster to the external device.
                        *
                        * On error the error is recorded, the blob lock flag is released and the
                        * blob is closed, finishing in bs_shallow_copy_cleanup_finish().
                        * On success the cluster index is advanced, the status callback (if any)
                        * is told the new number of copied clusters, and the search for the next
                        * allocated cluster continues.
                        *
                        * channel - unused here
                        * cb_arg  - struct shallow_copy_ctx of the operation
                        * bserrno - result of the write
                        */
    7275             : static void
    7276          10 : bs_shallow_copy_bdev_write_cpl(struct spdk_io_channel *channel, void *cb_arg, int bserrno)
    7277             : {
    7278          10 :         struct shallow_copy_ctx *ctx = cb_arg;
    7279          10 :         struct spdk_blob *_blob = ctx->blob;
    7280             : 
    7281          10 :         if (bserrno != 0) {
    7282           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, ext dev write error %d\n", ctx->blob->id, bserrno);
    7283           0 :                 ctx->bserrno = bserrno;
    7284           0 :                 _blob->locked_operation_in_progress = false;
    7285           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7286           0 :                 return;
    7287             :         }
    7288             : 
    7289          10 :         ctx->cluster++;
                               /* The copied-clusters counter is only maintained when a status callback exists. */
    7290          10 :         if (ctx->status_cb) {
    7291          10 :                 ctx->copied_clusters_count++;
    7292          10 :                 ctx->status_cb(ctx->copied_clusters_count, ctx->status_cb_arg);
    7293          10 :         }
    7294             : 
    7295          10 :         bs_shallow_copy_cluster_find_next(ctx);
    7296          10 : }
    7297             : 
                       /*
                        * Completion callback of the read of one cluster from the blob into
                        * ctx->read_buff.
                        *
                        * On error the error is recorded, the blob lock flag is released and the
                        * blob is closed, finishing in bs_shallow_copy_cleanup_finish().
                        * On success the buffer is written to the external device at the LBA of
                        * the current cluster; bs_shallow_copy_bdev_write_cpl() is the completion.
                        *
                        * cb_arg  - struct shallow_copy_ctx of the operation
                        * bserrno - result of the read
                        */
    7298             : static void
    7299          10 : bs_shallow_copy_blob_read_cpl(void *cb_arg, int bserrno)
    7300             : {
    7301          10 :         struct shallow_copy_ctx *ctx = cb_arg;
    7302          10 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7303          10 :         struct spdk_blob *_blob = ctx->blob;
    7304             : 
    7305          10 :         if (bserrno != 0) {
    7306           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob read error %d\n", ctx->blob->id, bserrno);
    7307           0 :                 ctx->bserrno = bserrno;
    7308           0 :                 _blob->locked_operation_in_progress = false;
    7309           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7310           0 :                 return;
    7311             :         }
    7312             : 
    7313          10 :         ctx->ext_args.channel = ctx->ext_channel;
    7314          10 :         ctx->ext_args.cb_fn = bs_shallow_copy_bdev_write_cpl;
    7315          10 :         ctx->ext_args.cb_arg = ctx;
    7316             : 
                               /*
                                * NOTE(review): the LBA and block count are computed from the
                                * blobstore and its device (_blob->bs, _blob->bs->dev), not from
                                * ext_dev. The open callback only requires the blobstore block
                                * size to be a multiple of ext_dev->blocklen - confirm the units
                                * ext_dev->write() expects when the two block sizes differ.
                                */
    7317          20 :         ext_dev->write(ext_dev, ctx->ext_channel, ctx->read_buff,
    7318          10 :                        bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7319          10 :                        bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7320          10 :                        &ctx->ext_args);
    7321          10 : }
    7322             : 
                       /*
                        * Find the next cluster to copy, starting at ctx->cluster.
                        *
                        * Clusters with no entry in the blob's own cluster map are skipped, so
                        * only clusters allocated to this blob are copied. When one is found it
                        * is read into ctx->read_buff with bs_shallow_copy_blob_read_cpl() as
                        * completion. When the end of the blob is reached the lock flag is
                        * released and the blob is closed, finishing the operation in
                        * bs_shallow_copy_cleanup_finish().
                        *
                        * cb_arg - struct shallow_copy_ctx of the operation
                        */
    7323             : static void
    7324          15 : bs_shallow_copy_cluster_find_next(void *cb_arg)
    7325             : {
    7326          15 :         struct shallow_copy_ctx *ctx = cb_arg;
    7327          15 :         struct spdk_blob *_blob = ctx->blob;
    7328             : 
    7329          25 :         while (ctx->cluster < _blob->active.num_clusters) {
    7330          20 :                 if (_blob->active.clusters[ctx->cluster] != 0) {
    7331          10 :                         break;
    7332             :                 }
    7333             : 
    7334          10 :                 ctx->cluster++;
    7335             :         }
    7336             : 
    7337          15 :         if (ctx->cluster < _blob->active.num_clusters) {
                                       /* Read exactly one cluster, expressed in blobstore device blocks. */
    7338          20 :                 blob_request_submit_op_single(ctx->blob_channel, _blob, ctx->read_buff,
    7339          10 :                                               bs_cluster_to_lba(_blob->bs, ctx->cluster),
    7340          10 :                                               bs_dev_byte_to_lba(_blob->bs->dev, _blob->bs->cluster_sz),
    7341          10 :                                               bs_shallow_copy_blob_read_cpl, ctx, SPDK_BLOB_READ);
    7342          10 :         } else {
    7343           5 :                 _blob->locked_operation_in_progress = false;
    7344           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7345             :         }
    7346          15 : }
    7347             : 
                       /*
                        * Completion callback for the spdk_bs_open_blob() issued by
                        * spdk_bs_blob_shallow_copy().
                        *
                        * Validates the request and, if everything is acceptable, takes the
                        * blob's locked_operation_in_progress flag and starts the copy loop.
                        * Every validation failure records an error in ctx->bserrno and closes
                        * the blob, finishing in bs_shallow_copy_cleanup_finish():
                        *   -EPERM  the blob is not read only
                        *   -EINVAL the external device is smaller than the blob
                        *   -EINVAL the blobstore block size is not a multiple of the external
                        *           device block size
                        *   -EBUSY  another locked operation is in progress on the blob
                        *
                        * cb_arg  - struct shallow_copy_ctx of the operation
                        * _blob   - the opened blob
                        * bserrno - result of the open
                        */
    7348             : static void
    7349          20 : bs_shallow_copy_blob_open_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    7350             : {
    7351          20 :         struct shallow_copy_ctx *ctx = cb_arg;
    7352          20 :         struct spdk_bs_dev *ext_dev = ctx->ext_dev;
    7353             :         uint32_t blob_block_size;
    7354             :         uint64_t blob_total_size;
    7355             : 
    7356          20 :         if (bserrno != 0) {
    7357           0 :                 SPDK_ERRLOG("Shallow copy blob open error %d\n", bserrno);
    7358           0 :                 ctx->bserrno = bserrno;
                                       /* Nothing was opened, so go straight to the final cleanup. */
    7359           0 :                 bs_shallow_copy_cleanup_finish(ctx, 0);
    7360           0 :                 return;
    7361             :         }
    7362             : 
    7363          20 :         if (!spdk_blob_is_read_only(_blob)) {
    7364           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, blob must be read only\n", _blob->id);
    7365           5 :                 ctx->bserrno = -EPERM;
    7366           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7367           5 :                 return;
    7368             :         }
    7369             : 
    7370          15 :         blob_block_size = _blob->bs->dev->blocklen;
    7371          15 :         blob_total_size = spdk_blob_get_num_clusters(_blob) * spdk_bs_get_cluster_size(_blob->bs);
    7372             : 
    7373          15 :         if (blob_total_size > ext_dev->blockcnt * ext_dev->blocklen) {
    7374           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device must have at least blob size\n",
    7375             :                             _blob->id);
    7376           5 :                 ctx->bserrno = -EINVAL;
    7377           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7378           5 :                 return;
    7379             :         }
    7380             : 
    7381          10 :         if (blob_block_size % ext_dev->blocklen != 0) {
    7382           5 :                 SPDK_ERRLOG("blob 0x%" PRIx64 " shallow copy, external device block size is not compatible with \
    7383             : blobstore block size\n", _blob->id);
    7384           5 :                 ctx->bserrno = -EINVAL;
    7385           5 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7386           5 :                 return;
    7387             :         }
    7388             : 
                               /* From here on ctx->blob is valid; the failure paths above leave it NULL. */
    7389           5 :         ctx->blob = _blob;
    7390             : 
    7391           5 :         if (_blob->locked_operation_in_progress) {
    7392           0 :                 SPDK_DEBUGLOG(blob, "blob 0x%" PRIx64 " shallow copy - another operation in progress\n", _blob->id);
    7393           0 :                 ctx->bserrno = -EBUSY;
    7394           0 :                 spdk_blob_close(_blob, bs_shallow_copy_cleanup_finish, ctx);
    7395           0 :                 return;
    7396             :         }
    7397             : 
    7398           5 :         _blob->locked_operation_in_progress = true;
    7399             : 
    7400           5 :         ctx->cluster = 0;
    7401           5 :         bs_shallow_copy_cluster_find_next(ctx);
    7402          20 : }
    7403             : 
                       /*
                        * Start a shallow copy of a blob to an external device: only the clusters
                        * allocated to the blob itself are copied.
                        *
                        * Allocates the operation context, a one-cluster DMA read buffer and a
                        * channel on ext_dev, then opens the blob; validation and the copy loop
                        * continue in bs_shallow_copy_blob_open_cpl().
                        *
                        * bs            - blobstore that owns the blob
                        * channel       - I/O channel used to read from the blob
                        * blobid        - id of the blob to copy
                        * ext_dev       - destination device
                        * status_cb_fn  - optional progress callback, called after each copied
                        *                 cluster with the number of clusters copied so far
                        * status_cb_arg - argument for status_cb_fn
                        * cb_fn/cb_arg  - completion callback
                        *
                        * Returns 0 when the operation was started (the result is then reported
                        * through cb_fn), or -ENOMEM when the context, the read buffer or the
                        * external channel could not be created (cb_fn is not called).
                        */
    7404             : int
    7405          20 : spdk_bs_blob_shallow_copy(struct spdk_blob_store *bs, struct spdk_io_channel *channel,
    7406             :                           spdk_blob_id blobid, struct spdk_bs_dev *ext_dev,
    7407             :                           spdk_blob_shallow_copy_status status_cb_fn, void *status_cb_arg,
    7408             :                           spdk_blob_op_complete cb_fn, void *cb_arg)
    7409             : {
    7410             :         struct shallow_copy_ctx *ctx;
    7411             :         struct spdk_io_channel *ext_channel;
    7412             : 
    7413          20 :         ctx = calloc(1, sizeof(*ctx));
    7414          20 :         if (!ctx) {
    7415           0 :                 return -ENOMEM;
    7416             :         }
    7417             : 
    7418          20 :         ctx->bs = bs;
    7419          20 :         ctx->blobid = blobid;
                               /*
                                * Store the callback through blob_basic: it matches cpl.type and is
                                * the member bs_shallow_copy_cleanup_finish() reads.
                                */
    7420          20 :         ctx->cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    7421          20 :         ctx->cpl.u.blob_basic.cb_fn = cb_fn;
    7422          20 :         ctx->cpl.u.blob_basic.cb_arg = cb_arg;
    7423          20 :         ctx->bserrno = 0;
    7424          20 :         ctx->blob_channel = channel;
    7425          20 :         ctx->status_cb = status_cb_fn;
    7426          20 :         ctx->status_cb_arg = status_cb_arg;
    7427          20 :         ctx->read_buff = spdk_malloc(bs->cluster_sz, bs->dev->blocklen, NULL,
    7428             :                                      SPDK_ENV_LCORE_ID_ANY, SPDK_MALLOC_DMA);
    7429          20 :         if (!ctx->read_buff) {
    7430           0 :                 free(ctx);
    7431           0 :                 return -ENOMEM;
    7432             :         }
    7433             : 
    7434          20 :         ext_channel = ext_dev->create_channel(ext_dev);
    7435          20 :         if (!ext_channel) {
    7436           0 :                 spdk_free(ctx->read_buff);
    7437           0 :                 free(ctx);
    7438           0 :                 return -ENOMEM;
    7439             :         }
    7440          20 :         ctx->ext_dev = ext_dev;
    7441          20 :         ctx->ext_channel = ext_channel;
    7442             : 
    7443          20 :         spdk_bs_open_blob(ctx->bs, ctx->blobid, bs_shallow_copy_blob_open_cpl, ctx);
    7444             : 
    7445          20 :         return 0;
    7446          20 : }
    7447             : /* END spdk_bs_blob_shallow_copy */
    7448             : 
    7449             : /* START spdk_bs_blob_set_parent */
    7450             : 
                       /*
                        * State of one set-parent operation. Freed in
                        * bs_set_parent_cleanup_finish() after the caller's callback has run.
                        */
    7451             : struct set_parent_ctx {
    7452             :         struct spdk_blob_store *bs;
                               /* First error recorded during the operation, 0 if none */
    7453             :         int                     bserrno;
                               /* Caller's completion callback and its argument */
    7454             :         spdk_bs_op_complete     cb_fn;
    7455             :         void                    *cb_arg;
    7456             : 
                               /* Blob whose parent is being changed, and the md_ro value restored on close */
    7457             :         struct spdk_blob        *blob;
    7458             :         bool                    blob_md_ro;
    7459             : 
                               /* New parent; the functions in this section use parent.u.snapshot.{id,blob} */
    7460             :         struct blob_parent      parent;
    7461             : };
    7462             : 
                       /*
                        * Last step of a set-parent operation; also used as a spdk_blob_close()
                        * completion. Reports ctx->bserrno to the caller's callback and frees ctx.
                        *
                        * cb_arg  - struct set_parent_ctx of the operation
                        * bserrno - result of the preceding step; logged when non-zero and stored
                        *           in ctx->bserrno only if no earlier error was recorded
                        */
    7463             : static void
    7464          30 : bs_set_parent_cleanup_finish(void *cb_arg, int bserrno)
    7465             : {
    7466          30 :         struct set_parent_ctx *ctx = cb_arg;
    7467             : 
    7468          30 :         assert(ctx != NULL);
    7469             : 
    7470          30 :         if (bserrno != 0) {
    7471           0 :                 SPDK_ERRLOG("blob set parent finish error %d\n", bserrno);
                                       /* Keep the first error: it is the one that made the operation fail. */
    7472           0 :                 if (ctx->bserrno == 0) {
    7473           0 :                         ctx->bserrno = bserrno;
    7474           0 :                 }
    7475           0 :         }
    7476             : 
    7477          30 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7478             : 
    7479          30 :         free(ctx);
    7480          30 : }
    7481             : 
                       /*
                        * Completion callback of closing the child blob.
                        *
                        * If the operation has already failed (ctx->bserrno != 0) the snapshot
                        * opened for it is closed as well, finishing in
                        * bs_set_parent_cleanup_finish(). Otherwise the snapshot is not closed
                        * here and the operation finishes immediately; a failure of the blob
                        * close is logged and becomes the result.
                        *
                        * cb_arg  - struct set_parent_ctx of the operation
                        * bserrno - result of closing the child blob
                        */
    7482             : static void
    7483          25 : bs_set_parent_close_snapshot(void *cb_arg, int bserrno)
    7484             : {
    7485          25 :         struct set_parent_ctx *ctx = cb_arg;
    7486             : 
    7487          25 :         if (ctx->bserrno != 0) {
    7488          10 :                 spdk_blob_close(ctx->parent.u.snapshot.blob, bs_set_parent_cleanup_finish, ctx);
    7489          10 :                 return;
    7490             :         }
    7491             : 
    7492          15 :         if (bserrno != 0) {
    7493           0 :                 SPDK_ERRLOG("blob close error %d\n", bserrno);
    7494           0 :                 ctx->bserrno = bserrno;
    7495           0 :         }
    7496             : 
    7497          15 :         bs_set_parent_cleanup_finish(ctx, ctx->bserrno);
    7498          25 : }
    7499             : 
                       /*
                        * Completion callback of the metadata sync (also called directly by
                        * bs_set_parent_set_back_bs_dev_done() on failure).
                        *
                        * Records the error if none was recorded before, restores the blob's
                        * original md_ro value, releases the locked_operation_in_progress flag on
                        * both blob and snapshot, and closes the blob with
                        * bs_set_parent_close_snapshot() as completion.
                        *
                        * cb_arg  - struct set_parent_ctx of the operation
                        * bserrno - result of the preceding step
                        */
    7500             : static void
    7501          15 : bs_set_parent_close_blob(void *cb_arg, int bserrno)
    7502             : {
    7503          15 :         struct set_parent_ctx *ctx = cb_arg;
    7504          15 :         struct spdk_blob *blob = ctx->blob;
    7505          15 :         struct spdk_blob *snapshot = ctx->parent.u.snapshot.blob;
    7506             : 
    7507          15 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7508           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7509           0 :                 ctx->bserrno = bserrno;
    7510           0 :         }
    7511             : 
    7512             :         /* Revert md_ro to original state */
    7513          15 :         blob->md_ro = ctx->blob_md_ro;
    7514             : 
    7515          15 :         blob->locked_operation_in_progress = false;
    7516          15 :         snapshot->locked_operation_in_progress = false;
    7517             : 
    7518          15 :         spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7519          15 : }
    7520             : 
                       /*
                        * Completion callback of replacing the blob's back_bs_dev.
                        *
                        * On failure the error is recorded and the operation is unwound through
                        * bs_set_parent_close_blob(). On success the blob's metadata is synced
                        * with bs_set_parent_close_blob() as completion.
                        *
                        * cb_arg  - struct set_parent_ctx of the operation
                        * bserrno - result of setting the backing device
                        */
    7521             : static void
    7522          15 : bs_set_parent_set_back_bs_dev_done(void *cb_arg, int bserrno)
    7523             : {
    7524          15 :         struct set_parent_ctx *ctx = cb_arg;
    7525          15 :         struct spdk_blob *blob = ctx->blob;
    7526             : 
    7527          15 :         if (bserrno != 0) {
    7528           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7529           0 :                 ctx->bserrno = bserrno;
    7530           0 :                 bs_set_parent_close_blob(ctx, bserrno);
    7531           0 :                 return;
    7532             :         }
    7533             : 
    7534          15 :         spdk_blob_sync_md(blob, bs_set_parent_close_blob, ctx);
    7535          15 : }
    7536             : 
                       /*
                        * Update the in-memory references of `blob` so that it points to the
                        * snapshot described by `parent`: the blob is removed from the blob list,
                        * the BLOB_SNAPSHOT xattr and parent_id are set to the snapshot id, any
                        * external-snapshot flag and xattr are removed, and the blob is added
                        * back to the blob list.
                        *
                        * Returns 0 on success or the error returned by blob_set_xattr().
                        */
    7537             : static int
    7538          15 : bs_set_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7539             : {
    7540             :         int rc;
    7541             : 
    7542          15 :         bs_blob_list_remove(blob);
    7543             : 
    7544          15 :         rc = blob_set_xattr(blob, BLOB_SNAPSHOT, &parent->u.snapshot.id, sizeof(spdk_blob_id), true);
    7545          15 :         if (rc != 0) {
                                       /*
                                        * NOTE(review): this path returns after bs_blob_list_remove()
                                        * without adding the blob back to the list.
                                        */
    7546           0 :                 SPDK_ERRLOG("error %d setting snapshot xattr\n", rc);
    7547           0 :                 return rc;
    7548             :         }
    7549          15 :         blob->parent_id = parent->u.snapshot.id;
    7550             : 
    7551          15 :         if (blob_is_esnap_clone(blob)) {
    7552             :                 /* Remove the xattr that references the external snapshot */
    7553           5 :                 blob->invalid_flags &= ~SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7554           5 :                 blob_remove_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, true);
    7555           5 :         }
    7556             : 
    7557          15 :         bs_blob_list_add(blob);
    7558             : 
    7559          15 :         return 0;
    7560          15 : }
    7561             : 
    7562             : static void
    7563          25 : bs_set_parent_snapshot_open_cpl(void *cb_arg, struct spdk_blob *snapshot, int bserrno)
    7564             : {
    7565          25 :         struct set_parent_ctx *ctx = cb_arg;
    7566          25 :         struct spdk_blob *blob = ctx->blob;
    7567             :         struct spdk_bs_dev *back_bs_dev;
    7568             : 
    7569          25 :         if (bserrno != 0) {
    7570           0 :                 SPDK_ERRLOG("snapshot open error %d\n", bserrno);
    7571           0 :                 ctx->bserrno = bserrno;
    7572           0 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7573           0 :                 return;
    7574             :         }
    7575             : 
    7576          25 :         ctx->parent.u.snapshot.blob = snapshot;
    7577          25 :         ctx->parent.u.snapshot.id = snapshot->id;
    7578             : 
    7579          25 :         if (!spdk_blob_is_snapshot(snapshot)) {
    7580           5 :                 SPDK_ERRLOG("parent blob is not a snapshot\n");
    7581           5 :                 ctx->bserrno = -EINVAL;
    7582           5 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7583           5 :                 return;
    7584             :         }
    7585             : 
    7586          20 :         if (blob->active.num_clusters != snapshot->active.num_clusters) {
    7587           5 :                 SPDK_ERRLOG("parent blob has a number of clusters different from child's ones\n");
    7588           5 :                 ctx->bserrno = -EINVAL;
    7589           5 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7590           5 :                 return;
    7591             :         }
    7592             : 
    7593          15 :         if (blob->locked_operation_in_progress || snapshot->locked_operation_in_progress) {
    7594           0 :                 SPDK_ERRLOG("cannot set parent of blob, another operation in progress\n");
    7595           0 :                 ctx->bserrno = -EBUSY;
    7596           0 :                 spdk_blob_close(blob, bs_set_parent_close_snapshot, ctx);
    7597           0 :                 return;
    7598             :         }
    7599             : 
    7600          15 :         blob->locked_operation_in_progress = true;
    7601          15 :         snapshot->locked_operation_in_progress = true;
    7602             : 
    7603             :         /* Temporarily override md_ro flag for MD modification */
    7604          15 :         blob->md_ro = false;
    7605             : 
    7606          15 :         back_bs_dev = bs_create_blob_bs_dev(snapshot);
    7607             : 
    7608          30 :         blob_set_back_bs_dev(blob, back_bs_dev, bs_set_parent_refs, &ctx->parent,
    7609             :                              bs_set_parent_set_back_bs_dev_done,
    7610          15 :                              ctx);
    7611          25 : }
    7612             : 
    7613             : static void
    7614          30 : bs_set_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7615             : {
    7616          30 :         struct set_parent_ctx *ctx = cb_arg;
    7617             : 
    7618          30 :         if (bserrno != 0) {
    7619           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7620           0 :                 ctx->bserrno = bserrno;
    7621           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7622           0 :                 return;
    7623             :         }
    7624             : 
    7625          30 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7626           5 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7627           5 :                 ctx->bserrno = -EINVAL;
    7628           5 :                 spdk_blob_close(blob, bs_set_parent_cleanup_finish, ctx);
    7629           5 :                 return;
    7630             :         }
    7631             : 
    7632          25 :         ctx->blob = blob;
    7633          25 :         ctx->blob_md_ro = blob->md_ro;
    7634             : 
    7635          25 :         spdk_bs_open_blob(ctx->bs, ctx->parent.u.snapshot.id, bs_set_parent_snapshot_open_cpl, ctx);
    7636          30 : }
    7637             : 
    7638             : void
    7639          45 : spdk_bs_blob_set_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7640             :                         spdk_blob_id snapshot_id, spdk_blob_op_complete cb_fn, void *cb_arg)
    7641             : {
    7642             :         struct set_parent_ctx *ctx;
    7643             : 
    7644          45 :         if (snapshot_id == SPDK_BLOBID_INVALID) {
    7645           5 :                 SPDK_ERRLOG("snapshot id not valid\n");
    7646           5 :                 cb_fn(cb_arg, -EINVAL);
    7647           5 :                 return;
    7648             :         }
    7649             : 
    7650          40 :         if (blob_id == snapshot_id) {
    7651           5 :                 SPDK_ERRLOG("blob id and snapshot id cannot be the same\n");
    7652           5 :                 cb_fn(cb_arg, -EINVAL);
    7653           5 :                 return;
    7654             :         }
    7655             : 
    7656          35 :         if (spdk_blob_get_parent_snapshot(bs, blob_id) == snapshot_id) {
    7657           5 :                 SPDK_NOTICELOG("snapshot is already the parent of blob\n");
    7658           5 :                 cb_fn(cb_arg, -EEXIST);
    7659           5 :                 return;
    7660             :         }
    7661             : 
    7662          30 :         ctx = calloc(1, sizeof(*ctx));
    7663          30 :         if (!ctx) {
    7664           0 :                 cb_fn(cb_arg, -ENOMEM);
    7665           0 :                 return;
    7666             :         }
    7667             : 
    7668          30 :         ctx->bs = bs;
    7669          30 :         ctx->parent.u.snapshot.id = snapshot_id;
    7670          30 :         ctx->cb_fn = cb_fn;
    7671          30 :         ctx->cb_arg = cb_arg;
    7672          30 :         ctx->bserrno = 0;
    7673             : 
    7674          30 :         spdk_bs_open_blob(bs, blob_id, bs_set_parent_blob_open_cpl, ctx);
    7675          45 : }
    7676             : /* END spdk_bs_blob_set_parent */
    7677             : 
    7678             : /* START spdk_bs_blob_set_external_parent */
    7679             : 
    7680             : static void
    7681          20 : bs_set_external_parent_cleanup_finish(void *cb_arg, int bserrno)
    7682             : {
    7683          20 :         struct set_parent_ctx *ctx = cb_arg;
    7684             : 
    7685          20 :         if (bserrno != 0) {
    7686           0 :                 SPDK_ERRLOG("blob set external parent finish error %d\n", bserrno);
    7687           0 :                 if (ctx->bserrno == 0) {
    7688           0 :                         ctx->bserrno = bserrno;
    7689           0 :                 }
    7690           0 :         }
    7691             : 
    7692          20 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
    7693             : 
    7694          20 :         free(ctx->parent.u.esnap.id);
    7695          20 :         free(ctx);
    7696          20 : }
    7697             : 
    7698             : static void
    7699          10 : bs_set_external_parent_close_blob(void *cb_arg, int bserrno)
    7700             : {
    7701          10 :         struct set_parent_ctx *ctx = cb_arg;
    7702          10 :         struct spdk_blob *blob = ctx->blob;
    7703             : 
    7704          10 :         if (bserrno != 0 && ctx->bserrno == 0) {
    7705           0 :                 SPDK_ERRLOG("error %d in metadata sync\n", bserrno);
    7706           0 :                 ctx->bserrno = bserrno;
    7707           0 :         }
    7708             : 
    7709             :         /* Revert md_ro to original state */
    7710          10 :         blob->md_ro = ctx->blob_md_ro;
    7711             : 
    7712          10 :         blob->locked_operation_in_progress = false;
    7713             : 
    7714          10 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7715          10 : }
    7716             : 
    7717             : static void
    7718          10 : bs_set_external_parent_unfrozen(void *cb_arg, int bserrno)
    7719             : {
    7720          10 :         struct set_parent_ctx *ctx = cb_arg;
    7721          10 :         struct spdk_blob *blob = ctx->blob;
    7722             : 
    7723          10 :         if (bserrno != 0) {
    7724           0 :                 SPDK_ERRLOG("error %d setting back_bs_dev\n", bserrno);
    7725           0 :                 ctx->bserrno = bserrno;
    7726           0 :                 bs_set_external_parent_close_blob(ctx, bserrno);
    7727           0 :                 return;
    7728             :         }
    7729             : 
    7730          10 :         spdk_blob_sync_md(blob, bs_set_external_parent_close_blob, ctx);
    7731          10 : }
    7732             : 
    7733             : static int
    7734          10 : bs_set_external_parent_refs(struct spdk_blob *blob, struct blob_parent *parent)
    7735             : {
    7736             :         int rc;
    7737             : 
    7738          10 :         bs_blob_list_remove(blob);
    7739             : 
    7740          10 :         if (spdk_blob_is_clone(blob)) {
    7741             :                 /* Remove the xattr that references the snapshot */
    7742           0 :                 blob->parent_id = SPDK_BLOBID_INVALID;
    7743           0 :                 blob_remove_xattr(blob, BLOB_SNAPSHOT, true);
    7744           0 :         }
    7745             : 
    7746          20 :         rc = blob_set_xattr(blob, BLOB_EXTERNAL_SNAPSHOT_ID, parent->u.esnap.id,
    7747          10 :                             parent->u.esnap.id_len, true);
    7748          10 :         if (rc != 0) {
    7749           0 :                 SPDK_ERRLOG("error %d setting external snapshot xattr\n", rc);
    7750           0 :                 return rc;
    7751             :         }
    7752          10 :         blob->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    7753          10 :         blob->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    7754             : 
    7755          10 :         bs_blob_list_add(blob);
    7756             : 
    7757          10 :         return 0;
    7758          10 : }
    7759             : 
    7760             : static void
    7761          20 : bs_set_external_parent_blob_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    7762             : {
    7763          20 :         struct set_parent_ctx *ctx = cb_arg;
    7764             :         const void *esnap_id;
    7765             :         size_t esnap_id_len;
    7766             :         int rc;
    7767             : 
    7768          20 :         if (bserrno != 0) {
    7769           0 :                 SPDK_ERRLOG("blob open error %d\n", bserrno);
    7770           0 :                 ctx->bserrno = bserrno;
    7771           0 :                 bs_set_parent_cleanup_finish(ctx, 0);
    7772           0 :                 return;
    7773             :         }
    7774             : 
    7775          20 :         ctx->blob = blob;
    7776          20 :         ctx->blob_md_ro = blob->md_ro;
    7777             : 
    7778          20 :         rc = spdk_blob_get_esnap_id(blob, &esnap_id, &esnap_id_len);
    7779          20 :         if (rc == 0 && esnap_id != NULL && esnap_id_len == ctx->parent.u.esnap.id_len &&
    7780           5 :             memcmp(esnap_id, ctx->parent.u.esnap.id, esnap_id_len) == 0) {
    7781           5 :                 SPDK_ERRLOG("external snapshot is already the parent of blob\n");
    7782           5 :                 ctx->bserrno = -EEXIST;
    7783           5 :                 goto error;
    7784             :         }
    7785             : 
    7786          15 :         if (!spdk_blob_is_thin_provisioned(blob)) {
    7787           5 :                 SPDK_ERRLOG("blob is not thin-provisioned\n");
    7788           5 :                 ctx->bserrno = -EINVAL;
    7789           5 :                 goto error;
    7790             :         }
    7791             : 
    7792          10 :         if (blob->locked_operation_in_progress) {
    7793           0 :                 SPDK_ERRLOG("cannot set external parent of blob, another operation in progress\n");
    7794           0 :                 ctx->bserrno = -EBUSY;
    7795           0 :                 goto error;
    7796             :         }
    7797             : 
    7798          10 :         blob->locked_operation_in_progress = true;
    7799             : 
    7800             :         /* Temporarily override md_ro flag for MD modification */
    7801          10 :         blob->md_ro = false;
    7802             : 
    7803          20 :         blob_set_back_bs_dev(blob, ctx->parent.u.esnap.back_bs_dev, bs_set_external_parent_refs,
    7804          10 :                              &ctx->parent, bs_set_external_parent_unfrozen, ctx);
    7805          10 :         return;
    7806             : 
    7807             : error:
    7808          10 :         spdk_blob_close(blob, bs_set_external_parent_cleanup_finish, ctx);
    7809          20 : }
    7810             : 
    7811             : void
    7812          30 : spdk_bs_blob_set_external_parent(struct spdk_blob_store *bs, spdk_blob_id blob_id,
    7813             :                                  struct spdk_bs_dev *esnap_bs_dev, const void *esnap_id,
    7814             :                                  uint32_t esnap_id_len, spdk_blob_op_complete cb_fn, void *cb_arg)
    7815             : {
    7816             :         struct set_parent_ctx *ctx;
    7817             :         uint64_t esnap_dev_size, cluster_sz;
    7818             : 
    7819          30 :         if (sizeof(blob_id) == esnap_id_len && memcmp(&blob_id, esnap_id, sizeof(blob_id)) == 0) {
    7820           5 :                 SPDK_ERRLOG("blob id and external snapshot id cannot be the same\n");
    7821           5 :                 cb_fn(cb_arg, -EINVAL);
    7822           5 :                 return;
    7823             :         }
    7824             : 
    7825          25 :         esnap_dev_size = esnap_bs_dev->blockcnt * esnap_bs_dev->blocklen;
    7826          25 :         cluster_sz = spdk_bs_get_cluster_size(bs);
    7827          25 :         if ((esnap_dev_size % cluster_sz) != 0) {
    7828           5 :                 SPDK_ERRLOG("Esnap device size %" PRIu64 " is not an integer multiple of "
    7829             :                             "cluster size %" PRIu64 "\n", esnap_dev_size, cluster_sz);
    7830           5 :                 cb_fn(cb_arg, -EINVAL);
    7831           5 :                 return;
    7832             :         }
    7833             : 
    7834          20 :         ctx = calloc(1, sizeof(*ctx));
    7835          20 :         if (!ctx) {
    7836           0 :                 cb_fn(cb_arg, -ENOMEM);
    7837           0 :                 return;
    7838             :         }
    7839             : 
    7840          20 :         ctx->parent.u.esnap.id = calloc(1, esnap_id_len);
    7841          20 :         if (!ctx->parent.u.esnap.id) {
    7842           0 :                 free(ctx);
    7843           0 :                 cb_fn(cb_arg, -ENOMEM);
    7844           0 :                 return;
    7845             :         }
    7846             : 
    7847          20 :         ctx->bs = bs;
    7848          20 :         ctx->parent.u.esnap.back_bs_dev = esnap_bs_dev;
    7849          20 :         memcpy(ctx->parent.u.esnap.id, esnap_id, esnap_id_len);
    7850          20 :         ctx->parent.u.esnap.id_len = esnap_id_len;
    7851          20 :         ctx->cb_fn = cb_fn;
    7852          20 :         ctx->cb_arg = cb_arg;
    7853          20 :         ctx->bserrno = 0;
    7854             : 
    7855          20 :         spdk_bs_open_blob(bs, blob_id, bs_set_external_parent_blob_open_cpl, ctx);
    7856          30 : }
    7857             : /* END spdk_bs_blob_set_external_parent */
    7858             : 
    7859             : /* START spdk_blob_resize */
    7860             : struct spdk_bs_resize_ctx {
    7861             :         spdk_blob_op_complete cb_fn;
    7862             :         void *cb_arg;
    7863             :         struct spdk_blob *blob;
    7864             :         uint64_t sz;
    7865             :         int rc;
    7866             : };
    7867             : 
    7868             : static void
    7869         252 : bs_resize_unfreeze_cpl(void *cb_arg, int rc)
    7870             : {
    7871         252 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7872             : 
    7873         252 :         if (rc != 0) {
    7874           0 :                 SPDK_ERRLOG("Unfreeze failed, rc=%d\n", rc);
    7875           0 :         }
    7876             : 
    7877         252 :         if (ctx->rc != 0) {
    7878           5 :                 SPDK_ERRLOG("Unfreeze failed, ctx->rc=%d\n", ctx->rc);
    7879           5 :                 rc = ctx->rc;
    7880           5 :         }
    7881             : 
    7882         252 :         ctx->blob->locked_operation_in_progress = false;
    7883             : 
    7884         252 :         ctx->cb_fn(ctx->cb_arg, rc);
    7885         252 :         free(ctx);
    7886         252 : }
    7887             : 
    7888             : static void
    7889         252 : bs_resize_freeze_cpl(void *cb_arg, int rc)
    7890             : {
    7891         252 :         struct spdk_bs_resize_ctx *ctx = (struct spdk_bs_resize_ctx *)cb_arg;
    7892             : 
    7893         252 :         if (rc != 0) {
    7894           0 :                 ctx->blob->locked_operation_in_progress = false;
    7895           0 :                 ctx->cb_fn(ctx->cb_arg, rc);
    7896           0 :                 free(ctx);
    7897           0 :                 return;
    7898             :         }
    7899             : 
    7900         252 :         ctx->rc = blob_resize(ctx->blob, ctx->sz);
    7901             : 
    7902         252 :         blob_unfreeze_io(ctx->blob, bs_resize_unfreeze_cpl, ctx);
    7903         252 : }
    7904             : 
    7905             : void
    7906         269 : spdk_blob_resize(struct spdk_blob *blob, uint64_t sz, spdk_blob_op_complete cb_fn, void *cb_arg)
    7907             : {
    7908             :         struct spdk_bs_resize_ctx *ctx;
    7909             : 
    7910         269 :         blob_verify_md_op(blob);
    7911             : 
    7912         269 :         SPDK_DEBUGLOG(blob, "Resizing blob 0x%" PRIx64 " to %" PRIu64 " clusters\n", blob->id, sz);
    7913             : 
    7914         269 :         if (blob->md_ro) {
    7915           5 :                 cb_fn(cb_arg, -EPERM);
    7916           5 :                 return;
    7917             :         }
    7918             : 
    7919         264 :         if (sz == blob->active.num_clusters) {
    7920          12 :                 cb_fn(cb_arg, 0);
    7921          12 :                 return;
    7922             :         }
    7923             : 
    7924         252 :         if (blob->locked_operation_in_progress) {
    7925           0 :                 cb_fn(cb_arg, -EBUSY);
    7926           0 :                 return;
    7927             :         }
    7928             : 
    7929         252 :         ctx = calloc(1, sizeof(*ctx));
    7930         252 :         if (!ctx) {
    7931           0 :                 cb_fn(cb_arg, -ENOMEM);
    7932           0 :                 return;
    7933             :         }
    7934             : 
    7935         252 :         blob->locked_operation_in_progress = true;
    7936         252 :         ctx->cb_fn = cb_fn;
    7937         252 :         ctx->cb_arg = cb_arg;
    7938         252 :         ctx->blob = blob;
    7939         252 :         ctx->sz = sz;
    7940         252 :         blob_freeze_io(blob, bs_resize_freeze_cpl, ctx);
    7941         269 : }
    7942             : 
    7943             : /* END spdk_blob_resize */
    7944             : 
    7945             : 
    7946             : /* START spdk_bs_delete_blob */
    7947             : 
    7948             : static void
    7949        1862 : bs_delete_close_cpl(void *cb_arg, int bserrno)
    7950             : {
    7951        1862 :         spdk_bs_sequence_t *seq = cb_arg;
    7952             : 
    7953        1862 :         bs_sequence_finish(seq, bserrno);
    7954        1862 : }
    7955             : 
    7956             : static void
    7957        1862 : bs_delete_persist_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    7958             : {
    7959        1862 :         struct spdk_blob *blob = cb_arg;
    7960             : 
    7961        1862 :         if (bserrno != 0) {
    7962             :                 /*
    7963             :                  * We already removed this blob from the blobstore tailq, so
    7964             :                  *  we need to free it here since this is the last reference
    7965             :                  *  to it.
    7966             :                  */
    7967           0 :                 blob_free(blob);
    7968           0 :                 bs_delete_close_cpl(seq, bserrno);
    7969           0 :                 return;
    7970             :         }
    7971             : 
    7972             :         /*
    7973             :          * This will immediately decrement the ref_count and call
    7974             :          *  the completion routine since the metadata state is clean.
    7975             :          *  By calling spdk_blob_close, we reduce the number of call
    7976             :          *  points into code that touches the blob->open_ref count
    7977             :          *  and the blobstore's blob list.
    7978             :          */
    7979        1862 :         spdk_blob_close(blob, bs_delete_close_cpl, seq);
    7980        1862 : }
    7981             : 
    7982             : struct delete_snapshot_ctx {
    7983             :         struct spdk_blob_list *parent_snapshot_entry;
    7984             :         struct spdk_blob *snapshot;
    7985             :         struct spdk_blob_md_page *page;
    7986             :         bool snapshot_md_ro;
    7987             :         struct spdk_blob *clone;
    7988             :         bool clone_md_ro;
    7989             :         spdk_blob_op_with_handle_complete cb_fn;
    7990             :         void *cb_arg;
    7991             :         int bserrno;
    7992             :         uint32_t next_extent_page;
    7993             : };
    7994             : 
    7995             : static void
    7996         138 : delete_blob_cleanup_finish(void *cb_arg, int bserrno)
    7997             : {
    7998         138 :         struct delete_snapshot_ctx *ctx = cb_arg;
    7999             : 
    8000         138 :         if (bserrno != 0) {
    8001           0 :                 SPDK_ERRLOG("Snapshot cleanup error %d\n", bserrno);
    8002           0 :         }
    8003             : 
    8004         138 :         assert(ctx != NULL);
    8005             : 
    8006         138 :         if (bserrno != 0 && ctx->bserrno == 0) {
    8007           0 :                 ctx->bserrno = bserrno;
    8008           0 :         }
    8009             : 
    8010         138 :         ctx->cb_fn(ctx->cb_arg, ctx->snapshot, ctx->bserrno);
    8011         138 :         spdk_free(ctx->page);
    8012         138 :         free(ctx);
    8013         138 : }
    8014             : 
    8015             : static void
    8016          28 : delete_snapshot_cleanup_snapshot(void *cb_arg, int bserrno)
    8017             : {
    8018          28 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8019             : 
    8020          28 :         if (bserrno != 0) {
    8021           0 :                 ctx->bserrno = bserrno;
    8022           0 :                 SPDK_ERRLOG("Clone cleanup error %d\n", bserrno);
    8023           0 :         }
    8024             : 
    8025          28 :         if (ctx->bserrno != 0) {
    8026          28 :                 assert(blob_lookup(ctx->snapshot->bs, ctx->snapshot->id) == NULL);
    8027          28 :                 RB_INSERT(spdk_blob_tree, &ctx->snapshot->bs->open_blobs, ctx->snapshot);
    8028          28 :                 spdk_bit_array_set(ctx->snapshot->bs->open_blobids, ctx->snapshot->id);
    8029          28 :         }
    8030             : 
    8031          28 :         ctx->snapshot->locked_operation_in_progress = false;
    8032          28 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8033             : 
    8034          28 :         spdk_blob_close(ctx->snapshot, delete_blob_cleanup_finish, ctx);
    8035          28 : }
    8036             : 
    8037             : static void
    8038          15 : delete_snapshot_cleanup_clone(void *cb_arg, int bserrno)
    8039             : {
    8040          15 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8041             : 
    8042          15 :         ctx->clone->locked_operation_in_progress = false;
    8043          15 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8044             : 
    8045          15 :         spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8046          15 : }
    8047             : 
    8048             : static void
    8049          60 : delete_snapshot_unfreeze_cpl(void *cb_arg, int bserrno)
    8050             : {
    8051          60 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8052             : 
    8053          60 :         if (bserrno) {
    8054           0 :                 ctx->bserrno = bserrno;
    8055           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8056           0 :                 return;
    8057             :         }
    8058             : 
    8059          60 :         ctx->clone->locked_operation_in_progress = false;
    8060          60 :         spdk_blob_close(ctx->clone, delete_blob_cleanup_finish, ctx);
    8061          60 : }
    8062             : 
    8063             : static void
    8064          65 : delete_snapshot_sync_snapshot_cpl(void *cb_arg, int bserrno)
    8065             : {
    8066          65 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8067          65 :         struct spdk_blob_list *parent_snapshot_entry = NULL;
    8068          65 :         struct spdk_blob_list *snapshot_entry = NULL;
    8069          65 :         struct spdk_blob_list *clone_entry = NULL;
    8070          65 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8071             : 
    8072          65 :         if (bserrno) {
    8073           5 :                 SPDK_ERRLOG("Failed to sync MD on blob\n");
    8074           5 :                 ctx->bserrno = bserrno;
    8075           5 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8076           5 :                 return;
    8077             :         }
    8078             : 
    8079             :         /* Get snapshot entry for the snapshot we want to remove */
    8080          60 :         snapshot_entry = bs_get_snapshot_entry(ctx->snapshot->bs, ctx->snapshot->id);
    8081             : 
    8082          60 :         assert(snapshot_entry != NULL);
    8083             : 
    8084             :         /* Remove clone entry in this snapshot (at this point there can be only one clone) */
    8085          60 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8086          60 :         assert(clone_entry != NULL);
    8087          60 :         TAILQ_REMOVE(&snapshot_entry->clones, clone_entry, link);
    8088          60 :         snapshot_entry->clone_count--;
    8089          60 :         assert(TAILQ_EMPTY(&snapshot_entry->clones));
    8090             : 
    8091          60 :         switch (ctx->snapshot->parent_id) {
    8092             :         case SPDK_BLOBID_INVALID:
    8093             :         case SPDK_BLOBID_EXTERNAL_SNAPSHOT:
    8094             :                 /* No parent snapshot - just remove clone entry */
    8095          50 :                 free(clone_entry);
    8096          50 :                 break;
    8097             :         default:
    8098             :                 /* This snapshot is at the same time a clone of another snapshot - we need to
    8099             :                  * update parent snapshot (remove current clone, add new one inherited from
    8100             :                  * the snapshot that is being removed) */
    8101             : 
    8102             :                 /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8103             :                  * snapshot that we are removing */
    8104          10 :                 blob_get_snapshot_and_clone_entries(ctx->snapshot, &parent_snapshot_entry,
    8105             :                                                     &snapshot_clone_entry);
    8106             : 
    8107             :                 /* Switch clone entry in parent snapshot */
    8108          10 :                 TAILQ_INSERT_TAIL(&parent_snapshot_entry->clones, clone_entry, link);
    8109          10 :                 TAILQ_REMOVE(&parent_snapshot_entry->clones, snapshot_clone_entry, link);
    8110          10 :                 free(snapshot_clone_entry);
    8111          10 :         }
    8112             : 
    8113             :         /* Restore md_ro flags */
    8114          60 :         ctx->clone->md_ro = ctx->clone_md_ro;
    8115          60 :         ctx->snapshot->md_ro = ctx->snapshot_md_ro;
    8116             : 
    8117          60 :         blob_unfreeze_io(ctx->clone, delete_snapshot_unfreeze_cpl, ctx);
    8118          65 : }
    8119             : 
                       /*
                        * Completion callback for the clone metadata sync issued at the end of
                        * delete_snapshot_update_extent_pages_cpl().
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * bserrno - result of the clone metadata sync.
                        *
                        * On failure the error is stored in ctx->bserrno, the
                        * SNAPSHOT_PENDING_REMOVAL xattr is removed from the snapshot and the
                        * snapshot metadata is synced, continuing in
                        * delete_snapshot_cleanup_clone(). If removing the xattr itself fails,
                        * delete_snapshot_cleanup_clone() is called directly with that error.
                        *
                        * On success every snapshot cluster / extent page entry that equals the
                        * clone's entry at the same index is zeroed in the snapshot, the snapshot
                        * is marked thin provisioned and dirty, and its metadata is synced,
                        * continuing in delete_snapshot_sync_snapshot_cpl().
                        */
    8120             : static void
    8121          70 : delete_snapshot_sync_clone_cpl(void *cb_arg, int bserrno)
    8122             : {
    8123          70 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8124             :         uint64_t i;
    8125             : 
                               /* Cleared before the error check, so both paths below modify the
                                * snapshot with md_ro == false.
                                * NOTE(review): presumably needed because an earlier metadata sync of
                                * the snapshot set md_ro again (see blob_sync_md_cpl()) - confirm. */
    8126          70 :         ctx->snapshot->md_ro = false;
    8127             : 
    8128          70 :         if (bserrno) {
    8129           5 :                 SPDK_ERRLOG("Failed to sync MD on clone\n");
    8130           5 :                 ctx->bserrno = bserrno;
    8131             : 
    8132             :                 /* Restore snapshot to previous state */
    8133           5 :                 bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8134           5 :                 if (bserrno != 0) {
    8135           0 :                         delete_snapshot_cleanup_clone(ctx, bserrno);
    8136           0 :                         return;
    8137             :                 }
    8138             : 
    8139           5 :                 spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8140           5 :                 return;
    8141             :         }
    8142             : 
    8143             :         /* Clear cluster map entries for snapshot */
                               /* Only indexes present in both maps are visited; an entry is cleared
                                * only when snapshot and clone hold the same value, and the allocated
                                * counter is decremented only for non-zero values. */
    8144         690 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8145         625 :                 if (ctx->clone->active.clusters[i] == ctx->snapshot->active.clusters[i]) {
    8146         615 :                         if (ctx->snapshot->active.clusters[i] != 0) {
    8147         410 :                                 ctx->snapshot->active.num_allocated_clusters--;
    8148         410 :                         }
    8149         615 :                         ctx->snapshot->active.clusters[i] = 0;
    8150         615 :                 }
    8151         625 :         }
                               /* Same rule for extent pages: clear the snapshot entry when it equals
                                * the clone entry at that index. */
    8152         143 :         for (i = 0; i < ctx->snapshot->active.num_extent_pages &&
    8153          78 :              i < ctx->clone->active.num_extent_pages; i++) {
    8154          39 :                 if (ctx->clone->active.extent_pages[i] == ctx->snapshot->active.extent_pages[i]) {
    8155          36 :                         ctx->snapshot->active.extent_pages[i] = 0;
    8156          36 :                 }
    8157          39 :         }
    8158             : 
    8159          65 :         blob_set_thin_provision(ctx->snapshot);
    8160          65 :         ctx->snapshot->state = SPDK_BLOB_STATE_DIRTY;
    8161             : 
                               /* When a parent snapshot exists the clone was given this back_bs_dev
                                * pointer in delete_snapshot_update_extent_pages_cpl(); drop the
                                * snapshot's copy of the pointer.
                                * NOTE(review): presumably so the device is not destroyed together
                                * with the snapshot - confirm. */
    8162          65 :         if (ctx->parent_snapshot_entry != NULL) {
    8163          10 :                 ctx->snapshot->back_bs_dev = NULL;
    8164          10 :         }
    8165             : 
    8166          65 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_cpl, ctx);
    8167          70 : }
    8168             : 
                       /*
                        * Called by delete_snapshot_update_extent_pages() once its loop over the
                        * extent pages has finished.
                        *
                        * Destroys the clone's current backing device, then re-parents the clone
                        * according to what the snapshot being deleted was backed by:
                        *  - external snapshot: copy the BLOB_EXTERNAL_SNAPSHOT_ID xattr to the
                        *    clone and hand the snapshot's back_bs_dev over to the clone;
                        *  - parent snapshot: use the parent's id and the snapshot's back_bs_dev;
                        *  - neither: SPDK_BLOBID_INVALID and a fresh zeroes device.
                        * Finally syncs the clone metadata, continuing in
                        * delete_snapshot_sync_clone_cpl().
                        *
                        * If copying the external snapshot xattr fails, the error is stored in
                        * ctx->bserrno, SNAPSHOT_PENDING_REMOVAL is removed from the snapshot and
                        * the flow continues in delete_snapshot_cleanup_clone().
                        *
                        * NOTE(review): the return values of blob_set_xattr() and
                        * blob_remove_xattr() on the clone (second and third branch) are not
                        * checked - confirm this is intentional.
                        */
    8169             : static void
    8170          70 : delete_snapshot_update_extent_pages_cpl(struct delete_snapshot_ctx *ctx)
    8171             : {
    8172             :         int bserrno;
    8173             : 
    8174             :         /* Delete old backing bs_dev from clone (related to snapshot that will be removed) */
    8175          70 :         blob_back_bs_destroy(ctx->clone);
    8176             : 
    8177             :         /* Set/remove snapshot xattr and switch parent ID and backing bs_dev on clone... */
    8178          70 :         if (ctx->snapshot->parent_id == SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    8179          10 :                 bserrno = bs_snapshot_copy_xattr(ctx->clone, ctx->snapshot,
    8180             :                                                  BLOB_EXTERNAL_SNAPSHOT_ID);
    8181          10 :                 if (bserrno != 0) {
    8182           0 :                         ctx->bserrno = bserrno;
    8183             : 
    8184             :                         /* Restore snapshot to previous state */
    8185           0 :                         bserrno = blob_remove_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, true);
    8186           0 :                         if (bserrno != 0) {
    8187           0 :                                 delete_snapshot_cleanup_clone(ctx, bserrno);
    8188           0 :                                 return;
    8189             :                         }
    8190             : 
    8191           0 :                         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_cleanup_clone, ctx);
    8192           0 :                         return;
    8193             :                 }
    8194          10 :                 ctx->clone->parent_id = SPDK_BLOBID_EXTERNAL_SNAPSHOT;
    8195          10 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8196             :                 /* Do not delete the external snapshot along with this snapshot */
    8197          10 :                 ctx->snapshot->back_bs_dev = NULL;
    8198          10 :                 ctx->clone->invalid_flags |= SPDK_BLOB_EXTERNAL_SNAPSHOT;
    8199          70 :         } else if (ctx->parent_snapshot_entry != NULL) {
    8200             :                 /* ...to parent snapshot */
    8201          10 :                 ctx->clone->parent_id = ctx->parent_snapshot_entry->id;
                                       /* The snapshot's own back_bs_dev pointer is left in place here; it
                                        * is cleared later in delete_snapshot_sync_clone_cpl(). */
    8202          10 :                 ctx->clone->back_bs_dev = ctx->snapshot->back_bs_dev;
    8203          10 :                 blob_set_xattr(ctx->clone, BLOB_SNAPSHOT, &ctx->parent_snapshot_entry->id,
    8204             :                                sizeof(spdk_blob_id),
    8205             :                                true);
    8206          10 :         } else {
    8207             :                 /* ...to blobid invalid and zeroes dev */
    8208          50 :                 ctx->clone->parent_id = SPDK_BLOBID_INVALID;
    8209          50 :                 ctx->clone->back_bs_dev = bs_create_zeroes_dev();
    8210          50 :                 blob_remove_xattr(ctx->clone, BLOB_SNAPSHOT, true);
    8211             :         }
    8212             : 
    8213          70 :         spdk_blob_sync_md(ctx->clone, delete_snapshot_sync_clone_cpl, ctx);
    8214          70 : }
    8215             : 
                       /*
                        * Walks the extent page indexes shared by snapshot and clone, starting at
                        * ctx->next_extent_page, and brings the clone's extent page table in line
                        * with the already merged cluster map.
                        *
                        * The function is also passed to blob_write_extent_page() as its own
                        * completion callback: when an extent page has to be rewritten it stores
                        * the next index in ctx->next_extent_page, starts the write and returns;
                        * the loop resumes from that index when it is called again. Once the loop
                        * runs to the end, delete_snapshot_update_extent_pages_cpl() is called.
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * bserrno - result of the previous extent page write (0 on first call).
                        *
                        * NOTE(review): bserrno is never examined, so a failed extent page write
                        * does not stop the sequence - confirm whether that is intended.
                        */
    8216             : static void
    8217          73 : delete_snapshot_update_extent_pages(void *cb_arg, int bserrno)
    8218             : {
    8219          73 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8220             :         uint32_t *extent_page;
    8221             :         uint64_t i;
    8222             : 
    8223         154 :         for (i = ctx->next_extent_page; i < ctx->snapshot->active.num_extent_pages &&
    8224          81 :              i < ctx->clone->active.num_extent_pages; i++) {
    8225          42 :                 if (ctx->snapshot->active.extent_pages[i] == 0) {
    8226             :                         /* No extent page to use from snapshot */
    8227          12 :                         continue;
    8228             :                 }
    8229             : 
    8230          30 :                 extent_page = &ctx->clone->active.extent_pages[i];
    8231          30 :                 if (*extent_page == 0) {
    8232             :                         /* Copy extent page from snapshot when clone did not have a matching one */
    8233          27 :                         *extent_page = ctx->snapshot->active.extent_pages[i];
    8234          27 :                         continue;
    8235             :                 }
    8236             : 
    8237             :                 /* Clone and snapshot both contain partially filled matching extent pages.
    8238             :                  * Update the clone extent page in place with cluster map containing the mix of both. */
    8239           3 :                 ctx->next_extent_page = i + 1;
                                       /* NOTE(review): ctx->page is allocated with bs->md_page_size in
                                        * bs_delete_open_cpl() but zeroed here with SPDK_BS_PAGE_SIZE -
                                        * confirm the two sizes always match. */
    8240           3 :                 memset(ctx->page, 0, SPDK_BS_PAGE_SIZE);
    8241             : 
                                       /* Asynchronous: this function is re-entered as the completion and
                                        * continues from ctx->next_extent_page. */
    8242           6 :                 blob_write_extent_page(ctx->clone, *extent_page, i * SPDK_EXTENTS_PER_EP, ctx->page,
    8243           3 :                                        delete_snapshot_update_extent_pages, ctx);
    8244           3 :                 return;
    8245             :         }
    8246          70 :         delete_snapshot_update_extent_pages_cpl(ctx);
    8247          73 : }
    8248             : 
                       /*
                        * Completion callback for the snapshot metadata sync that follows setting
                        * the SNAPSHOT_PENDING_REMOVAL xattr (started from
                        * delete_snapshot_freeze_io_cb() or
                        * delete_snapshot_esnap_channels_destroyed_cb()).
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * bserrno - result of the snapshot metadata sync.
                        *
                        * Saves the clone's md_ro flag in ctx->clone_md_ro and clears it. On
                        * failure, stores the error in ctx->bserrno and continues in
                        * delete_snapshot_cleanup_clone(). On success, fills every zero entry of
                        * the clone's cluster map with the snapshot's entry at the same index and
                        * then starts the extent page pass.
                        */
    8249             : static void
    8250          75 : delete_snapshot_sync_snapshot_xattr_cpl(void *cb_arg, int bserrno)
    8251             : {
    8252          75 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8253             :         uint64_t i;
    8254             : 
    8255             :         /* Temporarily override md_ro flag for clone for MD modification */
                               /* Done before the error check, so ctx->clone_md_ro holds the original
                                * value on the failure path as well. */
    8256          75 :         ctx->clone_md_ro = ctx->clone->md_ro;
    8257          75 :         ctx->clone->md_ro = false;
    8258             : 
    8259          75 :         if (bserrno) {
    8260           5 :                 SPDK_ERRLOG("Failed to sync MD with xattr on blob\n");
    8261           5 :                 ctx->bserrno = bserrno;
    8262           5 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8263           5 :                 return;
    8264             :         }
    8265             : 
    8266             :         /* Copy snapshot map to clone map (only unallocated clusters in clone) */
                               /* The clone's allocated counter grows only when the copied snapshot
                                * entry is itself non-zero. */
    8267         745 :         for (i = 0; i < ctx->snapshot->active.num_clusters && i < ctx->clone->active.num_clusters; i++) {
    8268         675 :                 if (ctx->clone->active.clusters[i] == 0) {
    8269         665 :                         ctx->clone->active.clusters[i] = ctx->snapshot->active.clusters[i];
    8270         665 :                         if (ctx->clone->active.clusters[i] != 0) {
    8271         460 :                                 ctx->clone->active.num_allocated_clusters++;
    8272         460 :                         }
    8273         665 :                 }
    8274         675 :         }
                               /* Start the extent page pass from index 0. */
    8275          70 :         ctx->next_extent_page = 0;
    8276          70 :         delete_snapshot_update_extent_pages(ctx, 0);
    8277          75 : }
    8278             : 
                       /*
                        * Completion callback for blob_esnap_destroy_bs_dev_channels(), used by
                        * delete_snapshot_freeze_io_cb() when the snapshot is an esnap clone.
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * blob    - blob reported by the channel teardown; used only for logging.
                        * bserrno - result of the channel teardown; logged but otherwise ignored.
                        *
                        * Always continues by syncing the snapshot metadata, with
                        * delete_snapshot_sync_snapshot_xattr_cpl() as completion.
                        */
    8279             : static void
    8280          10 : delete_snapshot_esnap_channels_destroyed_cb(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8281             : {
    8282          10 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8283             : 
    8284          10 :         if (bserrno != 0) {
    8285           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to destroy esnap channels: %d\n",
    8286             :                             blob->id, bserrno);
    8287             :                 /* That error should not stop us from syncing metadata. */
    8288           0 :         }
    8289             : 
    8290          10 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8291          10 : }
    8292             : 
                       /*
                        * Completion callback for blob_freeze_io() on the clone, started from
                        * delete_snapshot_open_clone_cb().
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * bserrno - result of freezing I/O on the clone.
                        *
                        * On success, saves and clears the snapshot's md_ro flag, sets the
                        * SNAPSHOT_PENDING_REMOVAL xattr (value: the clone's id) on the snapshot
                        * and syncs the snapshot metadata. For an esnap clone the esnap bs_dev
                        * channels are destroyed first and the sync is issued from
                        * delete_snapshot_esnap_channels_destroyed_cb().
                        *
                        * On any failure the error is left in ctx->bserrno and the flow continues
                        * in delete_snapshot_cleanup_clone().
                        */
    8293             : static void
    8294          75 : delete_snapshot_freeze_io_cb(void *cb_arg, int bserrno)
    8295             : {
    8296          75 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8297             : 
    8298          75 :         if (bserrno) {
    8299           0 :                 SPDK_ERRLOG("Failed to freeze I/O on clone\n");
    8300           0 :                 ctx->bserrno = bserrno;
                                       /* NOTE(review): ctx->snapshot_md_ro has not been saved yet on this
                                        * path (ctx comes from calloc() in bs_delete_open_cpl(), so it is
                                        * still zero) - confirm the cleanup path tolerates that. */
    8301           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8302           0 :                 return;
    8303             :         }
    8304             : 
    8305             :         /* Temporarily override md_ro flag for snapshot for MD modification */
    8306          75 :         ctx->snapshot_md_ro = ctx->snapshot->md_ro;
    8307          75 :         ctx->snapshot->md_ro = false;
    8308             : 
    8309             :         /* Mark blob as pending for removal for power failure safety, use clone id for recovery */
    8310          75 :         ctx->bserrno = blob_set_xattr(ctx->snapshot, SNAPSHOT_PENDING_REMOVAL, &ctx->clone->id,
    8311             :                                       sizeof(spdk_blob_id), true);
    8312          75 :         if (ctx->bserrno != 0) {
    8313           0 :                 delete_snapshot_cleanup_clone(ctx, 0);
    8314           0 :                 return;
    8315             :         }
    8316             : 
    8317          75 :         if (blob_is_esnap_clone(ctx->snapshot)) {
    8318          20 :                 blob_esnap_destroy_bs_dev_channels(ctx->snapshot, false,
    8319             :                                                    delete_snapshot_esnap_channels_destroyed_cb,
    8320          10 :                                                    ctx);
    8321          10 :                 return;
    8322             :         }
    8323             : 
    8324          65 :         spdk_blob_sync_md(ctx->snapshot, delete_snapshot_sync_snapshot_xattr_cpl, ctx);
    8325          75 : }
    8326             : 
                       /*
                        * Completion callback for spdk_bs_open_blob() on the snapshot's single
                        * clone, started from update_clone_on_snapshot_deletion().
                        *
                        * cb_arg  - the struct delete_snapshot_ctx of the deletion in progress.
                        * clone   - the opened clone (only used when bserrno == 0).
                        * bserrno - result of opening the clone.
                        *
                        * On open failure, stores the error and continues in
                        * delete_snapshot_cleanup_snapshot(). If the clone already has a locked
                        * operation in progress, sets ctx->bserrno to -EBUSY and closes the clone,
                        * with delete_snapshot_cleanup_snapshot() as close completion. Otherwise
                        * marks the clone as locked and freezes I/O on it, continuing in
                        * delete_snapshot_freeze_io_cb().
                        */
    8327             : static void
    8328          88 : delete_snapshot_open_clone_cb(void *cb_arg, struct spdk_blob *clone, int bserrno)
    8329             : {
    8330          88 :         struct delete_snapshot_ctx *ctx = cb_arg;
    8331             : 
    8332          88 :         if (bserrno) {
    8333          13 :                 SPDK_ERRLOG("Failed to open clone\n");
    8334          13 :                 ctx->bserrno = bserrno;
    8335          13 :                 delete_snapshot_cleanup_snapshot(ctx, 0);
    8336          13 :                 return;
    8337             :         }
    8338             : 
    8339          75 :         ctx->clone = clone;
    8340             : 
    8341          75 :         if (clone->locked_operation_in_progress) {
    8342           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress on its clone\n");
    8343           0 :                 ctx->bserrno = -EBUSY;
    8344           0 :                 spdk_blob_close(ctx->clone, delete_snapshot_cleanup_snapshot, ctx);
    8345           0 :                 return;
    8346             :         }
    8347             : 
    8348          75 :         clone->locked_operation_in_progress = true;
    8349             : 
    8350          75 :         blob_freeze_io(clone, delete_snapshot_freeze_io_cb, ctx);
    8351          88 : }
    8352             : 
                       /*
                        * First step of deleting a snapshot that has exactly one clone.
                        *
                        * snapshot - the snapshot being deleted.
                        * ctx      - deletion context; ctx->parent_snapshot_entry is filled in here.
                        *
                        * Looks up the snapshot's list entry and its first clone entry, records
                        * the parent snapshot entry (if any) in ctx and opens the clone,
                        * continuing in delete_snapshot_open_clone_cb().
                        *
                        * NOTE(review): snapshot_entry and clone_entry are validated only with
                        * assert(), so they are dereferenced unchecked when asserts are compiled
                        * out - callers are presumably expected to have verified the single-clone
                        * condition (see bs_is_blob_deletable()).
                        * NOTE(review): snapshot_clone_entry is filled in but not used here.
                        */
    8353             : static void
    8354          88 : update_clone_on_snapshot_deletion(struct spdk_blob *snapshot, struct delete_snapshot_ctx *ctx)
    8355             : {
    8356          88 :         struct spdk_blob_list *snapshot_entry = NULL;
    8357          88 :         struct spdk_blob_list *clone_entry = NULL;
    8358          88 :         struct spdk_blob_list *snapshot_clone_entry = NULL;
    8359             : 
    8360             :         /* Get snapshot entry for the snapshot we want to remove */
    8361          88 :         snapshot_entry = bs_get_snapshot_entry(snapshot->bs, snapshot->id);
    8362             : 
    8363          88 :         assert(snapshot_entry != NULL);
    8364             : 
    8365             :         /* Get clone of the snapshot (at this point there can be only one clone) */
    8366          88 :         clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8367          88 :         assert(snapshot_entry->clone_count == 1);
    8368          88 :         assert(clone_entry != NULL);
    8369             : 
    8370             :         /* Get snapshot entry for parent snapshot and clone entry within that snapshot for
    8371             :          * snapshot that we are removing */
    8372          88 :         blob_get_snapshot_and_clone_entries(snapshot, &ctx->parent_snapshot_entry,
    8373             :                                             &snapshot_clone_entry);
    8374             : 
    8375          88 :         spdk_bs_open_blob(snapshot->bs, clone_entry->id, delete_snapshot_open_clone_cb, ctx);
    8376          88 : }
    8377             : 
                       /*
                        * Final in-memory step of a blob deletion before the change is persisted.
                        *
                        * cb_arg  - the spdk_bs_sequence_t of the delete operation.
                        * blob    - the blob being deleted.
                        * bserrno - result of the preceding steps; non-zero aborts the deletion
                        *           and finishes the sequence with that error.
                        *
                        * On success, removes and frees the blob's entry in bs->snapshots (if it
                        * has one), clears its bit in bs->used_blobids, marks the blob dirty with
                        * zero metadata pages, resizes it to 0 and persists it, with
                        * bs_delete_persist_cpl() as completion.
                        *
                        * Called directly from bs_delete_open_cpl() and installed there as
                        * ctx->cb_fn of the delete context.
                        */
    8378             : static void
    8379        1940 : bs_delete_blob_finish(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8380             : {
    8381        1940 :         spdk_bs_sequence_t *seq = cb_arg;
    8382        1940 :         struct spdk_blob_list *snapshot_entry = NULL;
    8383             :         uint32_t page_num;
    8384             : 
    8385        1940 :         if (bserrno) {
    8386          78 :                 SPDK_ERRLOG("Failed to remove blob\n");
    8387          78 :                 bs_sequence_finish(seq, bserrno);
    8388          78 :                 return;
    8389             :         }
    8390             : 
    8391             :         /* Remove snapshot from the list */
    8392        1862 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8393        1862 :         if (snapshot_entry != NULL) {
    8394         180 :                 TAILQ_REMOVE(&blob->bs->snapshots, snapshot_entry, link);
    8395         180 :                 free(snapshot_entry);
    8396         180 :         }
    8397             : 
                               /* Release the blob id, then drop all metadata pages and clusters from
                                * the in-memory state so that the persist writes the deletion out. */
    8398        1862 :         page_num = bs_blobid_to_page(blob->id);
    8399        1862 :         spdk_bit_array_clear(blob->bs->used_blobids, page_num);
    8400        1862 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8401        1862 :         blob->active.num_pages = 0;
                               /* NOTE(review): the return value of blob_resize() is not checked. */
    8402        1862 :         blob_resize(blob, 0);
    8403             : 
    8404        1862 :         blob_persist(seq, blob, bs_delete_persist_cpl, blob);
    8405        1940 : }
    8406             : 
                       /*
                        * Decides whether a blob may be deleted right now.
                        *
                        * blob         - the blob to check; already opened by the delete context.
                        * update_clone - output, written only when 0 is returned: true when the
                        *                blob is a snapshot with exactly one clone that has to be
                        *                updated before the snapshot is removed, false otherwise.
                        *
                        * Returns 0 when the blob can be deleted, or -EBUSY when
                        *  - it is a snapshot with more than one clone,
                        *  - it is a snapshot with one clone, open_ref is 2 and the clone is not
                        *    currently open (so the second reference belongs to someone else), or
                        *  - open_ref is greater than 1 in any other case.
                        *
                        * NOTE(review): both "it is open" messages say "snapshot", although the
                        * second one is reached for any kind of blob.
                        */
    8407             : static int
    8408        1940 : bs_is_blob_deletable(struct spdk_blob *blob, bool *update_clone)
    8409             : {
    8410        1940 :         struct spdk_blob_list *snapshot_entry = NULL;
    8411        1940 :         struct spdk_blob_list *clone_entry = NULL;
    8412        1940 :         struct spdk_blob *clone = NULL;
    8413        1940 :         bool has_one_clone = false;
    8414             : 
    8415             :         /* Check if this is a snapshot with clones */
    8416        1940 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    8417        1940 :         if (snapshot_entry != NULL) {
    8418         243 :                 if (snapshot_entry->clone_count > 1) {
    8419          30 :                         SPDK_ERRLOG("Cannot remove snapshot with more than one clone\n");
    8420          30 :                         return -EBUSY;
    8421         213 :                 } else if (snapshot_entry->clone_count == 1) {
    8422          88 :                         has_one_clone = true;
    8423          88 :                 }
    8424         213 :         }
    8425             : 
    8426             :         /* Check if someone has this blob open (besides this delete context):
    8427             :          * - open_ref = 1 - only this context opened blob, so it is ok to remove it
    8428             :          * - open_ref <= 2 && has_one_clone = true - clone is holding snapshot
    8429             :          *      and that is ok, because we will update it accordingly */
    8430        1910 :         if (blob->open_ref <= 2 && has_one_clone) {
                                       /* has_one_clone is only set when snapshot_entry != NULL, so the
                                        * dereference below is safe. */
    8431          88 :                 clone_entry = TAILQ_FIRST(&snapshot_entry->clones);
    8432          88 :                 assert(clone_entry != NULL);
    8433          88 :                 clone = blob_lookup(blob->bs, clone_entry->id);
    8434             : 
    8435          88 :                 if (blob->open_ref == 2 && clone == NULL) {
    8436             :                         /* Clone is closed and someone else opened this blob */
    8437           0 :                         SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8438           0 :                         return -EBUSY;
    8439             :                 }
    8440             : 
    8441          88 :                 *update_clone = true;
    8442          88 :                 return 0;
    8443             :         }
    8444             : 
                               /* Also reached for a single-clone snapshot whose open_ref exceeds 2. */
    8445        1822 :         if (blob->open_ref > 1) {
    8446          20 :                 SPDK_ERRLOG("Cannot remove snapshot because it is open\n");
    8447          20 :                 return -EBUSY;
    8448             :         }
    8449             : 
    8450        1802 :         assert(has_one_clone == false);
    8451        1802 :         *update_clone = false;
    8452        1802 :         return 0;
    8453        1940 : }
    8454             : 
                       /*
                        * Close completion used by bs_delete_open_cpl() when allocating the
                        * delete context failed.
                        *
                        * cb_arg  - the spdk_bs_sequence_t of the delete operation.
                        * bserrno - result of the close; ignored, the sequence is always finished
                        *           with -ENOMEM.
                        */
    8455             : static void
    8456           0 : bs_delete_enomem_close_cpl(void *cb_arg, int bserrno)
    8457             : {
    8458           0 :         spdk_bs_sequence_t *seq = cb_arg;
    8459             : 
    8460           0 :         bs_sequence_finish(seq, -ENOMEM);
    8461           0 : }
    8462             : 
                       /*
                        * Completion callback for the spdk_bs_open_blob() issued by
                        * spdk_bs_delete_blob().
                        *
                        * cb_arg  - the spdk_bs_sequence_t of the delete operation.
                        * blob    - the blob to delete (valid when bserrno == 0).
                        * bserrno - result of opening the blob; non-zero finishes the sequence
                        *           with that error.
                        *
                        * Allocates a struct delete_snapshot_ctx, checks with
                        * bs_is_blob_deletable() whether the blob can be removed, marks the blob
                        * as locked and removes it from the blobstore's open-blob bookkeeping.
                        * Then either
                        *  - starts the clone update (update_clone_on_snapshot_deletion()) when
                        *    the blob is a snapshot with one clone, or
                        *  - removes the blob from the blob list and calls
                        *    bs_delete_blob_finish() directly; ctx is not needed on this path and
                        *    is freed here.
                        *
                        * Failure paths after ctx was allocated set ctx->bserrno and close the
                        * blob with delete_blob_cleanup_finish() as completion.
                        */
    8463             : static void
    8464        1953 : bs_delete_open_cpl(void *cb_arg, struct spdk_blob *blob, int bserrno)
    8465             : {
    8466        1953 :         spdk_bs_sequence_t *seq = cb_arg;
    8467             :         struct delete_snapshot_ctx *ctx;
    8468        1953 :         bool update_clone = false;
    8469             : 
    8470        1953 :         if (bserrno != 0) {
    8471          13 :                 bs_sequence_finish(seq, bserrno);
    8472          13 :                 return;
    8473             :         }
    8474             : 
    8475        1940 :         blob_verify_md_op(blob);
    8476             : 
    8477        1940 :         ctx = calloc(1, sizeof(*ctx));
    8478        1940 :         if (ctx == NULL) {
                                       /* No ctx to carry the error: a dedicated close completion
                                        * finishes the sequence with -ENOMEM. */
    8479           0 :                 spdk_blob_close(blob, bs_delete_enomem_close_cpl, seq);
    8480           0 :                 return;
    8481             :         }
    8482             : 
    8483        1940 :         ctx->snapshot = blob;
    8484        1940 :         ctx->cb_fn = bs_delete_blob_finish;
    8485        1940 :         ctx->cb_arg = seq;
    8486             : 
    8487             :         /* Check if blob can be removed and if it is a snapshot with clone on top of it */
    8488        1940 :         ctx->bserrno = bs_is_blob_deletable(blob, &update_clone);
    8489        1940 :         if (ctx->bserrno) {
    8490          50 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8491          50 :                 return;
    8492             :         }
    8493             : 
    8494        1890 :         if (blob->locked_operation_in_progress) {
    8495           0 :                 SPDK_DEBUGLOG(blob, "Cannot remove blob - another operation in progress\n");
    8496           0 :                 ctx->bserrno = -EBUSY;
    8497           0 :                 spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8498           0 :                 return;
    8499             :         }
    8500             : 
    8501        1890 :         blob->locked_operation_in_progress = true;
    8502             : 
    8503             :         /*
    8504             :          * Remove the blob from the blob_store list now, to ensure it does not
    8505             :          *  get returned after this point by blob_lookup().
    8506             :          */
    8507        1890 :         spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    8508        1890 :         RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8509             : 
    8510        1890 :         if (update_clone) {
                                       /* Scratch page used by delete_snapshot_update_extent_pages() when
                                        * it rewrites clone extent pages. */
    8511          88 :                 ctx->page = spdk_zmalloc(blob->bs->md_page_size, 0, NULL, SPDK_ENV_NUMA_ID_ANY,
    8512             :                                          SPDK_MALLOC_DMA);
    8513          88 :                 if (!ctx->page) {
                                               /* NOTE(review): at this point the blob is already removed from
                                                * open_blobs/open_blobids and still marked locked - confirm
                                                * that spdk_blob_close() and delete_blob_cleanup_finish()
                                                * cope with that state. */
    8514           0 :                         ctx->bserrno = -ENOMEM;
    8515           0 :                         spdk_blob_close(blob, delete_blob_cleanup_finish, ctx);
    8516           0 :                         return;
    8517             :                 }
    8518             :                 /* This blob is a snapshot with active clone - update clone first */
    8519          88 :                 update_clone_on_snapshot_deletion(blob, ctx);
    8520          88 :         } else {
    8521             :                 /* This blob does not have any clones - just remove it */
    8522        1802 :                 bs_blob_list_remove(blob);
    8523        1802 :                 bs_delete_blob_finish(seq, blob, 0);
    8524        1802 :                 free(ctx);
    8525             :         }
    8526        1953 : }
    8527             : 
                       /*
                        * Public entry point: delete the blob with the given id.
                        *
                        * bs     - blobstore that owns the blob.
                        * blobid - id of the blob to delete.
                        * cb_fn  - completion callback; receives cb_arg and the result.
                        * cb_arg - opaque argument passed to cb_fn.
                        *
                        * Asserts that it is called on bs->md_thread. Starts a sequence on
                        * the metadata channel and opens the blob; the deletion continues in
                        * bs_delete_open_cpl(). If the sequence cannot be started, cb_fn is
                        * invoked immediately with -ENOMEM.
                        */
    8528             : void
    8529        1953 : spdk_bs_delete_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8530             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    8531             : {
    8532             :         struct spdk_bs_cpl      cpl;
    8533             :         spdk_bs_sequence_t      *seq;
    8534             : 
    8535        1953 :         SPDK_DEBUGLOG(blob, "Deleting blob 0x%" PRIx64 "\n", blobid);
    8536             : 
    8537        1953 :         assert(spdk_get_thread() == bs->md_thread);
    8538             : 
    8539        1953 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8540        1953 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8541        1953 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8542             : 
    8543        1953 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8544        1953 :         if (!seq) {
    8545           0 :                 cb_fn(cb_arg, -ENOMEM);
    8546           0 :                 return;
    8547             :         }
    8548             : 
    8549        1953 :         spdk_bs_open_blob(bs, blobid, bs_delete_open_cpl, seq);
    8550        1953 : }
    8551             : 
    8552             : /* END spdk_bs_delete_blob */
    8553             : 
    8554             : /* START spdk_bs_open_blob */
    8555             : 
                       /*
                        * Completion callback for the blob_load() started by bs_open_blob().
                        *
                        * seq     - sequence of the open operation; its completion carries the
                        *           blob handle returned to the caller.
                        * cb_arg  - the newly allocated struct spdk_blob that was loaded.
                        * bserrno - result of the load.
                        *
                        * On load failure the new blob is freed and the sequence finishes with a
                        * NULL handle. If a blob with the same id is found by blob_lookup() in the
                        * meantime, the new blob is freed and the existing one is returned with
                        * its open_ref incremented. Otherwise the new blob gets its first
                        * reference and is registered in open_blobids and the open_blobs tree.
                        */
    8556             : static void
    8557        4341 : bs_open_blob_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8558             : {
    8559        4341 :         struct spdk_blob *blob = cb_arg;
    8560             :         struct spdk_blob *existing;
    8561             : 
    8562        4341 :         if (bserrno != 0) {
    8563          82 :                 blob_free(blob);
    8564          82 :                 seq->cpl.u.blob_handle.blob = NULL;
    8565          82 :                 bs_sequence_finish(seq, bserrno);
    8566          82 :                 return;
    8567             :         }
    8568             : 
                               /* NOTE(review): presumably another open of the same id completed while
                                * this load was in flight; bs_open_blob() already checked blob_lookup()
                                * before allocating this blob. */
    8569        4259 :         existing = blob_lookup(blob->bs, blob->id);
    8570        4259 :         if (existing) {
    8571           5 :                 blob_free(blob);
    8572           5 :                 existing->open_ref++;
    8573           5 :                 seq->cpl.u.blob_handle.blob = existing;
    8574           5 :                 bs_sequence_finish(seq, 0);
    8575           5 :                 return;
    8576             :         }
    8577             : 
    8578        4254 :         blob->open_ref++;
    8579             : 
    8580        4254 :         spdk_bit_array_set(blob->bs->open_blobids, blob->id);
    8581        4254 :         RB_INSERT(spdk_blob_tree, &blob->bs->open_blobs, blob);
    8582             : 
    8583        4254 :         bs_sequence_finish(seq, bserrno);
    8584        4341 : }
    8585             : 
                       /*
                        * Copies caller-supplied open options into a local options structure.
                        *
                        * src - options provided by the caller; src->opts_size tells how much of
                        *       the structure the caller actually filled in.
                        * dst - destination; fields not covered by src->opts_size keep whatever
                        *       value they already have.
                        *
                        * A field is copied only when it lies completely within the first
                        * src->opts_size bytes of the structure. opts_size itself is always
                        * copied.
                        */
    8586             : static inline void
    8587           5 : blob_open_opts_copy(const struct spdk_blob_open_opts *src, struct spdk_blob_open_opts *dst)
    8588             : {
                       /* True when 'field' ends at or before src->opts_size. */
    8589             : #define FIELD_OK(field) \
    8590             :         offsetof(struct spdk_blob_open_opts, field) + sizeof(src->field) <= src->opts_size
    8591             : 
                       /* Copy 'field' from src to dst when FIELD_OK(field) holds. */
    8592             : #define SET_FIELD(field) \
    8593             :         if (FIELD_OK(field)) { \
    8594             :                 dst->field = src->field; \
    8595             :         } \
    8596             : 
    8597           5 :         SET_FIELD(clear_method);
    8598           5 :         SET_FIELD(esnap_ctx);
    8599             : 
    8600           5 :         dst->opts_size = src->opts_size;
    8601             : 
    8602             :         /* You should not remove this statement, but need to update the assert statement
    8603             :          * if you add a new field, and also add a corresponding SET_FIELD statement */
    8604             :         SPDK_STATIC_ASSERT(sizeof(struct spdk_blob_open_opts) == 24, "Incorrect size");
    8605             : 
    8606             : #undef FIELD_OK
    8607             : #undef SET_FIELD
    8608           5 : }
    8609             : 
                       /*
                        * Common implementation of spdk_bs_open_blob() and
                        * spdk_bs_open_blob_ext().
                        *
                        * bs     - blobstore to open the blob from.
                        * blobid - id of the blob to open.
                        * opts   - optional open options; NULL selects the defaults set by
                        *          spdk_blob_open_opts_init().
                        * cb_fn  - completion callback; receives cb_arg, the blob handle (NULL on
                        *          error) and the result.
                        * cb_arg - opaque argument passed to cb_fn.
                        *
                        * Asserts that it is called on bs->md_thread. cb_fn is invoked
                        * directly from this function when
                        *  - the id is not marked in bs->used_blobids (-ENOENT),
                        *  - the blob is already open (its open_ref is incremented, result 0), or
                        *  - allocating the blob or the sequence fails (-ENOMEM).
                        * Otherwise the blob is loaded and the open completes in
                        * bs_open_blob_cpl().
                        */
    8610             : static void
    8611        5353 : bs_open_blob(struct spdk_blob_store *bs,
    8612             :              spdk_blob_id blobid,
    8613             :              struct spdk_blob_open_opts *opts,
    8614             :              spdk_blob_op_with_handle_complete cb_fn,
    8615             :              void *cb_arg)
    8616             : {
    8617             :         struct spdk_blob                *blob;
    8618             :         struct spdk_bs_cpl              cpl;
    8619             :         struct spdk_blob_open_opts      opts_local;
    8620             :         spdk_bs_sequence_t              *seq;
    8621             :         uint32_t                        page_num;
    8622             : 
    8623        5353 :         SPDK_DEBUGLOG(blob, "Opening blob 0x%" PRIx64 "\n", blobid);
    8624        5353 :         assert(spdk_get_thread() == bs->md_thread);
    8625             : 
    8626        5353 :         page_num = bs_blobid_to_page(blobid);
    8627        5353 :         if (spdk_bit_array_get(bs->used_blobids, page_num) == false) {
    8628             :                 /* Invalid blobid */
    8629          60 :                 cb_fn(cb_arg, NULL, -ENOENT);
    8630          60 :                 return;
    8631             :         }
    8632             : 
                               /* Already open: hand out the existing handle with one more reference.
                                * opts is not consulted on this path. */
    8633        5293 :         blob = blob_lookup(bs, blobid);
    8634        5293 :         if (blob) {
    8635         952 :                 blob->open_ref++;
    8636         952 :                 cb_fn(cb_arg, blob, 0);
    8637         952 :                 return;
    8638             :         }
    8639             : 
    8640        4341 :         blob = blob_alloc(bs, blobid);
    8641        4341 :         if (!blob) {
    8642           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8643           0 :                 return;
    8644             :         }
    8645             : 
                               /* Start from defaults, then overlay whatever the caller provided. */
    8646        4341 :         spdk_blob_open_opts_init(&opts_local, sizeof(opts_local));
    8647        4341 :         if (opts) {
    8648           5 :                 blob_open_opts_copy(opts, &opts_local);
    8649           5 :         }
    8650             : 
    8651        4341 :         blob->clear_method = opts_local.clear_method;
    8652             : 
    8653        4341 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_HANDLE;
    8654        4341 :         cpl.u.blob_handle.cb_fn = cb_fn;
    8655        4341 :         cpl.u.blob_handle.cb_arg = cb_arg;
    8656        4341 :         cpl.u.blob_handle.blob = blob;
    8657        4341 :         cpl.u.blob_handle.esnap_ctx = opts_local.esnap_ctx;
    8658             : 
    8659        4341 :         seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    8660        4341 :         if (!seq) {
    8661           0 :                 blob_free(blob);
    8662           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    8663           0 :                 return;
    8664             :         }
    8665             : 
    8666        4341 :         blob_load(seq, blob, bs_open_blob_cpl, blob);
    8667        5353 : }
    8668             : 
                       /*
                        * Public entry point: open a blob with default open options.
                        * Forwards to bs_open_blob() with opts == NULL; see there for the
                        * completion semantics of cb_fn.
                        */
    8669             : void
    8670        5348 : spdk_bs_open_blob(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8671             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8672             : {
    8673        5348 :         bs_open_blob(bs, blobid, NULL, cb_fn, cb_arg);
    8674        5348 : }
    8675             : 
                       /*
                        * Public entry point: open a blob with caller-supplied open options.
                        * Forwards to bs_open_blob(); opts may be NULL, in which case the
                        * defaults are used. See bs_open_blob() for the completion semantics of
                        * cb_fn.
                        */
    8676             : void
    8677           5 : spdk_bs_open_blob_ext(struct spdk_blob_store *bs, spdk_blob_id blobid,
    8678             :                       struct spdk_blob_open_opts *opts, spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    8679             : {
    8680           5 :         bs_open_blob(bs, blobid, opts, cb_fn, cb_arg);
    8681           5 : }
    8682             : 
    8683             : /* END spdk_bs_open_blob */
    8684             : 
    8685             : /* START spdk_blob_set_read_only */
    8686             : int
    8687         296 : spdk_blob_set_read_only(struct spdk_blob *blob)
    8688             : {
    8689         296 :         blob_verify_md_op(blob);
    8690             : 
    8691         296 :         blob->data_ro_flags |= SPDK_BLOB_READ_ONLY;
    8692             : 
    8693         296 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    8694         296 :         return 0;
    8695             : }
    8696             : /* END spdk_blob_set_read_only */
    8697             : 
    8698             : /* START spdk_blob_sync_md */
    8699             : 
    8700             : static void
    8701        1927 : blob_sync_md_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8702             : {
    8703        1927 :         struct spdk_blob *blob = cb_arg;
    8704             : 
    8705        1927 :         if (bserrno == 0 && (blob->data_ro_flags & SPDK_BLOB_READ_ONLY)) {
    8706         497 :                 blob->data_ro = true;
    8707         497 :                 blob->md_ro = true;
    8708         497 :         }
    8709             : 
    8710        1927 :         bs_sequence_finish(seq, bserrno);
    8711        1927 : }
    8712             : 
    8713             : static void
    8714        1927 : blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8715             : {
    8716             :         struct spdk_bs_cpl      cpl;
    8717             :         spdk_bs_sequence_t      *seq;
    8718             : 
    8719        1927 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8720        1927 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8721        1927 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8722             : 
    8723        1927 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8724        1927 :         if (!seq) {
    8725           0 :                 cb_fn(cb_arg, -ENOMEM);
    8726           0 :                 return;
    8727             :         }
    8728             : 
    8729        1927 :         blob_persist(seq, blob, blob_sync_md_cpl, blob);
    8730        1927 : }
    8731             : 
    8732             : void
    8733        1370 : spdk_blob_sync_md(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    8734             : {
    8735        1370 :         blob_verify_md_op(blob);
    8736             : 
    8737        1370 :         SPDK_DEBUGLOG(blob, "Syncing blob 0x%" PRIx64 "\n", blob->id);
    8738             : 
    8739        1370 :         if (blob->md_ro) {
    8740           5 :                 assert(blob->state == SPDK_BLOB_STATE_CLEAN);
    8741           5 :                 cb_fn(cb_arg, 0);
    8742           5 :                 return;
    8743             :         }
    8744             : 
    8745        1365 :         blob_sync_md(blob, cb_fn, cb_arg);
    8746        1370 : }
    8747             : 
    8748             : /* END spdk_blob_sync_md */
    8749             : 
    8750             : struct spdk_blob_cluster_op_ctx {
    8751             :         struct spdk_thread      *thread;
    8752             :         struct spdk_blob        *blob;
    8753             :         uint32_t                cluster_num;    /* cluster index in blob */
    8754             :         uint32_t                cluster;        /* cluster on disk */
    8755             :         uint32_t                extent_page;    /* extent page on disk */
    8756             :         struct spdk_blob_md_page *page; /* preallocated extent page */
    8757             :         int                     rc;
    8758             :         spdk_blob_op_complete   cb_fn;
    8759             :         void                    *cb_arg;
    8760             : };
    8761             : 
    8762             : static void
    8763        1105 : blob_op_cluster_msg_cpl(void *arg)
    8764             : {
    8765        1105 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8766             : 
    8767        1105 :         ctx->cb_fn(ctx->cb_arg, ctx->rc);
    8768        1105 :         free(ctx);
    8769        1105 : }
    8770             : 
    8771             : static void
    8772        1061 : blob_op_cluster_msg_cb(void *arg, int bserrno)
    8773             : {
    8774        1061 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8775             : 
    8776        1061 :         ctx->rc = bserrno;
    8777        1061 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8778        1061 : }
    8779             : 
    8780             : static void
    8781         126 : blob_insert_new_ep_cb(void *arg, int bserrno)
    8782             : {
    8783         126 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8784             :         uint32_t *extent_page;
    8785             : 
    8786         126 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8787         126 :         *extent_page = ctx->extent_page;
    8788         126 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8789         126 :         blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8790         126 : }
    8791             : 
    /* State kept while one extent page is being written by blob_write_extent_page(). */
    struct spdk_blob_write_extent_page_ctx {
            struct spdk_blob_store          *bs;            /* blobstore owning the page */

            uint32_t                        extent;         /* md page index the page is written to */
            struct spdk_blob_md_page        *page;          /* serialized page buffer */
    };
    8798             : 
    8799             : static void
    8800          39 : blob_free_cluster_msg_cb(void *arg, int bserrno)
    8801             : {
    8802          39 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8803             : 
    8804          39 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8805          39 :         bs_release_cluster(ctx->blob->bs, ctx->cluster);
    8806          39 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8807             : 
    8808          39 :         ctx->rc = bserrno;
    8809          39 :         spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8810          39 : }
    8811             : 
    8812             : static void
    8813          39 : blob_free_cluster_update_ep_cb(void *arg, int bserrno)
    8814             : {
    8815          39 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8816             : 
    8817          39 :         if (bserrno != 0 || ctx->blob->bs->clean == 0) {
    8818          39 :                 blob_free_cluster_msg_cb(ctx, bserrno);
    8819          39 :                 return;
    8820             :         }
    8821             : 
    8822           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8823           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8824          39 : }
    8825             : 
    8826             : static void
    8827           0 : blob_free_cluster_free_ep_cb(void *arg, int bserrno)
    8828             : {
    8829           0 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8830             : 
    8831           0 :         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8832           0 :         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8833           0 :         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8834           0 :         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8835           0 :         ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8836           0 :         blob_sync_md(ctx->blob, blob_free_cluster_msg_cb, ctx);
    8837           0 : }
    8838             : 
    8839             : static void
    8840         657 : blob_persist_extent_page_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8841             : {
    8842         657 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8843             : 
    8844         657 :         free(ctx);
    8845         657 :         bs_sequence_finish(seq, bserrno);
    8846         657 : }
    8847             : 
    8848             : static void
    8849         657 : blob_write_extent_page_ready(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    8850             : {
    8851         657 :         struct spdk_blob_write_extent_page_ctx *ctx = cb_arg;
    8852             : 
    8853         657 :         if (bserrno != 0) {
    8854           0 :                 blob_persist_extent_page_cpl(seq, ctx, bserrno);
    8855           0 :                 return;
    8856             :         }
    8857        1314 :         bs_sequence_write_dev(seq, ctx->page, bs_md_page_to_lba(ctx->bs, ctx->extent),
    8858         657 :                               bs_byte_to_lba(ctx->bs, ctx->bs->md_page_size),
    8859         657 :                               blob_persist_extent_page_cpl, ctx);
    8860         657 : }
    8861             : 
    8862             : static void
    8863         657 : blob_write_extent_page(struct spdk_blob *blob, uint32_t extent, uint64_t cluster_num,
    8864             :                        struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    8865             : {
    8866             :         struct spdk_blob_write_extent_page_ctx  *ctx;
    8867             :         spdk_bs_sequence_t                      *seq;
    8868             :         struct spdk_bs_cpl                      cpl;
    8869             : 
    8870         657 :         ctx = calloc(1, sizeof(*ctx));
    8871         657 :         if (!ctx) {
    8872           0 :                 cb_fn(cb_arg, -ENOMEM);
    8873           0 :                 return;
    8874             :         }
    8875         657 :         ctx->bs = blob->bs;
    8876         657 :         ctx->extent = extent;
    8877         657 :         ctx->page = page;
    8878             : 
    8879         657 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    8880         657 :         cpl.u.blob_basic.cb_fn = cb_fn;
    8881         657 :         cpl.u.blob_basic.cb_arg = cb_arg;
    8882             : 
    8883         657 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    8884         657 :         if (!seq) {
    8885           0 :                 free(ctx);
    8886           0 :                 cb_fn(cb_arg, -ENOMEM);
    8887           0 :                 return;
    8888             :         }
    8889             : 
    8890         657 :         assert(page);
    8891         657 :         page->next = SPDK_INVALID_MD_PAGE;
    8892         657 :         page->id = blob->id;
    8893         657 :         page->sequence_num = 0;
    8894             : 
    8895         657 :         blob_serialize_extent_page(blob, cluster_num, page);
    8896             : 
    8897         657 :         page->crc = blob_md_page_calc_crc(page);
    8898             : 
    8899         657 :         assert(spdk_bit_array_get(blob->bs->used_md_pages, extent) == true);
    8900             : 
    8901         657 :         bs_mark_dirty(seq, blob->bs, blob_write_extent_page_ready, ctx);
    8902         657 : }
    8903             : 
    8904             : static void
    8905        1030 : blob_insert_cluster_msg(void *arg)
    8906             : {
    8907        1030 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8908             :         uint32_t *extent_page;
    8909             : 
    8910        1030 :         ctx->rc = blob_insert_cluster(ctx->blob, ctx->cluster_num, ctx->cluster);
    8911        1030 :         if (ctx->rc != 0) {
    8912           5 :                 spdk_thread_send_msg(ctx->thread, blob_op_cluster_msg_cpl, ctx);
    8913           5 :                 return;
    8914             :         }
    8915             : 
    8916        1025 :         if (ctx->blob->use_extent_table == false) {
    8917             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8918         410 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    8919         410 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    8920         410 :                 return;
    8921             :         }
    8922             : 
    8923         615 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    8924         615 :         if (*extent_page == 0) {
    8925             :                 /* Extent page requires allocation.
    8926             :                  * It was already claimed in the used_md_pages map and placed in ctx. */
    8927         126 :                 assert(ctx->extent_page != 0);
    8928         126 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8929         252 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    8930         126 :                                        blob_insert_new_ep_cb, ctx);
    8931         126 :         } else {
    8932             :                 /* It is possible for original thread to allocate extent page for
    8933             :                  * different cluster in the same extent page. In such case proceed with
    8934             :                  * updating the existing extent page, but release the additional one. */
    8935         489 :                 if (ctx->extent_page != 0) {
    8936           0 :                         spdk_spin_lock(&ctx->blob->bs->used_lock);
    8937           0 :                         assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    8938           0 :                         bs_release_md_page(ctx->blob->bs, ctx->extent_page);
    8939           0 :                         spdk_spin_unlock(&ctx->blob->bs->used_lock);
    8940           0 :                         ctx->extent_page = 0;
    8941           0 :                 }
    8942             :                 /* Extent page already allocated.
    8943             :                  * Every cluster allocation, requires just an update of single extent page. */
    8944         978 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    8945         489 :                                        blob_op_cluster_msg_cb, ctx);
    8946             :         }
    8947        1030 : }
    8948             : 
    8949             : static void
    8950        1030 : blob_insert_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num,
    8951             :                                  uint64_t cluster, uint32_t extent_page, struct spdk_blob_md_page *page,
    8952             :                                  spdk_blob_op_complete cb_fn, void *cb_arg)
    8953             : {
    8954             :         struct spdk_blob_cluster_op_ctx *ctx;
    8955             : 
    8956        1030 :         ctx = calloc(1, sizeof(*ctx));
    8957        1030 :         if (ctx == NULL) {
    8958           0 :                 cb_fn(cb_arg, -ENOMEM);
    8959           0 :                 return;
    8960             :         }
    8961             : 
    8962        1030 :         ctx->thread = spdk_get_thread();
    8963        1030 :         ctx->blob = blob;
    8964        1030 :         ctx->cluster_num = cluster_num;
    8965        1030 :         ctx->cluster = cluster;
    8966        1030 :         ctx->extent_page = extent_page;
    8967        1030 :         ctx->page = page;
    8968        1030 :         ctx->cb_fn = cb_fn;
    8969        1030 :         ctx->cb_arg = cb_arg;
    8970             : 
    8971        1030 :         spdk_thread_send_msg(blob->bs->md_thread, blob_insert_cluster_msg, ctx);
    8972        1030 : }
    8973             : 
    8974             : static void
    8975          75 : blob_free_cluster_msg(void *arg)
    8976             : {
    8977          75 :         struct spdk_blob_cluster_op_ctx *ctx = arg;
    8978             :         uint32_t *extent_page;
    8979             :         uint32_t start_cluster_idx;
    8980          75 :         bool free_extent_page = true;
    8981             :         size_t i;
    8982             : 
    8983          75 :         ctx->cluster = bs_lba_to_cluster(ctx->blob->bs, ctx->blob->active.clusters[ctx->cluster_num]);
    8984             : 
    8985             :         /* There were concurrent unmaps to the same cluster, only release the cluster on the first one */
    8986          75 :         if (ctx->cluster == 0) {
    8987          10 :                 blob_op_cluster_msg_cb(ctx, 0);
    8988          10 :                 return;
    8989             :         }
    8990             : 
    8991          65 :         ctx->blob->active.clusters[ctx->cluster_num] = 0;
    8992          65 :         if (ctx->cluster != 0) {
    8993          65 :                 ctx->blob->active.num_allocated_clusters--;
    8994          65 :         }
    8995             : 
    8996          65 :         if (ctx->blob->use_extent_table == false) {
    8997             :                 /* Extent table is not used, proceed with sync of md that will only use extents_rle. */
    8998          26 :                 spdk_spin_lock(&ctx->blob->bs->used_lock);
    8999          26 :                 bs_release_cluster(ctx->blob->bs, ctx->cluster);
    9000          26 :                 spdk_spin_unlock(&ctx->blob->bs->used_lock);
    9001          26 :                 ctx->blob->state = SPDK_BLOB_STATE_DIRTY;
    9002          26 :                 blob_sync_md(ctx->blob, blob_op_cluster_msg_cb, ctx);
    9003          26 :                 return;
    9004             :         }
    9005             : 
    9006          39 :         extent_page = bs_cluster_to_extent_page(ctx->blob, ctx->cluster_num);
    9007             : 
    9008             :         /* There shouldn't be parallel release operations on same cluster */
    9009          39 :         assert(*extent_page == ctx->extent_page);
    9010             : 
    9011          39 :         start_cluster_idx = (ctx->cluster_num / SPDK_EXTENTS_PER_EP) * SPDK_EXTENTS_PER_EP;
    9012          72 :         for (i = 0; i < SPDK_EXTENTS_PER_EP; ++i) {
    9013          72 :                 if (ctx->blob->active.clusters[start_cluster_idx + i] != 0) {
    9014          39 :                         free_extent_page = false;
    9015          39 :                         break;
    9016             :                 }
    9017          33 :         }
    9018             : 
    9019          39 :         if (free_extent_page) {
    9020           0 :                 assert(ctx->extent_page != 0);
    9021           0 :                 assert(spdk_bit_array_get(ctx->blob->bs->used_md_pages, ctx->extent_page) == true);
    9022           0 :                 ctx->blob->active.extent_pages[bs_cluster_to_extent_table_id(ctx->cluster_num)] = 0;
    9023           0 :                 blob_write_extent_page(ctx->blob, ctx->extent_page, ctx->cluster_num, ctx->page,
    9024           0 :                                        blob_free_cluster_free_ep_cb, ctx);
    9025           0 :         } else {
    9026          78 :                 blob_write_extent_page(ctx->blob, *extent_page, ctx->cluster_num, ctx->page,
    9027          39 :                                        blob_free_cluster_update_ep_cb, ctx);
    9028             :         }
    9029          75 : }
    9030             : 
    9031             : 
    9032             : static void
    9033          75 : blob_free_cluster_on_md_thread(struct spdk_blob *blob, uint32_t cluster_num, uint32_t extent_page,
    9034             :                                struct spdk_blob_md_page *page, spdk_blob_op_complete cb_fn, void *cb_arg)
    9035             : {
    9036             :         struct spdk_blob_cluster_op_ctx *ctx;
    9037             : 
    9038          75 :         ctx = calloc(1, sizeof(*ctx));
    9039          75 :         if (ctx == NULL) {
    9040           0 :                 cb_fn(cb_arg, -ENOMEM);
    9041           0 :                 return;
    9042             :         }
    9043             : 
    9044          75 :         ctx->thread = spdk_get_thread();
    9045          75 :         ctx->blob = blob;
    9046          75 :         ctx->cluster_num = cluster_num;
    9047          75 :         ctx->extent_page = extent_page;
    9048          75 :         ctx->page = page;
    9049          75 :         ctx->cb_fn = cb_fn;
    9050          75 :         ctx->cb_arg = cb_arg;
    9051             : 
    9052          75 :         spdk_thread_send_msg(blob->bs->md_thread, blob_free_cluster_msg, ctx);
    9053          75 : }
    9054             : 
    9055             : /* START spdk_blob_close */
    9056             : 
    9057             : static void
    9058        5211 : blob_close_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9059             : {
    9060        5211 :         struct spdk_blob *blob = cb_arg;
    9061             : 
    9062        5211 :         if (bserrno == 0) {
    9063        5211 :                 blob->open_ref--;
    9064        5211 :                 if (blob->open_ref == 0) {
    9065             :                         /*
    9066             :                          * Blobs with active.num_pages == 0 are deleted blobs.
    9067             :                          *  these blobs are removed from the blob_store list
    9068             :                          *  when the deletion process starts - so don't try to
    9069             :                          *  remove them again.
    9070             :                          */
    9071        4254 :                         if (blob->active.num_pages > 0) {
    9072        2392 :                                 spdk_bit_array_clear(blob->bs->open_blobids, blob->id);
    9073        2392 :                                 RB_REMOVE(spdk_blob_tree, &blob->bs->open_blobs, blob);
    9074        2392 :                         }
    9075        4254 :                         blob_free(blob);
    9076        4254 :                 }
    9077        5211 :         }
    9078             : 
    9079        5211 :         bs_sequence_finish(seq, bserrno);
    9080        5211 : }
    9081             : 
    9082             : static void
    9083         140 : blob_close_esnap_done(void *cb_arg, struct spdk_blob *blob, int bserrno)
    9084             : {
    9085         140 :         spdk_bs_sequence_t      *seq = cb_arg;
    9086             : 
    9087         140 :         if (bserrno != 0) {
    9088           0 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": close failed with error %d\n",
    9089             :                               blob->id, bserrno);
    9090           0 :                 bs_sequence_finish(seq, bserrno);
    9091           0 :                 return;
    9092             :         }
    9093             : 
    9094         140 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": closed, syncing metadata on thread %s\n",
    9095             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
    9096             : 
    9097             :         /* Sync metadata */
    9098         140 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9099         140 : }
    9100             : 
    9101             : void
    9102        5211 : spdk_blob_close(struct spdk_blob *blob, spdk_blob_op_complete cb_fn, void *cb_arg)
    9103             : {
    9104             :         struct spdk_bs_cpl      cpl;
    9105             :         spdk_bs_sequence_t      *seq;
    9106             : 
    9107        5211 :         blob_verify_md_op(blob);
    9108             : 
    9109        5211 :         SPDK_DEBUGLOG(blob, "Closing blob 0x%" PRIx64 "\n", blob->id);
    9110             : 
    9111        5211 :         if (blob->open_ref == 0) {
    9112           0 :                 cb_fn(cb_arg, -EBADF);
    9113           0 :                 return;
    9114             :         }
    9115             : 
    9116        5211 :         cpl.type = SPDK_BS_CPL_TYPE_BLOB_BASIC;
    9117        5211 :         cpl.u.blob_basic.cb_fn = cb_fn;
    9118        5211 :         cpl.u.blob_basic.cb_arg = cb_arg;
    9119             : 
    9120        5211 :         seq = bs_sequence_start_bs(blob->bs->md_channel, &cpl);
    9121        5211 :         if (!seq) {
    9122           0 :                 cb_fn(cb_arg, -ENOMEM);
    9123           0 :                 return;
    9124             :         }
    9125             : 
    9126        5211 :         if (blob->open_ref == 1 && blob_is_esnap_clone(blob)) {
    9127         140 :                 blob_esnap_destroy_bs_dev_channels(blob, false, blob_close_esnap_done, seq);
    9128         140 :                 return;
    9129             :         }
    9130             : 
    9131             :         /* Sync metadata */
    9132        5071 :         blob_persist(seq, blob, blob_close_cpl, blob);
    9133        5211 : }
    9134             : 
    9135             : /* END spdk_blob_close */
    9136             : 
    /* Return the I/O channel obtained from spdk_get_io_channel() for this blobstore. */
    struct spdk_io_channel *
    spdk_bs_alloc_io_channel(struct spdk_blob_store *bs)
    {
            struct spdk_io_channel *channel = spdk_get_io_channel(bs);

            return channel;
    }
    9141             : 
    /*
     * Release a channel obtained from spdk_bs_alloc_io_channel().
     *
     * blob_esnap_destroy_bs_channel() is called on the channel's context
     * before the channel itself is put.
     */
    void
    spdk_bs_free_io_channel(struct spdk_io_channel *channel)
    {
            void *bs_channel = spdk_io_channel_get_ctx(channel);

            blob_esnap_destroy_bs_channel(bs_channel);
            spdk_put_io_channel(channel);
    }
    9148             : 
    9149             : void
    9150         140 : spdk_blob_io_unmap(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9151             :                    uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9152             : {
    9153         140 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9154             :                                SPDK_BLOB_UNMAP);
    9155         140 : }
    9156             : 
    9157             : void
    9158          60 : spdk_blob_io_write_zeroes(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9159             :                           uint64_t offset, uint64_t length, spdk_blob_op_complete cb_fn, void *cb_arg)
    9160             : {
    9161          60 :         blob_request_submit_op(blob, channel, NULL, offset, length, cb_fn, cb_arg,
    9162             :                                SPDK_BLOB_WRITE_ZEROES);
    9163          60 : }
    9164             : 
    9165             : void
    9166       27349 : spdk_blob_io_write(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9167             :                    void *payload, uint64_t offset, uint64_t length,
    9168             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9169             : {
    9170       27349 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9171             :                                SPDK_BLOB_WRITE);
    9172       27349 : }
    9173             : 
    9174             : void
    9175       26143 : spdk_blob_io_read(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9176             :                   void *payload, uint64_t offset, uint64_t length,
    9177             :                   spdk_blob_op_complete cb_fn, void *cb_arg)
    9178             : {
    9179       26143 :         blob_request_submit_op(blob, channel, payload, offset, length, cb_fn, cb_arg,
    9180             :                                SPDK_BLOB_READ);
    9181       26143 : }
    9182             : 
    9183             : void
    9184         175 : spdk_blob_io_writev(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9185             :                     struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9186             :                     spdk_blob_op_complete cb_fn, void *cb_arg)
    9187             : {
    9188         175 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false, NULL);
    9189         175 : }
    9190             : 
    9191             : void
    9192        1655 : spdk_blob_io_readv(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9193             :                    struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9194             :                    spdk_blob_op_complete cb_fn, void *cb_arg)
    9195             : {
    9196        1655 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true, NULL);
    9197        1655 : }
    9198             : 
    9199             : void
    9200         260 : spdk_blob_io_writev_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9201             :                         struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9202             :                         spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9203             : {
    9204         520 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, false,
    9205         260 :                                    io_opts);
    9206         260 : }
    9207             : 
    9208             : void
    9209        2105 : spdk_blob_io_readv_ext(struct spdk_blob *blob, struct spdk_io_channel *channel,
    9210             :                        struct iovec *iov, int iovcnt, uint64_t offset, uint64_t length,
    9211             :                        spdk_blob_op_complete cb_fn, void *cb_arg, struct spdk_blob_ext_io_opts *io_opts)
    9212             : {
    9213        4210 :         blob_request_submit_rw_iov(blob, channel, iov, iovcnt, offset, length, cb_fn, cb_arg, true,
    9214        2105 :                                    io_opts);
    9215        2105 : }
    9216             : 
    9217             : struct spdk_bs_iter_ctx {
    9218             :         int64_t page_num;
    9219             :         struct spdk_blob_store *bs;
    9220             : 
    9221             :         spdk_blob_op_with_handle_complete cb_fn;
    9222             :         void *cb_arg;
    9223             : };
    9224             : 
    9225             : static void
    9226        1460 : bs_iter_cpl(void *cb_arg, struct spdk_blob *_blob, int bserrno)
    9227             : {
    9228        1460 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9229        1460 :         struct spdk_blob_store *bs = ctx->bs;
    9230             :         spdk_blob_id id;
    9231             : 
    9232        1460 :         if (bserrno == 0) {
    9233         557 :                 ctx->cb_fn(ctx->cb_arg, _blob, bserrno);
    9234         557 :                 free(ctx);
    9235         557 :                 return;
    9236             :         }
    9237             : 
    9238         903 :         ctx->page_num++;
    9239         903 :         ctx->page_num = spdk_bit_array_find_first_set(bs->used_blobids, ctx->page_num);
    9240         903 :         if (ctx->page_num >= spdk_bit_array_capacity(bs->used_blobids)) {
    9241         336 :                 ctx->cb_fn(ctx->cb_arg, NULL, -ENOENT);
    9242         336 :                 free(ctx);
    9243         336 :                 return;
    9244             :         }
    9245             : 
    9246         567 :         id = bs_page_to_blobid(ctx->page_num);
    9247             : 
    9248         567 :         spdk_bs_open_blob(bs, id, bs_iter_cpl, ctx);
    9249        1460 : }
    9250             : 
    9251             : void
    9252         366 : spdk_bs_iter_first(struct spdk_blob_store *bs,
    9253             :                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9254             : {
    9255             :         struct spdk_bs_iter_ctx *ctx;
    9256             : 
    9257         366 :         ctx = calloc(1, sizeof(*ctx));
    9258         366 :         if (!ctx) {
    9259           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9260           0 :                 return;
    9261             :         }
    9262             : 
    9263         366 :         ctx->page_num = -1;
    9264         366 :         ctx->bs = bs;
    9265         366 :         ctx->cb_fn = cb_fn;
    9266         366 :         ctx->cb_arg = cb_arg;
    9267             : 
    9268         366 :         bs_iter_cpl(ctx, NULL, -1);
    9269         366 : }
    9270             : 
    9271             : static void
    9272         527 : bs_iter_close_cpl(void *cb_arg, int bserrno)
    9273             : {
    9274         527 :         struct spdk_bs_iter_ctx *ctx = cb_arg;
    9275             : 
    9276         527 :         bs_iter_cpl(ctx, NULL, -1);
    9277         527 : }
    9278             : 
    9279             : void
    9280         527 : spdk_bs_iter_next(struct spdk_blob_store *bs, struct spdk_blob *blob,
    9281             :                   spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
    9282             : {
    9283             :         struct spdk_bs_iter_ctx *ctx;
    9284             : 
    9285         527 :         assert(blob != NULL);
    9286             : 
    9287         527 :         ctx = calloc(1, sizeof(*ctx));
    9288         527 :         if (!ctx) {
    9289           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
    9290           0 :                 return;
    9291             :         }
    9292             : 
    9293         527 :         ctx->page_num = bs_blobid_to_page(blob->id);
    9294         527 :         ctx->bs = bs;
    9295         527 :         ctx->cb_fn = cb_fn;
    9296         527 :         ctx->cb_arg = cb_arg;
    9297             : 
    9298             :         /* Close the existing blob */
    9299         527 :         spdk_blob_close(blob, bs_iter_close_cpl, ctx);
    9300         527 : }
    9301             : 
    9302             : static int
    9303        1178 : blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9304             :                uint16_t value_len, bool internal)
    9305             : {
    9306             :         struct spdk_xattr_tailq *xattrs;
    9307             :         struct spdk_xattr       *xattr;
    9308             :         size_t                  desc_size;
    9309             :         void                    *tmp;
    9310             : 
    9311        1178 :         blob_verify_md_op(blob);
    9312             : 
    9313        1178 :         if (blob->md_ro) {
    9314           5 :                 return -EPERM;
    9315             :         }
    9316             : 
    9317        1173 :         desc_size = sizeof(struct spdk_blob_md_descriptor_xattr) + strlen(name) + value_len;
    9318        1173 :         if (desc_size > SPDK_BS_MAX_DESC_SIZE) {
    9319           5 :                 SPDK_DEBUGLOG(blob, "Xattr '%s' of size %zu does not fix into single page %zu\n", name,
    9320             :                               desc_size, SPDK_BS_MAX_DESC_SIZE);
    9321           5 :                 return -ENOMEM;
    9322             :         }
    9323             : 
    9324        1168 :         if (internal) {
    9325         917 :                 xattrs = &blob->xattrs_internal;
    9326         917 :                 blob->invalid_flags |= SPDK_BLOB_INTERNAL_XATTR;
    9327         917 :         } else {
    9328         251 :                 xattrs = &blob->xattrs;
    9329             :         }
    9330             : 
    9331        1438 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9332         402 :                 if (!strcmp(name, xattr->name)) {
    9333         132 :                         tmp = malloc(value_len);
    9334         132 :                         if (!tmp) {
    9335           0 :                                 return -ENOMEM;
    9336             :                         }
    9337             : 
    9338         132 :                         free(xattr->value);
    9339         132 :                         xattr->value_len = value_len;
    9340         132 :                         xattr->value = tmp;
    9341         132 :                         memcpy(xattr->value, value, value_len);
    9342             : 
    9343         132 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9344             : 
    9345         132 :                         return 0;
    9346             :                 }
    9347         270 :         }
    9348             : 
    9349        1036 :         xattr = calloc(1, sizeof(*xattr));
    9350        1036 :         if (!xattr) {
    9351           0 :                 return -ENOMEM;
    9352             :         }
    9353             : 
    9354        1036 :         xattr->name = strdup(name);
    9355        1036 :         if (!xattr->name) {
    9356           0 :                 free(xattr);
    9357           0 :                 return -ENOMEM;
    9358             :         }
    9359             : 
    9360        1036 :         xattr->value_len = value_len;
    9361        1036 :         xattr->value = malloc(value_len);
    9362        1036 :         if (!xattr->value) {
    9363           0 :                 free(xattr->name);
    9364           0 :                 free(xattr);
    9365           0 :                 return -ENOMEM;
    9366             :         }
    9367        1036 :         memcpy(xattr->value, value, value_len);
    9368        1036 :         TAILQ_INSERT_TAIL(xattrs, xattr, link);
    9369             : 
    9370        1036 :         blob->state = SPDK_BLOB_STATE_DIRTY;
    9371             : 
    9372        1036 :         return 0;
    9373        1178 : }
    9374             : 
    9375             : int
    9376         216 : spdk_blob_set_xattr(struct spdk_blob *blob, const char *name, const void *value,
    9377             :                     uint16_t value_len)
    9378             : {
    9379         216 :         return blob_set_xattr(blob, name, value, value_len, false);
    9380             : }
    9381             : 
    9382             : static int
    9383         511 : blob_remove_xattr(struct spdk_blob *blob, const char *name, bool internal)
    9384             : {
    9385             :         struct spdk_xattr_tailq *xattrs;
    9386             :         struct spdk_xattr       *xattr;
    9387             : 
    9388         511 :         blob_verify_md_op(blob);
    9389             : 
    9390         511 :         if (blob->md_ro) {
    9391           5 :                 return -EPERM;
    9392             :         }
    9393         506 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9394             : 
    9395         521 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9396         456 :                 if (!strcmp(name, xattr->name)) {
    9397         441 :                         TAILQ_REMOVE(xattrs, xattr, link);
    9398         441 :                         free(xattr->value);
    9399         441 :                         free(xattr->name);
    9400         441 :                         free(xattr);
    9401             : 
    9402         441 :                         if (internal && TAILQ_EMPTY(&blob->xattrs_internal)) {
    9403         306 :                                 blob->invalid_flags &= ~SPDK_BLOB_INTERNAL_XATTR;
    9404         306 :                         }
    9405         441 :                         blob->state = SPDK_BLOB_STATE_DIRTY;
    9406             : 
    9407         441 :                         return 0;
    9408             :                 }
    9409          15 :         }
    9410             : 
    9411          65 :         return -ENOENT;
    9412         511 : }
    9413             : 
    9414             : int
    9415          45 : spdk_blob_remove_xattr(struct spdk_blob *blob, const char *name)
    9416             : {
    9417          45 :         return blob_remove_xattr(blob, name, false);
    9418             : }
    9419             : 
    9420             : static int
    9421        2852 : blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9422             :                      const void **value, size_t *value_len, bool internal)
    9423             : {
    9424             :         struct spdk_xattr       *xattr;
    9425             :         struct spdk_xattr_tailq *xattrs;
    9426             : 
    9427        2852 :         xattrs = internal ? &blob->xattrs_internal : &blob->xattrs;
    9428             : 
    9429        3636 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9430        1728 :                 if (!strcmp(name, xattr->name)) {
    9431         944 :                         *value = xattr->value;
    9432         944 :                         *value_len = xattr->value_len;
    9433         944 :                         return 0;
    9434             :                 }
    9435         784 :         }
    9436        1908 :         return -ENOENT;
    9437        2852 : }
    9438             : 
    9439             : int
    9440         192 : spdk_blob_get_xattr_value(struct spdk_blob *blob, const char *name,
    9441             :                           const void **value, size_t *value_len)
    9442             : {
    9443         192 :         blob_verify_md_op(blob);
    9444             : 
    9445         192 :         return blob_get_xattr_value(blob, name, value, value_len, false);
    9446             : }
    9447             : 
    9448             : struct spdk_xattr_names {
    9449             :         uint32_t        count;
    9450             :         const char      *names[0];
    9451             : };
    9452             : 
    9453             : static int
    9454           5 : blob_get_xattr_names(struct spdk_xattr_tailq *xattrs, struct spdk_xattr_names **names)
    9455             : {
    9456             :         struct spdk_xattr       *xattr;
    9457           5 :         int                     count = 0;
    9458             : 
    9459          15 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9460          10 :                 count++;
    9461          10 :         }
    9462             : 
    9463           5 :         *names = calloc(1, sizeof(struct spdk_xattr_names) + count * sizeof(char *));
    9464           5 :         if (*names == NULL) {
    9465           0 :                 return -ENOMEM;
    9466             :         }
    9467             : 
    9468          15 :         TAILQ_FOREACH(xattr, xattrs, link) {
    9469          10 :                 (*names)->names[(*names)->count++] = xattr->name;
    9470          10 :         }
    9471             : 
    9472           5 :         return 0;
    9473           5 : }
    9474             : 
    9475             : int
    9476           5 : spdk_blob_get_xattr_names(struct spdk_blob *blob, struct spdk_xattr_names **names)
    9477             : {
    9478           5 :         blob_verify_md_op(blob);
    9479             : 
    9480           5 :         return blob_get_xattr_names(&blob->xattrs, names);
    9481             : }
    9482             : 
    9483             : uint32_t
    9484           5 : spdk_xattr_names_get_count(struct spdk_xattr_names *names)
    9485             : {
    9486           5 :         assert(names != NULL);
    9487             : 
    9488           5 :         return names->count;
    9489             : }
    9490             : 
    9491             : const char *
    9492          10 : spdk_xattr_names_get_name(struct spdk_xattr_names *names, uint32_t index)
    9493             : {
    9494          10 :         if (index >= names->count) {
    9495           0 :                 return NULL;
    9496             :         }
    9497             : 
    9498          10 :         return names->names[index];
    9499          10 : }
    9500             : 
    9501             : void
    9502           5 : spdk_xattr_names_free(struct spdk_xattr_names *names)
    9503             : {
    9504           5 :         free(names);
    9505           5 : }
    9506             : 
    9507             : struct spdk_bs_type
    9508           2 : spdk_bs_get_bstype(struct spdk_blob_store *bs)
    9509             : {
    9510           2 :         return bs->bstype;
    9511             : }
    9512             : 
    9513             : void
    9514           0 : spdk_bs_set_bstype(struct spdk_blob_store *bs, struct spdk_bs_type bstype)
    9515             : {
    9516           0 :         memcpy(&bs->bstype, &bstype, sizeof(bstype));
    9517           0 : }
    9518             : 
    9519             : bool
    9520          60 : spdk_blob_is_read_only(struct spdk_blob *blob)
    9521             : {
    9522          60 :         assert(blob != NULL);
    9523          60 :         return (blob->data_ro || blob->md_ro);
    9524             : }
    9525             : 
    9526             : bool
    9527          65 : spdk_blob_is_snapshot(struct spdk_blob *blob)
    9528             : {
    9529             :         struct spdk_blob_list *snapshot_entry;
    9530             : 
    9531          65 :         assert(blob != NULL);
    9532             : 
    9533          65 :         snapshot_entry = bs_get_snapshot_entry(blob->bs, blob->id);
    9534          65 :         if (snapshot_entry == NULL) {
    9535          35 :                 return false;
    9536             :         }
    9537             : 
    9538          30 :         return true;
    9539          65 : }
    9540             : 
    9541             : bool
    9542          85 : spdk_blob_is_clone(struct spdk_blob *blob)
    9543             : {
    9544          85 :         assert(blob != NULL);
    9545             : 
    9546          85 :         if (blob->parent_id != SPDK_BLOBID_INVALID &&
    9547          65 :             blob->parent_id != SPDK_BLOBID_EXTERNAL_SNAPSHOT) {
    9548          50 :                 assert(spdk_blob_is_thin_provisioned(blob));
    9549          50 :                 return true;
    9550             :         }
    9551             : 
    9552          35 :         return false;
    9553          85 : }
    9554             : 
    9555             : bool
    9556       46657 : spdk_blob_is_thin_provisioned(struct spdk_blob *blob)
    9557             : {
    9558       46657 :         assert(blob != NULL);
    9559       46657 :         return !!(blob->invalid_flags & SPDK_BLOB_THIN_PROV);
    9560             : }
    9561             : 
    9562             : bool
    9563       57607 : spdk_blob_is_esnap_clone(const struct spdk_blob *blob)
    9564             : {
    9565       57607 :         return blob_is_esnap_clone(blob);
    9566             : }
    9567             : 
    9568             : static void
    9569        4291 : blob_update_clear_method(struct spdk_blob *blob)
    9570             : {
    9571             :         enum blob_clear_method stored_cm;
    9572             : 
    9573        4291 :         assert(blob != NULL);
    9574             : 
    9575             :         /* If BLOB_CLEAR_WITH_DEFAULT was passed in, use the setting stored
    9576             :          * in metadata previously.  If something other than the default was
    9577             :          * specified, ignore stored value and used what was passed in.
    9578             :          */
    9579        4291 :         stored_cm = ((blob->md_ro_flags & SPDK_BLOB_CLEAR_METHOD) >> SPDK_BLOB_CLEAR_METHOD_SHIFT);
    9580             : 
    9581        4291 :         if (blob->clear_method == BLOB_CLEAR_WITH_DEFAULT) {
    9582        4291 :                 blob->clear_method = stored_cm;
    9583        4291 :         } else if (blob->clear_method != stored_cm) {
    9584           0 :                 SPDK_WARNLOG("Using passed in clear method 0x%x instead of stored value of 0x%x\n",
    9585             :                              blob->clear_method, stored_cm);
    9586           0 :         }
    9587        4291 : }
    9588             : 
    9589             : spdk_blob_id
    9590         324 : spdk_blob_get_parent_snapshot(struct spdk_blob_store *bs, spdk_blob_id blob_id)
    9591             : {
    9592         324 :         struct spdk_blob_list *snapshot_entry = NULL;
    9593         324 :         struct spdk_blob_list *clone_entry = NULL;
    9594             : 
    9595         619 :         TAILQ_FOREACH(snapshot_entry, &bs->snapshots, link) {
    9596         916 :                 TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9597         621 :                         if (clone_entry->id == blob_id) {
    9598         211 :                                 return snapshot_entry->id;
    9599             :                         }
    9600         410 :                 }
    9601         295 :         }
    9602             : 
    9603         113 :         return SPDK_BLOBID_INVALID;
    9604         324 : }
    9605             : 
    9606             : int
    9607         246 : spdk_blob_get_clones(struct spdk_blob_store *bs, spdk_blob_id blobid, spdk_blob_id *ids,
    9608             :                      size_t *count)
    9609             : {
    9610             :         struct spdk_blob_list *snapshot_entry, *clone_entry;
    9611             :         size_t n;
    9612             : 
    9613         246 :         snapshot_entry = bs_get_snapshot_entry(bs, blobid);
    9614         246 :         if (snapshot_entry == NULL) {
    9615          35 :                 *count = 0;
    9616          35 :                 return 0;
    9617             :         }
    9618             : 
    9619         211 :         if (ids == NULL || *count < snapshot_entry->clone_count) {
    9620          10 :                 *count = snapshot_entry->clone_count;
    9621          10 :                 return -ENOMEM;
    9622             :         }
    9623         201 :         *count = snapshot_entry->clone_count;
    9624             : 
    9625         201 :         n = 0;
    9626         427 :         TAILQ_FOREACH(clone_entry, &snapshot_entry->clones, link) {
    9627         226 :                 ids[n++] = clone_entry->id;
    9628         226 :         }
    9629             : 
    9630         201 :         return 0;
    9631         246 : }
    9632             : 
    9633             : static void
    9634           5 : bs_load_grow_continue(struct spdk_bs_load_ctx *ctx)
    9635             : {
    9636             :         int rc;
    9637             : 
    9638           5 :         if (ctx->super->size == 0) {
    9639           0 :                 ctx->super->size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9640           0 :         }
    9641             : 
    9642           5 :         if (ctx->super->io_unit_size == 0) {
    9643           0 :                 ctx->super->io_unit_size = SPDK_BS_PAGE_SIZE;
    9644           0 :         }
    9645           5 :         if (ctx->super->md_page_size == 0) {
    9646           0 :                 ctx->super->md_page_size = SPDK_BS_PAGE_SIZE;
    9647           0 :         }
    9648             : 
    9649             :         /* Parse the super block */
    9650           5 :         ctx->bs->clean = 1;
    9651           5 :         ctx->bs->cluster_sz = ctx->super->cluster_size;
    9652           5 :         ctx->bs->total_clusters = ctx->super->size / ctx->super->cluster_size;
    9653           5 :         ctx->bs->md_page_size = ctx->super->md_page_size;
    9654           5 :         ctx->bs->io_unit_size = ctx->super->io_unit_size;
    9655           5 :         bs_init_per_cluster_fields(ctx->bs);
    9656           5 :         rc = spdk_bit_array_resize(&ctx->used_clusters, ctx->bs->total_clusters);
    9657           5 :         if (rc < 0) {
    9658           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9659           0 :                 return;
    9660             :         }
    9661           5 :         ctx->bs->md_start = ctx->super->md_start;
    9662           5 :         ctx->bs->md_len = ctx->super->md_len;
    9663           5 :         rc = spdk_bit_array_resize(&ctx->bs->open_blobids, ctx->bs->md_len);
    9664           5 :         if (rc < 0) {
    9665           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9666           0 :                 return;
    9667             :         }
    9668             : 
    9669          10 :         ctx->bs->total_data_clusters = ctx->bs->total_clusters - spdk_divide_round_up(
    9670           5 :                                                ctx->bs->md_start + ctx->bs->md_len, ctx->bs->pages_per_cluster);
    9671           5 :         ctx->bs->super_blob = ctx->super->super_blob;
    9672           5 :         memcpy(&ctx->bs->bstype, &ctx->super->bstype, sizeof(ctx->super->bstype));
    9673             : 
    9674           5 :         if (ctx->super->used_blobid_mask_len == 0 || ctx->super->clean == 0) {
    9675           0 :                 SPDK_ERRLOG("Can not grow an unclean blobstore, please load it normally to clean it.\n");
    9676           0 :                 bs_load_ctx_fail(ctx, -EIO);
    9677           0 :                 return;
    9678             :         } else {
    9679           5 :                 bs_load_read_used_pages(ctx);
    9680             :         }
    9681           5 : }
    9682             : 
    9683             : static void
    9684           5 : bs_load_grow_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9685             : {
    9686           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9687             : 
    9688           5 :         if (bserrno != 0) {
    9689           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9690           0 :                 return;
    9691             :         }
    9692           5 :         bs_load_grow_continue(ctx);
    9693           5 : }
    9694             : 
    9695             : static void
    9696           5 : bs_load_grow_used_clusters_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9697             : {
    9698           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9699             : 
    9700           5 :         if (bserrno != 0) {
    9701           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9702           0 :                 return;
    9703             :         }
    9704             : 
    9705           5 :         spdk_free(ctx->mask);
    9706             : 
    9707          10 :         bs_sequence_write_dev(ctx->seq, ctx->super, bs_page_to_lba(ctx->bs, 0),
    9708           5 :                               bs_byte_to_lba(ctx->bs, sizeof(*ctx->super)),
    9709           5 :                               bs_load_grow_super_write_cpl, ctx);
    9710           5 : }
    9711             : 
    9712             : static void
    9713           5 : bs_load_grow_used_clusters_read_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9714             : {
    9715           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9716             :         uint64_t                lba, lba_count;
    9717             :         uint64_t                dev_size;
    9718             :         uint64_t                total_clusters;
    9719             : 
    9720           5 :         if (bserrno != 0) {
    9721           0 :                 bs_load_ctx_fail(ctx, bserrno);
    9722           0 :                 return;
    9723             :         }
    9724             : 
    9725             :         /* The type must be correct */
    9726           5 :         assert(ctx->mask->type == SPDK_MD_MASK_TYPE_USED_CLUSTERS);
    9727             :         /* The length of the mask (in bits) must not be greater than the length of the buffer (converted to bits) */
    9728           5 :         assert(ctx->mask->length <= (ctx->super->used_cluster_mask_len * sizeof(
    9729             :                                              struct spdk_blob_md_page) * 8));
    9730           5 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9731           5 :         total_clusters = dev_size / ctx->super->cluster_size;
    9732           5 :         ctx->mask->length = total_clusters;
    9733             : 
    9734           5 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9735           5 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9736          10 :         bs_sequence_write_dev(ctx->seq, ctx->mask, lba, lba_count,
    9737           5 :                               bs_load_grow_used_clusters_write_cpl, ctx);
    9738           5 : }
    9739             : 
    9740             : static void
    9741           5 : bs_load_try_to_grow(struct spdk_bs_load_ctx *ctx)
    9742             : {
    9743             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9744             :         uint64_t lba, lba_count, mask_size;
    9745             : 
    9746           5 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9747           5 :         total_clusters = dev_size / ctx->super->cluster_size;
    9748          10 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9749           5 :                                 spdk_divide_round_up(total_clusters, 8),
    9750           5 :                                 ctx->super->md_page_size);
    9751           5 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9752             :         /* No necessary to grow or no space to grow */
    9753           5 :         if (ctx->super->size >= dev_size || used_cluster_mask_len > max_used_cluster_mask) {
    9754           0 :                 SPDK_DEBUGLOG(blob, "No grow\n");
    9755           0 :                 bs_load_grow_continue(ctx);
    9756           0 :                 return;
    9757             :         }
    9758             : 
    9759           5 :         SPDK_DEBUGLOG(blob, "Resize blobstore\n");
    9760             : 
    9761           5 :         ctx->super->size = dev_size;
    9762           5 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9763           5 :         ctx->super->crc = blob_md_page_calc_crc(ctx->super);
    9764             : 
    9765           5 :         mask_size = used_cluster_mask_len * ctx->super->md_page_size;
    9766           5 :         ctx->mask = spdk_zmalloc(mask_size, 0x1000, NULL, SPDK_ENV_NUMA_ID_ANY,
    9767             :                                  SPDK_MALLOC_DMA);
    9768           5 :         if (!ctx->mask) {
    9769           0 :                 bs_load_ctx_fail(ctx, -ENOMEM);
    9770           0 :                 return;
    9771             :         }
    9772           5 :         lba = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_start);
    9773           5 :         lba_count = bs_page_to_lba(ctx->bs, ctx->super->used_cluster_mask_len);
    9774          10 :         bs_sequence_read_dev(ctx->seq, ctx->mask, lba, lba_count,
    9775           5 :                              bs_load_grow_used_clusters_read_cpl, ctx);
    9776           5 : }
    9777             : 
    9778             : static void
    9779           5 : bs_grow_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9780             : {
    9781           5 :         struct spdk_bs_load_ctx *ctx = cb_arg;
    9782             :         int rc;
    9783             : 
    9784           5 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9785           5 :         if (rc != 0) {
    9786           0 :                 bs_load_ctx_fail(ctx, rc);
    9787           0 :                 return;
    9788             :         }
    9789             : 
    9790           5 :         bs_load_try_to_grow(ctx);
    9791           5 : }
    9792             : 
    9793             : struct spdk_bs_grow_ctx {
    9794             :         struct spdk_blob_store          *bs;
    9795             :         struct spdk_bs_super_block      *super;
    9796             : 
    9797             :         struct spdk_bit_pool            *new_used_clusters;
    9798             :         struct spdk_bs_md_mask          *new_used_clusters_mask;
    9799             : 
    9800             :         spdk_bs_sequence_t              *seq;
    9801             : };
    9802             : 
    9803             : static void
    9804          40 : bs_grow_live_done(struct spdk_bs_grow_ctx *ctx, int bserrno)
    9805             : {
    9806          40 :         if (bserrno != 0) {
    9807          10 :                 spdk_bit_pool_free(&ctx->new_used_clusters);
    9808          10 :         }
    9809             : 
    9810          40 :         bs_sequence_finish(ctx->seq, bserrno);
    9811          40 :         free(ctx->new_used_clusters_mask);
    9812          40 :         spdk_free(ctx->super);
    9813          40 :         free(ctx);
    9814          40 : }
    9815             : 
    9816             : static void
    9817          10 : bs_grow_live_super_write_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9818             : {
    9819          10 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9820          10 :         struct spdk_blob_store *bs = ctx->bs;
    9821             :         uint64_t total_clusters;
    9822             : 
    9823          10 :         if (bserrno != 0) {
    9824           0 :                 bs_grow_live_done(ctx, bserrno);
    9825           0 :                 return;
    9826             :         }
    9827             : 
    9828             :         /*
    9829             :          * Blobstore is not clean until unload, for now only the super block is up to date.
    9830             :          * This is similar to state right after blobstore init, when bs_write_used_md() didn't
    9831             :          * yet execute.
    9832             :          * When cleanly unloaded, the used md pages will be written out.
    9833             :          * In case of unclean shutdown, loading blobstore will go through recovery path correctly
    9834             :          * filling out the used_clusters with new size and writing it out.
    9835             :          */
    9836          10 :         bs->clean = 0;
    9837             : 
    9838             :         /* Reverting the super->size past this point is complex, avoid any error paths
    9839             :          * that require to do so. */
    9840          10 :         spdk_spin_lock(&bs->used_lock);
    9841             : 
    9842          10 :         total_clusters = ctx->super->size / ctx->super->cluster_size;
    9843             : 
    9844          10 :         assert(total_clusters >= spdk_bit_pool_capacity(bs->used_clusters));
    9845          10 :         spdk_bit_pool_store_mask(bs->used_clusters, ctx->new_used_clusters_mask);
    9846             : 
    9847          10 :         assert(total_clusters == spdk_bit_pool_capacity(ctx->new_used_clusters));
    9848          10 :         spdk_bit_pool_load_mask(ctx->new_used_clusters, ctx->new_used_clusters_mask);
    9849             : 
    9850          10 :         spdk_bit_pool_free(&bs->used_clusters);
    9851          10 :         bs->used_clusters = ctx->new_used_clusters;
    9852             : 
    9853          10 :         bs->total_clusters = total_clusters;
    9854          20 :         bs->total_data_clusters = bs->total_clusters - spdk_divide_round_up(
    9855          10 :                                           bs->md_start + bs->md_len, bs->pages_per_cluster);
    9856             : 
    9857          10 :         bs->num_free_clusters = spdk_bit_pool_count_free(bs->used_clusters);
    9858          10 :         assert(ctx->bs->num_free_clusters <= ctx->bs->total_clusters);
    9859          10 :         spdk_spin_unlock(&bs->used_lock);
    9860             : 
    9861          10 :         bs_grow_live_done(ctx, 0);
    9862          10 : }
    9863             : 
    9864             : static void
    9865          40 : bs_grow_live_load_super_cpl(spdk_bs_sequence_t *seq, void *cb_arg, int bserrno)
    9866             : {
    9867          40 :         struct spdk_bs_grow_ctx *ctx = cb_arg;
    9868             :         uint64_t dev_size, total_clusters, used_cluster_mask_len, max_used_cluster_mask;
    9869             :         int rc;
    9870             : 
    9871          40 :         if (bserrno != 0) {
    9872           0 :                 bs_grow_live_done(ctx, bserrno);
    9873           0 :                 return;
    9874             :         }
    9875             : 
    9876          40 :         rc = bs_super_validate(ctx->super, ctx->bs);
    9877          40 :         if (rc != 0) {
    9878           5 :                 bs_grow_live_done(ctx, rc);
    9879           5 :                 return;
    9880             :         }
    9881             : 
    9882          35 :         dev_size = ctx->bs->dev->blockcnt * ctx->bs->dev->blocklen;
    9883          35 :         total_clusters = dev_size / ctx->super->cluster_size;
    9884          70 :         used_cluster_mask_len = spdk_divide_round_up(sizeof(struct spdk_bs_md_mask) +
    9885          35 :                                 spdk_divide_round_up(total_clusters, 8),
    9886          35 :                                 ctx->super->md_page_size);
    9887          35 :         max_used_cluster_mask = ctx->super->used_blobid_mask_start - ctx->super->used_cluster_mask_start;
    9888             :         /* Only checking dev_size. Since it can change, but total_clusters remain the same. */
    9889          35 :         if (dev_size == ctx->super->size) {
    9890          20 :                 SPDK_DEBUGLOG(blob, "No need to grow blobstore\n");
    9891          20 :                 bs_grow_live_done(ctx, 0);
    9892          20 :                 return;
    9893             :         }
    9894             :         /*
    9895             :          * Blobstore cannot be shrunk, so check before if:
    9896             :          * - new size of the device is smaller than size in super_block
    9897             :          * - new total number of clusters is smaller than used_clusters bit_pool
    9898             :          * - there is enough space in metadata for used_cluster_mask to be written out
    9899             :          */
    9900          30 :         if (dev_size < ctx->super->size ||
    9901          15 :             total_clusters < spdk_bit_pool_capacity(ctx->bs->used_clusters) ||
    9902          15 :             used_cluster_mask_len > max_used_cluster_mask) {
    9903           5 :                 SPDK_DEBUGLOG(blob, "No space to grow blobstore\n");
    9904           5 :                 bs_grow_live_done(ctx, -ENOSPC);
    9905           5 :                 return;
    9906             :         }
    9907             : 
    9908          10 :         SPDK_DEBUGLOG(blob, "Resizing blobstore\n");
    9909             : 
    9910          10 :         ctx->new_used_clusters_mask = calloc(1, total_clusters);
    9911          10 :         if (!ctx->new_used_clusters_mask) {
    9912           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9913           0 :                 return;
    9914             :         }
    9915          10 :         ctx->new_used_clusters = spdk_bit_pool_create(total_clusters);
    9916          10 :         if (!ctx->new_used_clusters) {
    9917           0 :                 bs_grow_live_done(ctx, -ENOMEM);
    9918           0 :                 return;
    9919             :         }
    9920             : 
    9921          10 :         ctx->super->clean = 0;
    9922          10 :         ctx->super->size = dev_size;
    9923          10 :         ctx->super->used_cluster_mask_len = used_cluster_mask_len;
    9924          10 :         bs_write_super(seq, ctx->bs, ctx->super, bs_grow_live_super_write_cpl, ctx);
    9925          40 : }
    9926             : 
    9927             : void
    9928          40 : spdk_bs_grow_live(struct spdk_blob_store *bs,
    9929             :                   spdk_bs_op_complete cb_fn, void *cb_arg)
    9930             : {
    9931             :         struct spdk_bs_cpl      cpl;
    9932             :         struct spdk_bs_grow_ctx *ctx;
    9933             : 
    9934          40 :         assert(spdk_get_thread() == bs->md_thread);
    9935             : 
    9936          40 :         SPDK_DEBUGLOG(blob, "Growing blobstore on dev %p\n", bs->dev);
    9937             : 
    9938          40 :         cpl.type = SPDK_BS_CPL_TYPE_BS_BASIC;
    9939          40 :         cpl.u.bs_basic.cb_fn = cb_fn;
    9940          40 :         cpl.u.bs_basic.cb_arg = cb_arg;
    9941             : 
    9942          40 :         ctx = calloc(1, sizeof(struct spdk_bs_grow_ctx));
    9943          40 :         if (!ctx) {
    9944           0 :                 cb_fn(cb_arg, -ENOMEM);
    9945           0 :                 return;
    9946             :         }
    9947          40 :         ctx->bs = bs;
    9948             : 
    9949          40 :         ctx->super = spdk_zmalloc(sizeof(*ctx->super), 0x1000, NULL,
    9950             :                                   SPDK_ENV_NUMA_ID_ANY, SPDK_MALLOC_DMA);
    9951          40 :         if (!ctx->super) {
    9952           0 :                 free(ctx);
    9953           0 :                 cb_fn(cb_arg, -ENOMEM);
    9954           0 :                 return;
    9955             :         }
    9956             : 
    9957          40 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
    9958          40 :         if (!ctx->seq) {
    9959           0 :                 spdk_free(ctx->super);
    9960           0 :                 free(ctx);
    9961           0 :                 cb_fn(cb_arg, -ENOMEM);
    9962           0 :                 return;
    9963             :         }
    9964             : 
    9965             :         /* Read the super block */
    9966          80 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
    9967          40 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
    9968          40 :                              bs_grow_live_load_super_cpl, ctx);
    9969          40 : }
    9970             : 
    9971             : void
    9972           5 : spdk_bs_grow(struct spdk_bs_dev *dev, struct spdk_bs_opts *o,
    9973             :              spdk_bs_op_with_handle_complete cb_fn, void *cb_arg)
    9974             : {
    9975             :         struct spdk_blob_store  *bs;
    9976             :         struct spdk_bs_cpl      cpl;
    9977             :         struct spdk_bs_load_ctx *ctx;
    9978           5 :         struct spdk_bs_opts     opts = {};
    9979             :         int err;
    9980             : 
    9981           5 :         SPDK_DEBUGLOG(blob, "Loading blobstore from dev %p\n", dev);
    9982             : 
    9983           5 :         if ((dev->phys_blocklen % dev->blocklen) != 0) {
    9984           0 :                 SPDK_DEBUGLOG(blob, "unsupported dev block length of %d\n", dev->blocklen);
    9985           0 :                 dev->destroy(dev);
    9986           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
    9987           0 :                 return;
    9988             :         }
    9989             : 
    9990           5 :         spdk_bs_opts_init(&opts, sizeof(opts));
    9991           5 :         if (o) {
    9992           5 :                 if (bs_opts_copy(o, &opts)) {
    9993           0 :                         dev->destroy(dev);
    9994           0 :                         cb_fn(cb_arg, NULL, -EINVAL);
    9995           0 :                         return;
    9996             :                 }
    9997           5 :         }
    9998             : 
    9999           5 :         if (opts.max_md_ops == 0 || opts.max_channel_ops == 0) {
   10000           0 :                 dev->destroy(dev);
   10001           0 :                 cb_fn(cb_arg, NULL, -EINVAL);
   10002           0 :                 return;
   10003             :         }
   10004             : 
   10005           5 :         err = bs_alloc(dev, &opts, &bs, &ctx);
   10006           5 :         if (err) {
   10007           0 :                 dev->destroy(dev);
   10008           0 :                 cb_fn(cb_arg, NULL, err);
   10009           0 :                 return;
   10010             :         }
   10011             : 
   10012           5 :         cpl.type = SPDK_BS_CPL_TYPE_BS_HANDLE;
   10013           5 :         cpl.u.bs_handle.cb_fn = cb_fn;
   10014           5 :         cpl.u.bs_handle.cb_arg = cb_arg;
   10015           5 :         cpl.u.bs_handle.bs = bs;
   10016             : 
   10017           5 :         ctx->seq = bs_sequence_start_bs(bs->md_channel, &cpl);
   10018           5 :         if (!ctx->seq) {
   10019           0 :                 spdk_free(ctx->super);
   10020           0 :                 free(ctx);
   10021           0 :                 bs_free(bs);
   10022           0 :                 cb_fn(cb_arg, NULL, -ENOMEM);
   10023           0 :                 return;
   10024             :         }
   10025             : 
   10026             :         /* Read the super block */
   10027          10 :         bs_sequence_read_dev(ctx->seq, ctx->super, bs_page_to_lba(bs, 0),
   10028           5 :                              bs_byte_to_lba(bs, sizeof(*ctx->super)),
   10029           5 :                              bs_grow_load_super_cpl, ctx);
   10030           5 : }
   10031             : 
   10032             : int
   10033          30 : spdk_blob_get_esnap_id(struct spdk_blob *blob, const void **id, size_t *len)
   10034             : {
   10035          30 :         if (!blob_is_esnap_clone(blob)) {
   10036          15 :                 return -EINVAL;
   10037             :         }
   10038             : 
   10039          15 :         return blob_get_xattr_value(blob, BLOB_EXTERNAL_SNAPSHOT_ID, id, len, true);
   10040          30 : }
   10041             : 
   10042             : struct spdk_io_channel *
   10043       17482 : blob_esnap_get_io_channel(struct spdk_io_channel *ch, struct spdk_blob *blob)
   10044             : {
   10045       17482 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(ch);
   10046       17482 :         struct spdk_bs_dev              *bs_dev = blob->back_bs_dev;
   10047       17482 :         struct blob_esnap_channel       find = {};
   10048             :         struct blob_esnap_channel       *esnap_channel, *existing;
   10049             : 
   10050       17482 :         find.blob_id = blob->id;
   10051       17482 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10052       17482 :         if (spdk_likely(esnap_channel != NULL)) {
   10053       17427 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": using cached channel on thread %s\n",
   10054             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10055       17427 :                 return esnap_channel->channel;
   10056             :         }
   10057             : 
   10058          55 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": allocating channel on thread %s\n",
   10059             :                       blob->id, spdk_thread_get_name(spdk_get_thread()));
   10060             : 
   10061          55 :         esnap_channel = calloc(1, sizeof(*esnap_channel));
   10062          55 :         if (esnap_channel == NULL) {
   10063           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " channel allocation failed: no memory\n",
   10064             :                                find.blob_id);
   10065           0 :                 return NULL;
   10066             :         }
   10067          55 :         esnap_channel->channel = bs_dev->create_channel(bs_dev);
   10068          55 :         if (esnap_channel->channel == NULL) {
   10069           0 :                 SPDK_NOTICELOG("blob 0x%" PRIx64 " back channel allocation failed\n", blob->id);
   10070           0 :                 free(esnap_channel);
   10071           0 :                 return NULL;
   10072             :         }
   10073          55 :         esnap_channel->blob_id = find.blob_id;
   10074          55 :         existing = RB_INSERT(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10075          55 :         if (spdk_unlikely(existing != NULL)) {
   10076             :                 /*
   10077             :                  * This should be unreachable: all modifications to this tree happen on this thread.
   10078             :                  */
   10079           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 "lost race to allocate a channel\n", find.blob_id);
   10080           0 :                 assert(false);
   10081             : 
   10082             :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10083             :                 free(esnap_channel);
   10084             : 
   10085             :                 return existing->channel;
   10086             :         }
   10087             : 
   10088          55 :         return esnap_channel->channel;
   10089       17482 : }
   10090             : 
   10091             : static int
   10092       17452 : blob_esnap_channel_compare(struct blob_esnap_channel *c1, struct blob_esnap_channel *c2)
   10093             : {
   10094       17452 :         return (c1->blob_id < c2->blob_id ? -1 : c1->blob_id > c2->blob_id);
   10095             : }
   10096             : 
   10097             : struct blob_esnap_destroy_ctx {
   10098             :         spdk_blob_op_with_handle_complete       cb_fn;
   10099             :         void                                    *cb_arg;
   10100             :         struct spdk_blob                        *blob;
   10101             :         struct spdk_bs_dev                      *back_bs_dev;
   10102             :         bool                                    abort_io;
   10103             : };
   10104             : 
   10105             : static void
   10106         170 : blob_esnap_destroy_channels_done(struct spdk_io_channel_iter *i, int status)
   10107             : {
   10108         170 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10109         170 :         struct spdk_blob                *blob = ctx->blob;
   10110         170 :         struct spdk_blob_store          *bs = blob->bs;
   10111             : 
   10112         170 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": done destroying channels for this blob\n",
   10113             :                       blob->id);
   10114             : 
   10115         170 :         if (ctx->cb_fn != NULL) {
   10116         155 :                 ctx->cb_fn(ctx->cb_arg, blob, status);
   10117         155 :         }
   10118         170 :         free(ctx);
   10119             : 
   10120         170 :         bs->esnap_channels_unloading--;
   10121         170 :         if (bs->esnap_channels_unloading == 0 && bs->esnap_unload_cb_fn != NULL) {
   10122           5 :                 spdk_bs_unload(bs, bs->esnap_unload_cb_fn, bs->esnap_unload_cb_arg);
   10123           5 :         }
   10124         170 : }
   10125             : 
   10126             : static void
   10127         180 : blob_esnap_destroy_one_channel(struct spdk_io_channel_iter *i)
   10128             : {
   10129         180 :         struct blob_esnap_destroy_ctx   *ctx = spdk_io_channel_iter_get_ctx(i);
   10130         180 :         struct spdk_blob                *blob = ctx->blob;
   10131         180 :         struct spdk_bs_dev              *bs_dev = ctx->back_bs_dev;
   10132         180 :         struct spdk_io_channel          *channel = spdk_io_channel_iter_get_channel(i);
   10133         180 :         struct spdk_bs_channel          *bs_channel = spdk_io_channel_get_ctx(channel);
   10134             :         struct blob_esnap_channel       *esnap_channel;
   10135         180 :         struct blob_esnap_channel       find = {};
   10136             : 
   10137         180 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(channel));
   10138             : 
   10139         180 :         find.blob_id = blob->id;
   10140         180 :         esnap_channel = RB_FIND(blob_esnap_channel_tree, &bs_channel->esnap_channels, &find);
   10141         180 :         if (esnap_channel != NULL) {
   10142          15 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channel on thread %s\n",
   10143             :                               blob->id, spdk_thread_get_name(spdk_get_thread()));
   10144          15 :                 RB_REMOVE(blob_esnap_channel_tree, &bs_channel->esnap_channels, esnap_channel);
   10145             : 
   10146          15 :                 if (ctx->abort_io) {
   10147             :                         spdk_bs_user_op_t *op, *tmp;
   10148             : 
   10149          10 :                         TAILQ_FOREACH_SAFE(op, &bs_channel->queued_io, link, tmp) {
   10150           0 :                                 if (op->back_channel == esnap_channel->channel) {
   10151           0 :                                         TAILQ_REMOVE(&bs_channel->queued_io, op, link);
   10152           0 :                                         bs_user_op_abort(op, -EIO);
   10153           0 :                                 }
   10154           0 :                         }
   10155          10 :                 }
   10156             : 
   10157          15 :                 bs_dev->destroy_channel(bs_dev, esnap_channel->channel);
   10158          15 :                 free(esnap_channel);
   10159          15 :         }
   10160             : 
   10161         180 :         spdk_for_each_channel_continue(i, 0);
   10162         180 : }
   10163             : 
   10164             : /*
   10165             :  * Destroy the channels for a specific blob on each thread with a blobstore channel. This should be
   10166             :  * used when closing an esnap clone blob and after decoupling from the parent.
   10167             :  */
   10168             : static void
   10169         606 : blob_esnap_destroy_bs_dev_channels(struct spdk_blob *blob, bool abort_io,
   10170             :                                    spdk_blob_op_with_handle_complete cb_fn, void *cb_arg)
   10171             : {
   10172             :         struct blob_esnap_destroy_ctx   *ctx;
   10173             : 
   10174         606 :         if (!blob_is_esnap_clone(blob) || blob->back_bs_dev == NULL) {
   10175         436 :                 if (cb_fn != NULL) {
   10176         436 :                         cb_fn(cb_arg, blob, 0);
   10177         436 :                 }
   10178         436 :                 return;
   10179             :         }
   10180             : 
   10181         170 :         ctx = calloc(1, sizeof(*ctx));
   10182         170 :         if (ctx == NULL) {
   10183           0 :                 if (cb_fn != NULL) {
   10184           0 :                         cb_fn(cb_arg, blob, -ENOMEM);
   10185           0 :                 }
   10186           0 :                 return;
   10187             :         }
   10188         170 :         ctx->cb_fn = cb_fn;
   10189         170 :         ctx->cb_arg = cb_arg;
   10190         170 :         ctx->blob = blob;
   10191         170 :         ctx->back_bs_dev = blob->back_bs_dev;
   10192         170 :         ctx->abort_io = abort_io;
   10193             : 
   10194         170 :         SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64 ": destroying channels for this blob\n",
   10195             :                       blob->id);
   10196             : 
   10197         170 :         blob->bs->esnap_channels_unloading++;
   10198         170 :         spdk_for_each_channel(blob->bs, blob_esnap_destroy_one_channel, ctx,
   10199             :                               blob_esnap_destroy_channels_done);
   10200         606 : }
   10201             : 
   10202             : /*
   10203             :  * Destroy all bs_dev channels on a specific blobstore channel. This should be used when a
   10204             :  * bs_channel is destroyed.
   10205             :  */
   10206             : static void
   10207        1284 : blob_esnap_destroy_bs_channel(struct spdk_bs_channel *ch)
   10208             : {
   10209             :         struct blob_esnap_channel *esnap_channel, *esnap_channel_tmp;
   10210             : 
   10211        1284 :         assert(spdk_get_thread() == spdk_io_channel_get_thread(spdk_io_channel_from_ctx(ch)));
   10212             : 
   10213        1284 :         SPDK_DEBUGLOG(blob_esnap, "destroying channels on thread %s\n",
   10214             :                       spdk_thread_get_name(spdk_get_thread()));
   10215        1324 :         RB_FOREACH_SAFE(esnap_channel, blob_esnap_channel_tree, &ch->esnap_channels,
   10216             :                         esnap_channel_tmp) {
   10217          40 :                 SPDK_DEBUGLOG(blob_esnap, "blob 0x%" PRIx64
   10218             :                               ": destroying one channel in thread %s\n",
   10219             :                               esnap_channel->blob_id, spdk_thread_get_name(spdk_get_thread()));
   10220          40 :                 RB_REMOVE(blob_esnap_channel_tree, &ch->esnap_channels, esnap_channel);
   10221          40 :                 spdk_put_io_channel(esnap_channel->channel);
   10222          40 :                 free(esnap_channel);
   10223          40 :         }
   10224        1284 :         SPDK_DEBUGLOG(blob_esnap, "done destroying channels on thread %s\n",
   10225             :                       spdk_thread_get_name(spdk_get_thread()));
   10226        1284 : }
   10227             : 
   10228             : static void
   10229          35 : blob_set_back_bs_dev_done(void *_ctx, int bserrno)
   10230             : {
   10231          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10232             : 
   10233          35 :         if (bserrno != 0) {
   10234             :                 /* Even though the unfreeze failed, the update may have succeed. */
   10235           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": unfreeze failed with error %d\n", ctx->blob->id,
   10236             :                             bserrno);
   10237           0 :         }
   10238          35 :         ctx->cb_fn(ctx->cb_arg, ctx->bserrno);
   10239          35 :         free(ctx);
   10240          35 : }
   10241             : 
   10242             : static void
   10243          35 : blob_frozen_set_back_bs_dev(void *_ctx, struct spdk_blob *blob, int bserrno)
   10244             : {
   10245          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10246             :         int rc;
   10247             : 
   10248          35 :         if (bserrno != 0) {
   10249           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to release old back_bs_dev with error %d\n",
   10250             :                             blob->id, bserrno);
   10251           0 :                 ctx->bserrno = bserrno;
   10252           0 :                 blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10253           0 :                 return;
   10254             :         }
   10255             : 
   10256          35 :         if (blob->back_bs_dev != NULL) {
   10257          35 :                 blob_unref_back_bs_dev(blob);
   10258          35 :         }
   10259             : 
   10260          35 :         if (ctx->parent_refs_cb_fn) {
   10261          25 :                 rc = ctx->parent_refs_cb_fn(blob, ctx->parent_refs_cb_arg);
   10262          25 :                 if (rc != 0) {
   10263           0 :                         ctx->bserrno = rc;
   10264           0 :                         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10265           0 :                         return;
   10266             :                 }
   10267          25 :         }
   10268             : 
   10269          35 :         SPDK_NOTICELOG("blob 0x%" PRIx64 ": hotplugged back_bs_dev\n", blob->id);
   10270          35 :         blob->back_bs_dev = ctx->back_bs_dev;
   10271          35 :         ctx->bserrno = 0;
   10272             : 
   10273          35 :         blob_unfreeze_io(blob, blob_set_back_bs_dev_done, ctx);
   10274          35 : }
   10275             : 
   10276             : static void
   10277          35 : blob_set_back_bs_dev_frozen(void *_ctx, int bserrno)
   10278             : {
   10279          35 :         struct set_bs_dev_ctx   *ctx = _ctx;
   10280          35 :         struct spdk_blob        *blob = ctx->blob;
   10281             : 
   10282          35 :         if (bserrno != 0) {
   10283           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": failed to freeze with error %d\n", blob->id,
   10284             :                             bserrno);
   10285           0 :                 ctx->cb_fn(ctx->cb_arg, bserrno);
   10286           0 :                 free(ctx);
   10287           0 :                 return;
   10288             :         }
   10289             : 
   10290             :         /*
   10291             :          * This does not prevent future reads from the esnap device because any future IO will
   10292             :          * lazily create a new esnap IO channel.
   10293             :          */
   10294          35 :         blob_esnap_destroy_bs_dev_channels(blob, true, blob_frozen_set_back_bs_dev, ctx);
   10295          35 : }
   10296             : 
   10297             : void
   10298          10 : spdk_blob_set_esnap_bs_dev(struct spdk_blob *blob, struct spdk_bs_dev *back_bs_dev,
   10299             :                            spdk_blob_op_complete cb_fn, void *cb_arg)
   10300             : {
   10301          10 :         if (!blob_is_esnap_clone(blob)) {
   10302           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10303           0 :                 cb_fn(cb_arg, -EINVAL);
   10304           0 :                 return;
   10305             :         }
   10306             : 
   10307          10 :         blob_set_back_bs_dev(blob, back_bs_dev, NULL, NULL, cb_fn, cb_arg);
   10308          10 : }
   10309             : 
   10310             : struct spdk_bs_dev *
   10311           5 : spdk_blob_get_esnap_bs_dev(const struct spdk_blob *blob)
   10312             : {
   10313           5 :         if (!blob_is_esnap_clone(blob)) {
   10314           0 :                 SPDK_ERRLOG("blob 0x%" PRIx64 ": not an esnap clone\n", blob->id);
   10315           0 :                 return NULL;
   10316             :         }
   10317             : 
   10318           5 :         return blob->back_bs_dev;
   10319           5 : }
   10320             : 
   10321             : bool
   10322          35 : spdk_blob_is_degraded(const struct spdk_blob *blob)
   10323             : {
   10324          35 :         if (blob->bs->dev->is_degraded != NULL && blob->bs->dev->is_degraded(blob->bs->dev)) {
   10325           5 :                 return true;
   10326             :         }
   10327          30 :         if (blob->back_bs_dev == NULL || blob->back_bs_dev->is_degraded == NULL) {
   10328          15 :                 return false;
   10329             :         }
   10330             : 
   10331          15 :         return blob->back_bs_dev->is_degraded(blob->back_bs_dev);
   10332          35 : }
   10333             : 
   10334           3 : SPDK_LOG_REGISTER_COMPONENT(blob)
   10335           3 : SPDK_LOG_REGISTER_COMPONENT(blob_esnap)
   10336             : 
   10337             : static void
   10338           0 : blob_trace(void)
   10339             : {
   10340           0 :         struct spdk_trace_tpoint_opts opts[] = {
   10341             :                 {
   10342             :                         "BLOB_REQ_SET_START", TRACE_BLOB_REQ_SET_START,
   10343             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 1,
   10344             :                         {
   10345             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10346             :                         }
   10347             :                 },
   10348             :                 {
   10349             :                         "BLOB_REQ_SET_COMPLETE", TRACE_BLOB_REQ_SET_COMPLETE,
   10350             :                         OWNER_TYPE_NONE, OBJECT_BLOB_CB_ARG, 0,
   10351             :                         {
   10352             :                                 { "ctx", SPDK_TRACE_ARG_TYPE_PTR, 8 }
   10353             :                         }
   10354             :                 },
   10355             :         };
   10356             : 
   10357           0 :         spdk_trace_register_object(OBJECT_BLOB_CB_ARG, 'a');
   10358           0 :         spdk_trace_register_description_ext(opts, SPDK_COUNTOF(opts));
   10359           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_START, OBJECT_BLOB_CB_ARG, 1);
   10360           0 :         spdk_trace_tpoint_register_relation(TRACE_BDEV_IO_DONE, OBJECT_BLOB_CB_ARG, 0);
   10361           0 : }
   10362           3 : SPDK_TRACE_REGISTER_FN(blob_trace, "blob", TRACE_GROUP_BLOB)

Generated by: LCOV version 1.15